summaryrefslogtreecommitdiffstats
path: root/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js
diff options
context:
space:
mode:
Diffstat (limited to 'editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js')
-rw-r--r--editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js436
1 files changed, 436 insertions, 0 deletions
diff --git a/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js b/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js
new file mode 100644
index 0000000000..2236d9dfc5
--- /dev/null
+++ b/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js
@@ -0,0 +1,436 @@
+/**
+ * @fileoverview
+ * Canonicalization functions used in the RTE test suite.
+ *
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the 'License')
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an 'AS IS' BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @version 0.1
+ * @author rolandsteiner@google.com
+ */
+
/**
 * Canonicalize numeric HTML character references to their actual character.
 *
 * Handles decimal ("&#65;") and hexadecimal ("&#x41;" / "&#X41;") references.
 * Named references such as "&amp;" are not touched.
 *
 * The string is processed in a single pass, so text produced by decoding one
 * reference is never re-read as part of another reference
 * (e.g. "&#x26;#65;" yields "&#65;", not "A").
 *
 * Code points above U+FFFF are emitted as a UTF-16 surrogate pair. References
 * above U+10FFFF do not denote a character and are left as they are.
 *
 * @param str {String} the HTML string to be canonicalized
 * @return {String} the canonicalized string
 */
function canonicalizeEntities(str) {
  return str.replace(/&#(?:x([0-9a-f]+)|([0-9]+));/gi, function(entity, hex, dec) {
    // Exactly one of |hex| / |dec| took part in the match.
    var codePoint = hex ? parseInt(hex, 16) : parseInt(dec, 10);

    if (codePoint > 0x10FFFF) {
      // Outside the Unicode range (also covers absurdly long digit runs).
      return entity;
    }
    if (codePoint > 0xFFFF) {
      // String.fromCharCode() works on UTF-16 code units, so split
      // supplementary-plane code points into a surrogate pair by hand.
      var offset = codePoint - 0x10000;
      return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }
    return String.fromCharCode(codePoint);
  });
}
+
/**
 * Bring the contents of an HTML 'style' attribute into canonical form:
 * the "name: value" declarations are re-emitted sorted alphabetically and
 * joined with '; ', and color / font-family / font-size values are
 * canonicalized when emitFlags.canonicalizeUnits is set.
 *
 * Returns null rather than the empty string so that the caller can drop the
 * whole 'style' attribute.
 *
 * Avoid tests that contain {, } or : within CSS values!
 *
 * Note that this function relies on the spaces of the input string already
 * having been normalized by canonicalizeSpaces!
 *
 * FIXME: does not canonicalize the contents of compound attributes
 * (e.g., 'border').
 *
 * @param str {String} contents of the 'style' attribute
 * @param emitFlags {Object} flags used for this output
 * @return {String/null} canonicalized string, null instead of the empty string
 */
function canonicalizeStyle(str, emitFlags) {
  // Drop any enclosing curly brackets, then cut into single declarations.
  var declarations = str.replace(/ ?[\{\}] ?/g, '').split(';');
  var canonical = [];

  for (var i = 0; i < declarations.length; ++i) {
    // A piece that is not a "name: value" pair is skipped; this is expected
    // for the empty piece after a trailing ';'.
    var parts = declarations[i].match(/ ?([^ :]+) ?: ?(.+)/);
    if (parts) {
      var name = parts[1];
      var value = parts[2].replace(/ $/, '');  // Strip one trailing space.

      var isColor = name === 'color' ||
                    name === 'background-color' ||
                    name === 'border-color';

      if (isColor && emitFlags.canonicalizeUnits) {
        value = '#' + new Color(value).toHexString();
      } else if (name === 'font-family' && emitFlags.canonicalizeUnits) {
        value = new FontName(value).toString();
      } else if (name === 'font-size' && emitFlags.canonicalizeUnits) {
        value = new FontSize(value).toString();
      }

      canonical.push(name + ': ' + value);
    }
  }

  // Sorting the full "name: value" strings orders them by name, assuming no
  // duplicate CSS attribute names.
  canonical.sort();

  var joined = canonical.join('; ');
  return joined ? joined : null;
}
+
/**
 * Canonicalize a single attribute value.
 *
 * Note that this function relies on the spaces of the input string already
 * having been normalized by canonicalizeSpaces!
 *
 * @param elemName {String} the name of the element
 * @param attrName {String} the name of the attribute
 * @param attrValue {String} the value of the attribute
 * @param emitFlags {Object} flags used for this output
 * @return {String/null} the canonicalized value, or null if the attribute should be skipped.
 */
function canonicalizeSingleAttribute(elemName, attrName, attrValue, emitFlags) {
  // We emit attributes as name="value", so change EVERY contained quote mark
  // (") to an apostrophe ('). A leftover quote mark would end the emitted
  // value early.
  attrValue = attrValue.replace(/\x22/g, '\x27');

  switch (attrName) {
    case 'class':
      return emitFlags.emitClass ? attrValue : null;

    case 'id':
      if (!emitFlags.emitID) {
        return null;
      }
      // IDs starting with 'editor-' are never emitted.
      if (attrValue && attrValue.indexOf('editor-') === 0) {
        return null;
      }
      return attrValue;

    // Remove empty style attributes, canonicalize the contents otherwise,
    // provided the test cares for styles.
    case 'style':
      return (emitFlags.emitStyle && attrValue)
                 ? canonicalizeStyle(attrValue, emitFlags)
                 : null;

    // Never output onload handlers as they are set by the test environment.
    case 'onload':
      return null;

    // Canonicalize colors.
    case 'bgcolor':
    case 'color':
      if (!attrValue) {
        return null;
      }
      return emitFlags.canonicalizeUnits ? new Color(attrValue).toString() : attrValue;

    // Canonicalize font names.
    case 'face':
      return emitFlags.canonicalizeUnits ? new FontName(attrValue).toString() : attrValue;

    // Canonicalize font sizes (leave other 'size' attributes as-is).
    case 'size':
      if (!attrValue) {
        return null;
      }
      switch (elemName) {
        case 'basefont':
        case 'font':
          return emitFlags.canonicalizeUnits ? new FontSize(attrValue).toString() : attrValue;
      }
      return attrValue;

    // Remove spans with value 1 and spans without a value. Retain spans with
    // any other value, including 0, since those indicate a flawed
    // implementation.
    case 'colspan':
    case 'rowspan':
    case 'span':
      return (attrValue == '1' || attrValue === '') ? null : attrValue;

    // Boolean attributes: presence equals true. If present, the value must be
    // the empty string or the attribute's canonical name.
    // (http://www.whatwg.org/specs/web-apps/current-work/#boolean-attributes)
    // Below we only normalize empty string to the canonical name for
    // comparison purposes. All other values are not touched and will therefore
    // in all likelihood result in a failed test (even if they may be accepted
    // by the UA).
    case 'async':
    case 'autofocus':
    case 'checked':
    case 'compact':
    case 'declare':
    case 'defer':
    case 'disabled':
    case 'formnovalidate':
    case 'frameborder':
    case 'ismap':
    case 'loop':
    case 'multiple':
    case 'nohref':
    case 'nosize':
    case 'noshade':
    case 'novalidate':
    case 'nowrap':
    case 'open':
    case 'readonly':
    case 'required':
    case 'reversed':
    case 'seamless':
    case 'selected':
      return attrValue ? attrValue : attrName;

    default:
      return attrValue;
  }
}
+
/**
 * Canonicalize the contents of an element tag.
 *
 * I.e. sorts the attributes alphabetically and canonicalizes their
 * values where necessary. Also removes attributes we're not interested in.
 * The special selection attributes (ATTRNAME_SEL_START / ATTRNAME_SEL_END)
 * are removed and replaced by a trailing ' {', ' }' or ' |' marker.
 *
 * Note that this function relies on the spaces of the input string already
 * having been normalized by canonicalizeSpaces!
 *
 * @param str {String} the contents of the element tag, excluding < and >.
 * @param emitFlags {Object} flags used for this output
 * @return {String} the canonicalized contents.
 */
function canonicalizeElementTag(str, emitFlags) {
  // FIXME: lowercase only if emitFlags.lowercase is set
  str = str.toLowerCase();

  var pos = str.search(' ');

  // element name only
  if (pos == -1) {
    return str;
  }

  var elemName = str.substr(0, pos);
  str = str.substr(pos + 1);

  // Even if emitFlags.emitAttrs is not set, we must iterate over the
  // attributes to catch the special selection attribute and/or selection
  // markers. :(

  // Iterate over attributes, add them to an array, canonicalize their
  // contents, and finally output the (remaining) attributes in sorted order.
  // Note: We can't do a simple split on space here, because the value of,
  // e.g., 'style' attributes may also contain spaces.
  var attrs = [];
  var selStartInTag = false;
  var selEndInTag = false;

  while (str) {
    var attrName;
    var attrValue = '';

    // After this block, |pos| is the index in |str| at which the next
    // attribute starts (or an index >= str.length if there is none).
    pos = str.search(/[ =]/);
    if (pos < 0) {
      // Last attribute, without a value.
      attrName = str;
      pos = str.length;
    } else {
      attrName = str.substr(0, pos);
      // Allow one space on either side of the '='.
      if (str.charAt(pos) == ' ') {
        ++pos;
      }
      if (str.charAt(pos) == '=') {
        ++pos;
        if (str.charAt(pos) == ' ') {
          ++pos;
        }
        str = str.substr(pos);

        var quote = str.charAt(0);
        if (quote == '"' || quote == "'") {
          // Quoted value: runs up to the matching quote, or to the end of the
          // tag if the quote is never closed.
          pos = str.indexOf(quote, 1);
          if (pos < 0) {
            pos = str.length;
          }
          attrValue = str.substring(1, pos);
          ++pos;  // Step over the closing quote.
        } else {
          // Unquoted value: runs up to the next space.
          pos = str.indexOf(' ');
          if (pos < 0) {
            pos = str.length;
          }
          attrValue = str.substr(0, pos);
        }
        attrValue = attrValue.replace(/^ /, '').replace(/ $/, '');
        ++pos;  // Step over the space that separates the next attribute.
      }
      // Otherwise this is a valueless attribute: the space after its name has
      // already been stepped over above, so |pos| is where the next attribute
      // starts and must not be advanced again.
    }
    str = str.substr(pos);

    // Remove special selection attributes.
    if (attrName === ATTRNAME_SEL_START) {
      selStartInTag = true;
      continue;
    }
    if (attrName === ATTRNAME_SEL_END) {
      selEndInTag = true;
      continue;
    }

    // Never emitted, regardless of flags.
    if (attrName === '' || attrName === 'onload' || attrName === 'xmlns') {
      continue;
    }

    // 'contenteditable' is always emitted; everything else only if the test
    // asks for attributes.
    if (attrName === 'contenteditable' || emitFlags.emitAttrs) {
      attrValue = canonicalizeEntities(attrValue);
      attrValue = canonicalizeSingleAttribute(elemName, attrName, attrValue, emitFlags);
      if (attrValue !== null) {
        attrs.push(attrName + '="' + attrValue + '"');
      }
    }
  }

  var result = elemName;

  // Sort alphabetically (on full string rather than just attribute value for
  // simplicity. Also, attribute names will differ when encountering the '=').
  if (attrs.length > 0) {
    attrs.sort();
    result += ' ' + attrs.join(' ');
  }

  // Add intra-tag selection marker(s) or attribute(s), if any, at the end.
  if (selStartInTag && selEndInTag) {
    result += ' |';
  } else if (selStartInTag) {
    result += ' {';
  } else if (selEndInTag) {
    result += ' }';
  }

  return result;
}
+
/**
 * Canonicalize elements and attributes to facilitate comparison to the
 * expectation string: sort attributes, canonicalize values and remove chaff.
 *
 * Text outside of tags is passed through canonicalizeEntities; the contents
 * of each tag (between '<' or '</' and the closing '>' or '/>') are passed
 * through canonicalizeElementTag.
 *
 * Note that this function relies on the spaces of the input string already
 * having been normalized by canonicalizeSpaces!
 *
 * @param str {String} the HTML string to be canonicalized
 * @param emitFlags {Object} flags used for this output
 * @return {String} the canonicalized string
 */
function canonicalizeElementsAndAttributes(str, emitFlags) {
  var tagStart = str.indexOf('<');
  var tagEnd = 0;
  var result = '';

  while (tagStart >= 0) {
    // Step over '<' or '</' so that |tagStart| is the first character of the
    // tag contents.
    ++tagStart;
    if (str.charAt(tagStart) == '/') {
      ++tagStart;
    }
    // Everything since the end of the previous tag, including the '<' / '</'
    // just skipped.
    result += canonicalizeEntities(str.substring(tagEnd, tagStart));

    tagEnd = str.indexOf('>', tagStart);
    if (tagEnd < 0) {
      // Unterminated tag: it runs to the very end of the string.
      tagEnd = str.length;
    }
    // Keep the '/' of a self-closing tag out of the tag contents. Only look
    // back if the tag has contents at all, so that we never step back onto
    // the '/' of a preceding '</'.
    if (tagEnd > tagStart && str.charAt(tagEnd - 1) == '/') {
      --tagEnd;
    }

    result += canonicalizeElementTag(str.substring(tagStart, tagEnd), emitFlags);
    tagStart = str.indexOf('<', tagEnd);
  }
  return result + canonicalizeEntities(str.substring(tagEnd));
}
+
/**
 * Normalize the whitespace of an innerHTML string: every run of whitespace
 * becomes a single space, and spaces directly inside the angle brackets of a
 * tag are dropped. As a side effect <.../> is rewritten to <...>.
 *
 * FIXME: running this prevents testing for pre-formatted content
 * and the CSS 'white-space' attribute.
 *
 * @param str {String} the HTML string to be canonicalized
 * @return {String} the canonicalized string
 */
function canonicalizeSpaces(str) {
  return str
      // Collapse sequential whitespace.
      .replace(/\s+/g, ' ')
      // No space after '<' ...
      .replace(/\< ?/g, '<')
      // ... nor after '</' ...
      .replace(/\<\/ ?/g, '</')
      // ... nor before '>'; an optional '/' before '>' is dropped as well.
      .replace(/ ?\/?\>/g, '>');
}
+
/**
 * First canonicalization step for an innerHTML string: normalize whitespace
 * via canonicalizeSpaces, strip the comment delimiters '<!--' and '-->'
 * (together with one adjacent space on either side) so that only what was
 * embedded between them - e.g. selection markers - remains, and remove
 * </br> and </hr> if present.
 *
 * FIXME: running this prevents testing for pre-formatted content
 * and the CSS 'white-space' attribute.
 *
 * @param str {String} the HTML string to be canonicalized
 * @return {String} the canonicalized string
 */
function initialCanonicalizationOf(str) {
  return canonicalizeSpaces(str)
      .replace(/ ?<!-- ?/g, '')
      .replace(/ ?--> ?/g, '')
      .replace(/<\/[bh]r>/g, '');
}