diff options
Diffstat (limited to 'editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js')
-rw-r--r-- | editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js b/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js new file mode 100644 index 0000000000..2236d9dfc5 --- /dev/null +++ b/editor/libeditor/tests/browserscope/lib/richtext2/richtext2/static/js/canonicalize.js @@ -0,0 +1,436 @@ +/** + * @fileoverview + * Canonicalization functions used in the RTE test suite. + * + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the 'License') + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an 'AS IS' BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @version 0.1 + * @author rolandsteiner@google.com + */ + +/** + * Canonicalize HTML entities to their actual character + * + * @param str {String} the HTML string to be canonicalized + * @return {String} the canonicalized string + */ + +function canonicalizeEntities(str) { + // TODO(rolandsteiner): this function is very much not optimized, but that shouldn't + // theoretically matter too much - look into it at some point. + var match; + while (match = str.match(/&#x([0-9A-F]+);/i)) { + str = str.replace('&#x' + match[1] + ';', String.fromCharCode(parseInt(match[1], 16))); + } + while (match = str.match(/&#([0-9]+);/)) { + str = str.replace('&#' + match[1] + ';', String.fromCharCode(Number(match[1]))); + } + return str; +} + +/** + * Canonicalize the contents of the HTML 'style' attribute. + * I.e. sorts the CSS attributes alphabetically and canonicalizes the values + * CSS attributes where necessary. + * + * If this would return an empty string, return null instead to suppress the + * whole 'style' attribute. + * + * Avoid tests that contain {, } or : within CSS values! + * + * Note that this function relies on the spaces of the input string already + * having been normalized by canonicalizeSpaces! + * + * FIXME: does not canonicalize the contents of compound attributes + * (e.g., 'border'). + * + * @param str {String} contents of the 'style' attribute + * @param emitFlags {Object} flags used for this output + * @return {String/null} canonicalized string, null instead of the empty string + */ +function canonicalizeStyle(str, emitFlags) { + // Remove any enclosing curly brackets + str = str.replace(/ ?[\{\}] ?/g, ''); + + var attributes = str.split(';'); + var count = attributes.length; + var resultArr = []; + + for (var a = 0; a < count; ++a) { + // Retrieve "name: value" pair + // Note: may expectedly fail if the last pair was terminated with ';' + var avPair = attributes[a].match(/ ?([^ :]+) ?: ?(.+)/); + if (!avPair) + continue; + + var name = avPair[1]; + var value = avPair[2].replace(/ $/, ''); // Remove any trailing space. + + switch (name) { + case 'color': + case 'background-color': + case 'border-color': + if (emitFlags.canonicalizeUnits) { + resultArr.push(name + ': #' + new Color(value).toHexString()); + } else { + resultArr.push(name + ': ' + value); + } + break; + + case 'font-family': + if (emitFlags.canonicalizeUnits) { + resultArr.push(name + ': ' + new FontName(value).toString()); + } else { + resultArr.push(name + ': ' + value); + } + break; + + case 'font-size': + if (emitFlags.canonicalizeUnits) { + resultArr.push(name + ': ' + new FontSize(value).toString()); + } else { + resultArr.push(name + ': ' + value); + } + break; + + default: + resultArr.push(name + ': ' + value); + } + } + + // Sort by name, assuming no duplicate CSS attribute names. + resultArr.sort(); + + return resultArr.join('; ') || null; +} + +/** + * Canonicalize a single attribute value. + * + * Note that this function relies on the spaces of the input string already + * having been normalized by canonicalizeSpaces! + * + * @param elemName {String} the name of the element + * @param attrName {String} the name of the attribute + * @param attrValue {String} the value of the attribute + * @param emitFlags {Object} flags used for this output + * @return {String/null} the canonicalized value, or null if the attribute should be skipped. + */ +function canonicalizeSingleAttribute(elemName, attrName, attrValue, emitFlags) { + // We emit attributes as name="value", so change any contained apostrophes + // to quote marks. + attrValue = attrValue.replace(/\x22/, '\x27'); + + switch (attrName) { + case 'class': + return emitFlags.emitClass ? attrValue : null; + + case 'id': + if (!emitFlags.emitID) { + return null; + } + if (attrValue && attrValue.substr(0, 7) == 'editor-') { + return null; + } + return attrValue; + + // Remove empty style attributes, canonicalize the contents otherwise, + // provided the test cares for styles. + case 'style': + return (emitFlags.emitStyle && attrValue) + ? canonicalizeStyle(attrValue, emitFlags) + : null; + + // Never output onload handlers as they are set by the test environment. + case 'onload': + return null; + + // Canonicalize colors. + case 'bgcolor': + case 'color': + if (!attrValue) { + return null; + } + return emitFlags.canonicalizeUnits ? new Color(attrValue).toString() : attrValue; + + // Canonicalize font names. + case 'face': + return emitFlags.canonicalizeUnits ? new FontName(attrValue).toString() : attrValue; + + // Canonicalize font sizes (leave other 'size' attributes as-is). + case 'size': + if (!attrValue) { + return null; + } + switch (elemName) { + case 'basefont': + case 'font': + return emitFlags.canonicalizeUnits ? new FontSize(attrValue).toString() : attrValue; + } + return attrValue; + + // Remove spans with value 1. Retain spans with other values, even if + // empty or with a value 0, since those indicate a flawed implementation. + case 'colspan': + case 'rowspan': + case 'span': + return (attrValue == '1' || attrValue === '') ? null : attrValue; + + // Boolean attributes: presence equals true. If present, the value must be + // the empty string or the attribute's canonical name. + // (http://www.whatwg.org/specs/web-apps/current-work/#boolean-attributes) + // Below we only normalize empty string to the canonical name for + // comparison purposes. All other values are not touched and will therefore + // in all likelihood result in a failed test (even if they may be accepted + // by the UA). + case 'async': + case 'autofocus': + case 'checked': + case 'compact': + case 'declare': + case 'defer': + case 'disabled': + case 'formnovalidate': + case 'frameborder': + case 'ismap': + case 'loop': + case 'multiple': + case 'nohref': + case 'nosize': + case 'noshade': + case 'novalidate': + case 'nowrap': + case 'open': + case 'readonly': + case 'required': + case 'reversed': + case 'seamless': + case 'selected': + return attrValue ? attrValue : attrName; + + default: + return attrValue; + } +} + +/** + * Canonicalize the contents of an element tag. + * + * I.e. sorts the attributes alphabetically and canonicalizes their + * values where necessary. Also removes attributes we're not interested in. + * + * Note that this function relies on the spaces of the input string already + * having been normalized by canonicalizeSpaces! + * + * @param str {String} the contens of the element tag, excluding < and >. + * @param emitFlags {Object} flags used for this output + * @return {String} the canonicalized contents. + */ +function canonicalizeElementTag(str, emitFlags) { + // FIXME: lowercase only if emitFlags.lowercase is set + str = str.toLowerCase(); + + var pos = str.search(' '); + + // element name only + if (pos == -1) { + return str; + } + + var elemName = str.substr(0, pos); + str = str.substr(pos + 1); + + // Even if emitFlags.emitAttrs is not set, we must iterate over the + // attributes to catch the special selection attribute and/or selection + // markers. :( + + // Iterate over attributes, add them to an array, canonicalize their + // contents, and finally output the (remaining) attributes in sorted order. + // Note: We can't do a simple split on space here, because the value of, + // e.g., 'style' attributes may also contain spaces. + var attrs = []; + var selStartInTag = false; + var selEndInTag = false; + + while (str) { + var attrName; + var attrValue = ''; + + pos = str.search(/[ =]/); + if (pos >= 0) { + attrName = str.substr(0, pos); + if (str.charAt(pos) == ' ') { + ++pos; + } + if (str.charAt(pos) == '=') { + ++pos; + if (str.charAt(pos) == ' ') { + ++pos; + } + str = str.substr(pos); + switch (str.charAt(0)) { + case '"': + case "'": + pos = str.indexOf(str.charAt(0), 1); + pos = (pos < 0) ? str.length : pos; + attrValue = str.substring(1, pos); + ++pos; + break; + + default: + pos = str.indexOf(' ', 0); + pos = (pos < 0) ? str.length : pos; + attrValue = (pos == -1) ? str : str.substr(0, pos); + break; + } + attrValue = attrValue.replace(/^ /, ''); + attrValue = attrValue.replace(/ $/, ''); + } + } else { + attrName = str; + } + str = (pos == -1 || pos >= str.length) ? '' : str.substr(pos + 1); + + // Remove special selection attributes. + switch (attrName) { + case ATTRNAME_SEL_START: + selStartInTag = true; + continue; + + case ATTRNAME_SEL_END: + selEndInTag = true; + continue; + } + + switch (attrName) { + case '': + case 'onload': + case 'xmlns': + break; + + default: + if (!emitFlags.emitAttrs) { + break; + } + // >>> fall through >>> + + case 'contenteditable': + attrValue = canonicalizeEntities(attrValue); + attrValue = canonicalizeSingleAttribute(elemName, attrName, attrValue, emitFlags); + if (attrValue !== null) { + attrs.push(attrName + '="' + attrValue + '"'); + } + } + } + + var result = elemName; + + // Sort alphabetically (on full string rather than just attribute value for + // simplicity. Also, attribute names will differ when encountering the '='). + if (attrs.length > 0) { + attrs.sort(); + result += ' ' + attrs.join(' '); + } + + // Add intra-tag selection marker(s) or attribute(s), if any, at the end. + if (selStartInTag && selEndInTag) { + result += ' |'; + } else if (selStartInTag) { + result += ' {'; + } else if (selEndInTag) { + result += ' }'; + } + + return result; +} + +/** + * Canonicalize elements and attributes to facilitate comparison to the + * expectation string: sort attributes, canonicalize values and remove chaff. + * + * Note that this function relies on the spaces of the input string already + * having been normalized by canonicalizeSpaces! + * + * @param str {String} the HTML string to be canonicalized + * @param emitFlags {Object} flags used for this output + * @return {String} the canonicalized string + */ +function canonicalizeElementsAndAttributes(str, emitFlags) { + var tagStart = str.indexOf('<'); + var tagEnd = 0; + var result = ''; + + while (tagStart >= 0) { + ++tagStart; + if (str.charAt(tagStart) == '/') { + ++tagStart; + } + result = result + canonicalizeEntities(str.substring(tagEnd, tagStart)); + tagEnd = str.indexOf('>', tagStart); + if (tagEnd < 0) { + tagEnd = str.length - 1; + } + if (str.charAt(tagEnd - 1) == '/') { + --tagEnd; + } + var elemStr = str.substring(tagStart, tagEnd); + elemStr = canonicalizeElementTag(elemStr, emitFlags); + result = result + elemStr; + tagStart = str.indexOf('<', tagEnd); + } + return result + canonicalizeEntities(str.substring(tagEnd)); +} + +/** + * Canonicalize an innerHTML string to uniform single whitespaces. + * + * FIXME: running this prevents testing for pre-formatted content + * and the CSS 'white-space' attribute. + * + * @param str {String} the HTML string to be canonicalized + * @return {String} the canonicalized string + */ +function canonicalizeSpaces(str) { + // Collapse sequential whitespace. + str = str.replace(/\s+/g, ' '); + + // Remove spaces immediately inside angle brackets <, >, </ and />. + // While doing this also canonicalize <.../> to <...>. + str = str.replace(/\< ?/g, '<'); + str = str.replace(/\<\/ ?/g, '</'); + str = str.replace(/ ?\/?\>/g, '>'); + + return str; +} + +/** + * Canonicalize an innerHTML string to uniform single whitespaces. + * Also remove comments to retain only embedded selection markers, and + * remove </br> and </hr> if present. + * + * FIXME: running this prevents testing for pre-formatted content + * and the CSS 'white-space' attribute. + * + * @param str {String} the HTML string to be canonicalized + * @return {String} the canonicalized string + */ +function initialCanonicalizationOf(str) { + str = canonicalizeSpaces(str); + str = str.replace(/ ?<!-- ?/g, ''); + str = str.replace(/ ?--> ?/g, ''); + str = str.replace(/<\/[bh]r>/g, ''); + + return str; +} |