summary | refs | log | tree | commit | diff | stats
path: root/devtools/shared/css
diff options
context:
space:
mode:
Diffstat (limited to 'devtools/shared/css')
-rw-r--r-- devtools/shared/css/lexer.js 1520
-rw-r--r-- devtools/shared/css/parsing-utils.js 86
2 files changed, 138 insertions, 1468 deletions
diff --git a/devtools/shared/css/lexer.js b/devtools/shared/css/lexer.js
index e4544efd35..f52d208bbb 100644
--- a/devtools/shared/css/lexer.js
+++ b/devtools/shared/css/lexer.js
@@ -2,143 +2,35 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-// A CSS Lexer. This file is a bit unusual -- it is a more or less
-// direct translation of layout/style/nsCSSScanner.cpp and
-// layout/style/CSSLexer.cpp into JS. This implemented the
-// CSSLexer.webidl interface, and the intent is to try to keep it in
-// sync with changes to the platform CSS lexer. Due to this goal,
-// this file violates some naming conventions and consequently locally
-// disables some eslint rules.
-
-/* eslint-disable camelcase, mozilla/no-aArgs, no-else-return, complexity */
-
"use strict";
-// White space of any kind. No value fields are used. Note that
-// comments do *not* count as white space; comments separate tokens
-// but are not themselves tokens.
-const eCSSToken_Whitespace = "whitespace"; //
-// A comment.
-const eCSSToken_Comment = "comment"; // /*...*/
-
-// Identifier-like tokens. mIdent is the text of the identifier.
-// The difference between ID and Hash is: if the text after the #
-// would have been a valid Ident if the # hadn't been there, the
-// scanner produces an ID token. Otherwise it produces a Hash token.
-// (This distinction is required by css3-selectors.)
-const eCSSToken_Ident = "ident"; // word
-const eCSSToken_Function = "function"; // word(
-const eCSSToken_AtKeyword = "at"; // @word
-const eCSSToken_ID = "id"; // #word
-const eCSSToken_Hash = "hash"; // #0word
-
-// Numeric tokens. mNumber is the floating-point value of the
-// number, and mHasSign indicates whether there was an explicit sign
-// (+ or -) in front of the number. If mIntegerValid is true, the
-// number had the lexical form of an integer, and mInteger is its
-// integer value. Lexically integer values outside the range of a
-// 32-bit signed number are clamped to the maximum values; mNumber
-// will indicate a 'truer' value in that case. Percentage tokens
-// are always considered not to be integers, even if their numeric
-// value is integral (100% => mNumber = 1.0). For Dimension
-// tokens, mIdent holds the text of the unit.
-const eCSSToken_Number = "number"; // 1 -5 +2e3 3.14159 7.297352e-3
-const eCSSToken_Dimension = "dimension"; // 24px 8.5in
-const eCSSToken_Percentage = "percentage"; // 85% 1280.4%
-
-// String-like tokens. In all cases, mIdent holds the text
-// belonging to the string, and mSymbol holds the delimiter
-// character, which may be ', ", or zero (only for unquoted URLs).
-// Bad_String and Bad_URL tokens are emitted when the closing
-// delimiter or parenthesis was missing.
-const eCSSToken_String = "string"; // 'foo bar' "foo bar"
-const eCSSToken_Bad_String = "bad_string"; // 'foo bar
-const eCSSToken_URL = "url"; // url(foobar) url("foo bar")
-const eCSSToken_Bad_URL = "bad_url"; // url(foo
-
-// Any one-character symbol. mSymbol holds the character.
-const eCSSToken_Symbol = "symbol"; // . ; { } ! *
-
-// Match operators. These are single tokens rather than pairs of
-// Symbol tokens because css3-selectors forbids the presence of
-// comments between the two characters. No value fields are used;
-// the token type indicates which operator.
-const eCSSToken_Includes = "includes"; // ~=
-const eCSSToken_Dashmatch = "dashmatch"; // |=
-const eCSSToken_Beginsmatch = "beginsmatch"; // ^=
-const eCSSToken_Endsmatch = "endsmatch"; // $=
-const eCSSToken_Containsmatch = "containsmatch"; // *=
-
-// Unicode-range token: currently used only in @font-face.
-// The lexical rule for this token includes several forms that are
-// semantically invalid. Therefore, mIdent always holds the
-// complete original text of the token (so we can print it
-// accurately in diagnostics), and mIntegerValid is true iff the
-// token is semantically valid. In that case, mInteger holds the
-// lowest value included in the range, and mInteger2 holds the
-// highest value included in the range.
-const eCSSToken_URange = "urange"; // U+007e U+01?? U+2000-206F
-
-// HTML comment delimiters, ignored as a unit when they appear at
-// the top level of a style sheet, for compatibility with websites
-// written for compatibility with pre-CSS browsers. This token type
-// subsumes the css2.1 CDO and CDC tokens, which are always treated
-// the same by the parser. mIdent holds the text of the token, for
-// diagnostics.
-const eCSSToken_HTMLComment = "htmlcomment"; // <!-- -->
-
-const eEOFCharacters_None = 0x0000;
+const EEOFCHARACTERS_NONE = 0x0000;
// to handle \<EOF> inside strings
-const eEOFCharacters_DropBackslash = 0x0001;
+const EEOFCHARACTERS_DROPBACKSLASH = 0x0001;
// to handle \<EOF> outside strings
-const eEOFCharacters_ReplacementChar = 0x0002;
+const EEOFCHARACTERS_REPLACEMENTCHAR = 0x0002;
// to close comments
-const eEOFCharacters_Asterisk = 0x0004;
-const eEOFCharacters_Slash = 0x0008;
+const EEOFCHARACTERS_ASTERISK = 0x0004;
+const EEOFCHARACTERS_SLASH = 0x0008;
// to close double-quoted strings
-const eEOFCharacters_DoubleQuote = 0x0010;
+const EEOFCHARACTERS_DOUBLEQUOTE = 0x0010;
// to close single-quoted strings
-const eEOFCharacters_SingleQuote = 0x0020;
+const EEOFCHARACTERS_SINGLEQUOTE = 0x0020;
// to close URLs
-const eEOFCharacters_CloseParen = 0x0040;
+const EEOFCHARACTERS_CLOSEPAREN = 0x0040;
// Bridge the char/string divide.
const APOSTROPHE = "'".charCodeAt(0);
const ASTERISK = "*".charCodeAt(0);
-const CARRIAGE_RETURN = "\r".charCodeAt(0);
-const CIRCUMFLEX_ACCENT = "^".charCodeAt(0);
-const COMMERCIAL_AT = "@".charCodeAt(0);
-const DIGIT_NINE = "9".charCodeAt(0);
-const DIGIT_ZERO = "0".charCodeAt(0);
-const DOLLAR_SIGN = "$".charCodeAt(0);
-const EQUALS_SIGN = "=".charCodeAt(0);
-const EXCLAMATION_MARK = "!".charCodeAt(0);
-const FULL_STOP = ".".charCodeAt(0);
-const GREATER_THAN_SIGN = ">".charCodeAt(0);
-const HYPHEN_MINUS = "-".charCodeAt(0);
-const LATIN_CAPITAL_LETTER_E = "E".charCodeAt(0);
-const LATIN_CAPITAL_LETTER_U = "U".charCodeAt(0);
-const LATIN_SMALL_LETTER_E = "e".charCodeAt(0);
-const LATIN_SMALL_LETTER_U = "u".charCodeAt(0);
-const LEFT_PARENTHESIS = "(".charCodeAt(0);
-const LESS_THAN_SIGN = "<".charCodeAt(0);
-const LINE_FEED = "\n".charCodeAt(0);
-const NUMBER_SIGN = "#".charCodeAt(0);
-const PERCENT_SIGN = "%".charCodeAt(0);
-const PLUS_SIGN = "+".charCodeAt(0);
-const QUESTION_MARK = "?".charCodeAt(0);
const QUOTATION_MARK = '"'.charCodeAt(0);
-const REVERSE_SOLIDUS = "\\".charCodeAt(0);
const RIGHT_PARENTHESIS = ")".charCodeAt(0);
const SOLIDUS = "/".charCodeAt(0);
-const TILDE = "~".charCodeAt(0);
-const VERTICAL_LINE = "|".charCodeAt(0);
const UCS2_REPLACEMENT_CHAR = 0xfffd;
@@ -152,409 +44,92 @@ const kImpliedEOFCharacters = [
0,
];
-//
-const ARGS_LENGTH_MAX = 500 * 1000;
-
/**
- * Several methods in this helper can reach the 500000 limit for arguments in
- * Firefox, see Bug 1414361.
- *
- * This will apply the provided method, on the provided scope with an array of
- * arguments which can exceed the 500k limit supported by Firefox.
- *
- * In practice, the arguments array will be split in several chunks of 500k
- * items maximum and each chunk will be applied separately.
- *
- * !! Note that if you are expecting to use the return value of the method, here
- * we will return an array of each return value for each chunk. It will be up to
- * the consumer to decide how to combine the results into a meaningful final
- * result !!
- *
- * @param {Function} method
- * The method to apply.
- * @param {*} scope
- * The scope ("this") to use when applying the method.
- * @param {Array} args
- * The array of arguments to apply.
- *
- * @returns {Array}
- * The array of return values, one item for each chunk that had to be
- * created.
+ * Wrapper around InspectorCSSParser.
+ * Once/if https://github.com/servo/rust-cssparser/pull/374 lands, we can remove this class.
*/
-function safeApply(method, scope, args) {
- let i = 0;
- const res = [];
- const length = args.length;
- while (i < length) {
- const _start = i;
- i += ARGS_LENGTH_MAX;
- res.push(method.apply(scope, args.slice(_start, i)));
- }
- return res;
-}
+class InspectorCSSParserWrapper {
+ #offset = 0;
+ #trackEOFChars;
+ #eofCharacters = EEOFCHARACTERS_NONE;
-/**
- * Ensure that the character is valid. If it is valid, return it;
- * otherwise, return the replacement character.
- *
- * @param {Number} c the character to check
- * @return {Number} the character or its replacement
- */
-function ensureValidChar(c) {
- if (c >= 0x00110000 || (c & 0xfff800) == 0xd800) {
- // Out of range or a surrogate.
- return UCS2_REPLACEMENT_CHAR;
+ /**
+ *
+ * @param {String} input: The CSS text to lex
+ * @param {Object} options
+ * @param {Boolean} options.trackEOFChars: Set to true if performEOFFixup will be called.
+ */
+ constructor(input, options = {}) {
+ this.parser = new InspectorCSSParser(input);
+ this.#trackEOFChars = options.trackEOFChars;
}
- return c;
-}
-/**
- * Turn a string into an array of character codes.
- *
- * @param {String} str the input string
- * @return {Array} an array of character codes, one per character in
- * the input string.
- */
-function stringToCodes(str) {
- // This is a hot path, and using a simple for loop is faster than any other mean (e.g.
- // Array#map ).
- const charCodes = [];
- for (let i = 0; i < str.length; i++) {
- charCodes.push(str.charCodeAt(i));
+ get lineNumber() {
+ return this.parser.lineNumber;
}
- return charCodes;
-}
-
-const IS_HEX_DIGIT = 0x01;
-const IS_IDSTART = 0x02;
-const IS_IDCHAR = 0x04;
-const IS_URL_CHAR = 0x08;
-const IS_HSPACE = 0x10;
-const IS_VSPACE = 0x20;
-const IS_SPACE = IS_HSPACE | IS_VSPACE;
-const IS_STRING = 0x40;
-const H = IS_HSPACE;
-const V = IS_VSPACE;
-const I = IS_IDCHAR;
-const J = IS_IDSTART;
-const U = IS_URL_CHAR;
-const S = IS_STRING;
-const X = IS_HEX_DIGIT;
-
-const SH = S | H;
-const SU = S | U;
-const SUI = S | U | I;
-const SUIJ = S | U | I | J;
-const SUIX = S | U | I | X;
-const SUIJX = S | U | I | J | X;
-
-/* eslint-disable indent, indent-legacy, no-multi-spaces, comma-spacing, spaced-comment */
-const gLexTable = [
- // 00 01 02 03 04 05 06 07
- 0,
- S,
- S,
- S,
- S,
- S,
- S,
- S,
- // 08 TAB LF 0B FF CR 0E 0F
- S,
- SH,
- V,
- S,
- V,
- V,
- S,
- S,
- // 10 11 12 13 14 15 16 17
- S,
- S,
- S,
- S,
- S,
- S,
- S,
- S,
- // 18 19 1A 1B 1C 1D 1E 1F
- S,
- S,
- S,
- S,
- S,
- S,
- S,
- S,
- //SPC ! " # $ % & '
- SH,
- SU,
- 0,
- SU,
- SU,
- SU,
- SU,
- 0,
- // ( ) * + , - . /
- S,
- S,
- SU,
- SU,
- SU,
- SUI,
- SU,
- SU,
- // 0 1 2 3 4 5 6 7
- SUIX,
- SUIX,
- SUIX,
- SUIX,
- SUIX,
- SUIX,
- SUIX,
- SUIX,
- // 8 9 : ; < = > ?
- SUIX,
- SUIX,
- SU,
- SU,
- SU,
- SU,
- SU,
- SU,
- // @ A B C D E F G
- SU,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJ,
- // H I J K L M N O
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- // P Q R S T U V W
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- // X Y Z [ \ ] ^ _
- SUIJ,
- SUIJ,
- SUIJ,
- SU,
- J,
- SU,
- SU,
- SUIJ,
- // ` a b c d e f g
- SU,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJX,
- SUIJ,
- // h i j k l m n o
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- // p q r s t u v w
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- SUIJ,
- // x y z { | } ~ 7F
- SUIJ,
- SUIJ,
- SUIJ,
- SU,
- SU,
- SU,
- SU,
- S,
-];
-/* eslint-enable indent, indent-legacy, no-multi-spaces, comma-spacing, spaced-comment */
-
-/**
- * True if 'ch' is in character class 'cls', which should be one of
- * the constants above or some combination of them. All characters
- * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
- */
-function IsOpenCharClass(ch, cls) {
- return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
-}
-
-/**
- * True if 'ch' is in character class 'cls', which should be one of
- * the constants above or some combination of them. No characters
- * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
- */
-function IsClosedCharClass(ch, cls) {
- return ch >= 0 && ch < 128 && (gLexTable[ch] & cls) != 0;
-}
-
-/**
- * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
- * TAB, LF, FF, CR, or SPC.
- */
-function IsWhitespace(ch) {
- return IsClosedCharClass(ch, IS_SPACE);
-}
-
-/**
- * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
- */
-function IsHorzSpace(ch) {
- return IsClosedCharClass(ch, IS_HSPACE);
-}
-
-/**
- * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
- * whitespace requires special handling when consumed, see AdvanceLine.
- */
-function IsVertSpace(ch) {
- return IsClosedCharClass(ch, IS_VSPACE);
-}
-
-/**
- * True if 'ch' is a character that can appear in the middle of an identifier.
- * This includes U+0000 since it is handled as U+FFFD, but for purposes of
- * GatherText it should not be included in IsOpenCharClass.
- */
-function IsIdentChar(ch) {
- return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
-}
-
-/**
- * True if 'ch' is a character that by itself begins an identifier.
- * This includes U+0000 since it is handled as U+FFFD, but for purposes of
- * GatherText it should not be included in IsOpenCharClass.
- * (This is a subset of IsIdentChar.)
- */
-function IsIdentStart(ch) {
- return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
-}
-
-/**
- * True if the two-character sequence aFirstChar+aSecondChar begins an
- * identifier.
- */
-function StartsIdent(aFirstChar, aSecondChar) {
- return (
- IsIdentStart(aFirstChar) ||
- (aFirstChar == HYPHEN_MINUS &&
- (aSecondChar == HYPHEN_MINUS || IsIdentStart(aSecondChar)))
- );
-}
+ get columnNumber() {
+ return this.parser.columnNumber;
+ }
-/**
- * True if 'ch' is a decimal digit.
- */
-function IsDigit(ch) {
- return ch >= DIGIT_ZERO && ch <= DIGIT_NINE;
-}
+ nextToken() {
+ const token = this.parser.nextToken();
+ if (!token) {
+ return token;
+ }
-/**
- * True if 'ch' is a hexadecimal digit.
- */
-function IsHexDigit(ch) {
- return IsClosedCharClass(ch, IS_HEX_DIGIT);
-}
+ if (this.#trackEOFChars) {
+ const { tokenType, text } = token;
+ const lastChar = text[text.length - 1];
+ if (tokenType === "Comment" && lastChar !== `/`) {
+ if (lastChar === `*`) {
+ this.#eofCharacters = EEOFCHARACTERS_SLASH;
+ } else {
+ this.#eofCharacters = EEOFCHARACTERS_ASTERISK | EEOFCHARACTERS_SLASH;
+ }
+ } else if (tokenType === "QuotedString" || tokenType === "BadString") {
+ if (lastChar === "\\") {
+ this.#eofCharacters =
+ this.#eofCharacters | EEOFCHARACTERS_DROPBACKSLASH;
+ }
+ if (text[0] !== lastChar) {
+ this.#eofCharacters =
+ this.#eofCharacters |
+ (text[0] === `"`
+ ? EEOFCHARACTERS_DOUBLEQUOTE
+ : EEOFCHARACTERS_SINGLEQUOTE);
+ }
+ } else {
+ if (lastChar === "\\") {
+ this.#eofCharacters = EEOFCHARACTERS_REPLACEMENTCHAR;
+ }
-/**
- * Assuming that 'ch' is a decimal digit, return its numeric value.
- */
-function DecimalDigitValue(ch) {
- return ch - DIGIT_ZERO;
-}
+ // For some reason, we only automatically close `url`, other functions
+ // will have their opening parenthesis escaped.
+ if (
+ (tokenType === "Function" && token.value === "url") ||
+ tokenType === "BadUrl" ||
+ (tokenType === "UnquotedUrl" && lastChar !== ")")
+ ) {
+ this.#eofCharacters = this.#eofCharacters | EEOFCHARACTERS_CLOSEPAREN;
+ }
-/**
- * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
- */
-function HexDigitValue(ch) {
- if (IsDigit(ch)) {
- return DecimalDigitValue(ch);
- } else {
- // Note: c&7 just keeps the low three bits which causes
- // upper and lower case alphabetics to both yield their
- // "relative to 10" value for computing the hex value.
- return (ch & 0x7) + 9;
- }
-}
+ if (tokenType === "CloseParenthesis") {
+ this.#eofCharacters =
+ this.#eofCharacters & ~EEOFCHARACTERS_CLOSEPAREN;
+ }
+ }
+ }
-/**
- * If 'ch' can be the first character of a two-character match operator
- * token, return the token type code for that token, otherwise return
- * eCSSToken_Symbol to indicate that it can't.
- */
-function MatchOperatorType(ch) {
- switch (ch) {
- case TILDE:
- return eCSSToken_Includes;
- case VERTICAL_LINE:
- return eCSSToken_Dashmatch;
- case CIRCUMFLEX_ACCENT:
- return eCSSToken_Beginsmatch;
- case DOLLAR_SIGN:
- return eCSSToken_Endsmatch;
- case ASTERISK:
- return eCSSToken_Containsmatch;
- default:
- return eCSSToken_Symbol;
+ // At the moment, InspectorCSSParser doesn't expose offsets, so we need to compute
+ // them manually here.
+ // We can do that because we are retrieving every token in the input string, and so the
+ // end offset of the last token is the start offset of the new token.
+ token.startOffset = this.#offset;
+ this.#offset += token.text.length;
+ token.endOffset = this.#offset;
+ return token;
}
-}
-
-function Scanner(buffer) {
- this.mBuffer = buffer || "";
- this.mOffset = 0;
- this.mCount = this.mBuffer.length;
- this.mLineNumber = 1;
- this.mLineOffset = 0;
- this.mTokenLineOffset = 0;
- this.mTokenOffset = 0;
- this.mTokenLineNumber = 1;
- this.mEOFCharacters = eEOFCharacters_None;
-}
-
-Scanner.prototype = {
- /**
- * The line number of the most recently returned token. Line
- * numbers are 0-based.
- */
- get lineNumber() {
- return this.mTokenLineNumber - 1;
- },
-
- /**
- * The column number of the most recently returned token. Column
- * numbers are 0-based.
- */
- get columnNumber() {
- return this.mTokenOffset - this.mTokenLineOffset;
- },
/**
* When EOF is reached, the last token might be unterminated in some
@@ -569,954 +144,51 @@ Scanner.prototype = {
* string. This is handled in different ways, depending on the
* context and arguments.
*
- * If preserveBackslash is true, then the existing backslash at the
- * end of inputString is preserved, and a new backslash is appended.
+ * The existing backslash at the end of inputString is preserved, and a new backslash
+ * is appended.
* That is, the input |\| is transformed to |\\|, and the
* input |'\| is transformed to |'\\'|.
*
- * Otherwise, preserveBackslash is false:
- * If the backslash appears in a string context, then the trailing
- * backslash is dropped from inputString. That is, |"\| is
- * transformed to |""|.
- * If the backslash appears outside of a string context, then
- * U+FFFD is appended. That is, |\| is transformed to a string
- * with two characters: backslash followed by U+FFFD.
- *
- * Passing false for preserveBackslash makes the result conform to
- * the CSS Syntax specification. However, passing true may give
- * somewhat more intuitive behavior.
- *
* @param inputString the input string
- * @param preserveBackslash how to handle trailing backslashes
* @return the input string with the termination characters appended
*/
- performEOFFixup(aInputString, aPreserveBackslash) {
- let result = aInputString;
-
- let eofChars = this.mEOFCharacters;
+ performEOFFixup(inputString) {
+ let result = inputString;
+ let eofChars = this.#eofCharacters;
if (
- aPreserveBackslash &&
(eofChars &
- (eEOFCharacters_DropBackslash | eEOFCharacters_ReplacementChar)) !=
- 0
+ (EEOFCHARACTERS_DROPBACKSLASH | EEOFCHARACTERS_REPLACEMENTCHAR)) !=
+ 0
) {
eofChars &= ~(
- eEOFCharacters_DropBackslash | eEOFCharacters_ReplacementChar
+ EEOFCHARACTERS_DROPBACKSLASH | EEOFCHARACTERS_REPLACEMENTCHAR
);
result += "\\";
}
if (
- (eofChars & eEOFCharacters_DropBackslash) != 0 &&
+ (eofChars & EEOFCHARACTERS_DROPBACKSLASH) != 0 &&
!!result.length &&
result.endsWith("\\")
) {
result = result.slice(0, -1);
}
- const extra = [];
- this.AppendImpliedEOFCharacters(eofChars, extra);
- const asString = String.fromCharCode.apply(null, extra);
-
- return result + asString;
- },
-
- /**
- * Return the next token, or null at EOF.
- *
- * The token object is described by the following WebIDL definition:
- *
- * dictionary CSSToken {
- * // The token type.
- * CSSTokenType tokenType = "whitespace";
- *
- * // Offset of the first character of the token.
- * unsigned long startOffset = 0;
- * // Offset of the character after the final character of the token.
- * // This is chosen so that the offsets can be passed to |substring|
- * // to yield the exact contents of the token.
- * unsigned long endOffset = 0;
- *
- * // If the token is a number, percentage, or dimension, this holds
- * // the value. This is not present for other token types.
- * double number;
- * // If the token is a number, percentage, or dimension, this is true
- * // iff the number had an explicit sign. This is not present for
- * // other token types.
- * boolean hasSign;
- * // If the token is a number, percentage, or dimension, this is true
- * // iff the number was specified as an integer. This is not present
- * // for other token types.
- * boolean isInteger;
- *
- * // Text associated with the token. This is not present for all
- * // token types. In particular it is:
- * //
- * // Token type Meaning
- * // ===============================
- * // ident The identifier.
- * // function The function name. Note that the "(" is part
- * // of the token but is not present in |text|.
- * // at The word.
- * // id The word.
- * // hash The word.
- * // dimension The dimension.
- * // string The string contents after escape processing.
- * // bad_string Ditto.
- * // url The URL after escape processing.
- * // bad_url Ditto.
- * // symbol The symbol text.
- * DOMString text;
- * };
- */
- nextToken() {
- const token = {};
- if (!this.Next(token)) {
- return null;
- }
-
- const resultToken = {};
- resultToken.tokenType = token.mType;
- resultToken.startOffset = this.mTokenOffset;
- resultToken.endOffset = this.mOffset;
- const constructText = () => {
- return safeApply(String.fromCharCode, null, token.mIdent).join("");
- };
-
- switch (token.mType) {
- case eCSSToken_Whitespace:
- break;
-
- case eCSSToken_Ident:
- case eCSSToken_Function:
- case eCSSToken_AtKeyword:
- case eCSSToken_ID:
- case eCSSToken_Hash:
- resultToken.text = constructText();
- break;
-
- case eCSSToken_Dimension:
- resultToken.text = constructText();
- /* Fall through. */
- case eCSSToken_Number:
- case eCSSToken_Percentage:
- resultToken.number = token.mNumber;
- resultToken.hasSign = token.mHasSign;
- resultToken.isInteger = token.mIntegerValid;
- break;
-
- case eCSSToken_String:
- case eCSSToken_Bad_String:
- case eCSSToken_URL:
- case eCSSToken_Bad_URL:
- resultToken.text = constructText();
- /* Don't bother emitting the delimiter, as it is readily extracted
- from the source string when needed. */
- break;
-
- case eCSSToken_Symbol:
- resultToken.text = String.fromCharCode(token.mSymbol);
- break;
-
- case eCSSToken_Includes:
- case eCSSToken_Dashmatch:
- case eCSSToken_Beginsmatch:
- case eCSSToken_Endsmatch:
- case eCSSToken_Containsmatch:
- case eCSSToken_URange:
- break;
-
- case eCSSToken_Comment:
- case eCSSToken_HTMLComment:
- /* The comment text is easily extracted from the source string,
- and is rarely useful. */
- break;
- }
-
- return resultToken;
- },
-
- /**
- * Return the raw UTF-16 code unit at position |this.mOffset + n| within
- * the read buffer. If that is beyond the end of the buffer, returns
- * -1 to indicate end of input.
- */
- Peek(n = 0) {
- if (this.mOffset + n >= this.mCount) {
- return -1;
- }
- return this.mBuffer.charCodeAt(this.mOffset + n);
- },
-
- /**
- * Advance |this.mOffset| over |n| code units. Advance(0) is a no-op.
- * If |n| is greater than the distance to end of input, will silently
- * stop at the end. May not be used to advance over a line boundary;
- * AdvanceLine() must be used instead.
- */
- Advance(n = 1) {
- if (this.mOffset + n >= this.mCount || this.mOffset + n < this.mOffset) {
- this.mOffset = this.mCount;
- } else {
- this.mOffset += n;
- }
- },
-
- /**
- * Advance |this.mOffset| over a line boundary.
- */
- AdvanceLine() {
- // Advance over \r\n as a unit.
- if (
- this.mBuffer.charCodeAt(this.mOffset) == CARRIAGE_RETURN &&
- this.mOffset + 1 < this.mCount &&
- this.mBuffer.charCodeAt(this.mOffset + 1) == LINE_FEED
- ) {
- this.mOffset += 2;
- } else {
- this.mOffset += 1;
- }
- // 0 is a magical line number meaning that we don't know (i.e., script)
- if (this.mLineNumber != 0) {
- this.mLineNumber++;
- }
- this.mLineOffset = this.mOffset;
- },
-
- /**
- * Skip over a sequence of whitespace characters (vertical or
- * horizontal) starting at the current read position.
- */
- SkipWhitespace() {
- for (;;) {
- const ch = this.Peek();
- if (!IsWhitespace(ch)) {
- // EOF counts as non-whitespace
- break;
- }
- if (IsVertSpace(ch)) {
- this.AdvanceLine();
- } else {
- this.Advance();
- }
- }
- },
-
- /**
- * Skip over one CSS comment starting at the current read position.
- */
- SkipComment() {
- this.Advance(2);
- for (;;) {
- let ch = this.Peek();
- if (ch < 0) {
- this.SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
- return;
- }
- if (ch == ASTERISK) {
- this.Advance();
- ch = this.Peek();
- if (ch < 0) {
- this.SetEOFCharacters(eEOFCharacters_Slash);
- return;
- }
- if (ch == SOLIDUS) {
- this.Advance();
- return;
- }
- } else if (IsVertSpace(ch)) {
- this.AdvanceLine();
- } else {
- this.Advance();
- }
- }
- },
-
- /**
- * If there is a valid escape sequence starting at the current read
- * position, consume it, decode it, append the result to |aOutput|,
- * and return true. Otherwise, consume nothing, leave |aOutput|
- * unmodified, and return false. If |aInString| is true, accept the
- * additional form of escape sequence allowed within string-like tokens.
- */
- GatherEscape(aOutput, aInString) {
- let ch = this.Peek(1);
- if (ch < 0) {
- // If we are in a string (or a url() containing a string), we want to drop
- // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
- // character.
- this.Advance();
- if (aInString) {
- this.SetEOFCharacters(eEOFCharacters_DropBackslash);
- } else {
- aOutput.push(UCS2_REPLACEMENT_CHAR);
- this.SetEOFCharacters(eEOFCharacters_ReplacementChar);
- }
- return true;
- }
- if (IsVertSpace(ch)) {
- if (aInString) {
- // In strings (and in url() containing a string), escaped
- // newlines are completely removed, to allow splitting over
- // multiple lines.
- this.Advance();
- this.AdvanceLine();
- return true;
- }
- // Outside of strings, backslash followed by a newline is not an escape.
- return false;
- }
-
- if (!IsHexDigit(ch)) {
- // "Any character (except a hexadecimal digit, linefeed, carriage
- // return, or form feed) can be escaped with a backslash to remove
- // its special meaning." -- CSS2.1 section 4.1.3
- this.Advance(2);
- if (ch == 0) {
- aOutput.push(UCS2_REPLACEMENT_CHAR);
- } else {
- aOutput.push(ch);
- }
- return true;
- }
-
- // "[at most six hexadecimal digits following a backslash] stand
- // for the ISO 10646 character with that number, which must not be
- // zero. (It is undefined in CSS 2.1 what happens if a style sheet
- // does contain a character with Unicode codepoint zero.)"
- // -- CSS2.1 section 4.1.3
-
- // At this point we know we have \ followed by at least one
- // hexadecimal digit, therefore the escape sequence is valid and we
- // can go ahead and consume the backslash.
- this.Advance();
- let val = 0;
- let i = 0;
- do {
- val = val * 16 + HexDigitValue(ch);
- i++;
- this.Advance();
- ch = this.Peek();
- } while (i < 6 && IsHexDigit(ch));
-
- // "Interpret the hex digits as a hexadecimal number. If this
- // number is zero, or is greater than the maximum allowed
- // codepoint, return U+FFFD REPLACEMENT CHARACTER" -- CSS Syntax
- // Level 3
- if (val == 0) {
- aOutput.push(UCS2_REPLACEMENT_CHAR);
- } else {
- aOutput.push(ensureValidChar(val));
- }
-
- // Consume exactly one whitespace character after a
- // hexadecimal escape sequence.
- if (IsVertSpace(ch)) {
- this.AdvanceLine();
- } else if (IsHorzSpace(ch)) {
- this.Advance();
- }
- return true;
- },
-
- /**
- * Consume a run of "text" beginning with the current read position,
- * consisting of characters in the class |aClass| (which must be a
- * suitable argument to IsOpenCharClass) plus escape sequences.
- * Append the text to |aText|, after decoding escape sequences.
- *
- * Returns true if at least one character was appended to |aText|,
- * false otherwise.
- */
- GatherText(aClass, aText) {
- const start = this.mOffset;
- const inString = aClass == IS_STRING;
-
- for (;;) {
- // Consume runs of unescaped characters in one go.
- let n = this.mOffset;
- while (
- n < this.mCount &&
- IsOpenCharClass(this.mBuffer.charCodeAt(n), aClass)
- ) {
- n++;
- }
- if (n > this.mOffset) {
- const codes = stringToCodes(this.mBuffer.slice(this.mOffset, n));
- safeApply(Array.prototype.push, aText, codes);
- this.mOffset = n;
- }
- if (n == this.mCount) {
- break;
- }
-
- const ch = this.Peek();
- if (ch == 0) {
- this.Advance();
- aText.push(UCS2_REPLACEMENT_CHAR);
- continue;
- }
-
- if (ch != REVERSE_SOLIDUS) {
- break;
- }
- if (!this.GatherEscape(aText, inString)) {
- break;
- }
- }
-
- return this.mOffset > start;
- },
-
- /**
- * Scan an Ident token. This also handles Function and URL tokens,
- * both of which begin indistinguishably from an identifier. It can
- * produce a Symbol token when an apparent identifier actually led
- * into an invalid escape sequence.
- */
- ScanIdent(aToken) {
- if (!this.GatherText(IS_IDCHAR, aToken.mIdent)) {
- aToken.mSymbol = this.Peek();
- this.Advance();
- return true;
- }
-
- if (this.Peek() != LEFT_PARENTHESIS) {
- aToken.mType = eCSSToken_Ident;
- return true;
- }
-
- this.Advance();
- aToken.mType = eCSSToken_Function;
-
- const asString = String.fromCharCode.apply(null, aToken.mIdent);
- if (asString.toLowerCase() === "url") {
- this.NextURL(aToken);
- }
- return true;
- },
-
- /**
- * Scan an AtKeyword token. Also handles production of Symbol when
- * an '@' is not followed by an identifier.
- */
- ScanAtKeyword(aToken) {
- // Fall back for when '@' isn't followed by an identifier.
- aToken.mSymbol = COMMERCIAL_AT;
- this.Advance();
-
- const ch = this.Peek();
- if (StartsIdent(ch, this.Peek(1))) {
- if (this.GatherText(IS_IDCHAR, aToken.mIdent)) {
- aToken.mType = eCSSToken_AtKeyword;
- }
- }
- return true;
- },
-
- /**
- * Scan a Hash token. Handles the distinction between eCSSToken_ID
- * and eCSSToken_Hash, and handles production of Symbol when a '#'
- * is not followed by identifier characters.
- */
- ScanHash(aToken) {
- // Fall back for when '#' isn't followed by identifier characters.
- aToken.mSymbol = NUMBER_SIGN;
- this.Advance();
-
- const ch = this.Peek();
- if (IsIdentChar(ch) || ch == REVERSE_SOLIDUS) {
- const type = StartsIdent(ch, this.Peek(1))
- ? eCSSToken_ID
- : eCSSToken_Hash;
- aToken.mIdent.length = 0;
- if (this.GatherText(IS_IDCHAR, aToken.mIdent)) {
- aToken.mType = type;
- }
- }
-
- return true;
- },
-
- /**
- * Scan a Number, Percentage, or Dimension token (all of which begin
- * like a Number). Can produce a Symbol when a '.' is not followed by
- * digits, or when '+' or '-' are not followed by either a digit or a
- * '.' and then a digit. Can also produce a HTMLComment when it
- * encounters '-->'.
- */
- ScanNumber(aToken) {
- let c = this.Peek();
-
- // Sign of the mantissa (-1 or 1).
- const sign = c == HYPHEN_MINUS ? -1 : 1;
- // Absolute value of the integer part of the mantissa. This is a double so
- // we don't run into overflow issues for consumers that only care about our
- // floating-point value while still being able to express the full int32_t
- // range for consumers who want integers.
- let intPart = 0;
- // Fractional part of the mantissa. This is a double so that when
- // we convert to float at the end we'll end up rounding to nearest
- // float instead of truncating down (as we would if fracPart were
- // a float and we just effectively lost the last several digits).
- let fracPart = 0;
- // Absolute value of the power of 10 that we should multiply by
- // (only relevant for numbers in scientific notation). Has to be
- // a signed integer, because multiplication of signed by unsigned
- // converts the unsigned to signed, so if we plan to actually
- // multiply by expSign...
- let exponent = 0;
- // Sign of the exponent.
- let expSign = 1;
-
- aToken.mHasSign = c == PLUS_SIGN || c == HYPHEN_MINUS;
- if (aToken.mHasSign) {
- this.Advance();
- c = this.Peek();
- }
-
- let gotDot = c == FULL_STOP;
-
- if (!gotDot) {
- // Scan the integer part of the mantissa.
- do {
- intPart = 10 * intPart + DecimalDigitValue(c);
- this.Advance();
- c = this.Peek();
- } while (IsDigit(c));
-
- gotDot = c == FULL_STOP && IsDigit(this.Peek(1));
- }
-
- if (gotDot) {
- // Scan the fractional part of the mantissa.
- this.Advance();
- c = this.Peek();
- // Power of ten by which we need to divide our next digit
- let divisor = 10;
- do {
- fracPart += DecimalDigitValue(c) / divisor;
- divisor *= 10;
- this.Advance();
- c = this.Peek();
- } while (IsDigit(c));
- }
-
- let gotE = false;
- if (c == LATIN_SMALL_LETTER_E || c == LATIN_CAPITAL_LETTER_E) {
- const expSignChar = this.Peek(1);
- const nextChar = this.Peek(2);
- if (
- IsDigit(expSignChar) ||
- ((expSignChar == HYPHEN_MINUS || expSignChar == PLUS_SIGN) &&
- IsDigit(nextChar))
- ) {
- gotE = true;
- if (expSignChar == HYPHEN_MINUS) {
- expSign = -1;
- }
- this.Advance(); // consumes the E
- if (expSignChar == HYPHEN_MINUS || expSignChar == PLUS_SIGN) {
- this.Advance();
- c = nextChar;
- } else {
- c = expSignChar;
- }
- do {
- exponent = 10 * exponent + DecimalDigitValue(c);
- this.Advance();
- c = this.Peek();
- } while (IsDigit(c));
- }
- }
-
- let type = eCSSToken_Number;
-
- // Set mIntegerValid for all cases (except %, below) because we need
- // it for the "2n" in :nth-child(2n).
- aToken.mIntegerValid = false;
-
- // Time to reassemble our number.
- // Do all the math in double precision so it's truncated only once.
- let value = sign * (intPart + fracPart);
- if (gotE) {
- // Explicitly cast expSign*exponent to double to avoid issues with
- // overloaded pow() on Windows.
- value *= Math.pow(10.0, expSign * exponent);
- } else if (!gotDot) {
- // Clamp values outside of integer range.
- if (sign > 0) {
- aToken.mInteger = Math.min(intPart, Number.MAX_SAFE_INTEGER);
- } else {
- aToken.mInteger = Math.max(-intPart, Number.MIN_SAFE_INTEGER);
- }
- aToken.mIntegerValid = true;
- }
-
- const ident = aToken.mIdent;
-
- // Check for Dimension and Percentage tokens.
- if (c >= 0) {
- if (StartsIdent(c, this.Peek(1))) {
- if (this.GatherText(IS_IDCHAR, ident)) {
- type = eCSSToken_Dimension;
- }
- } else if (c == PERCENT_SIGN) {
- this.Advance();
- type = eCSSToken_Percentage;
- value = value / 100.0;
- aToken.mIntegerValid = false;
- }
- }
- aToken.mNumber = value;
- aToken.mType = type;
- return true;
- },
-
- /**
- * Scan a string constant ('foo' or "foo"). Will always produce
- * either a String or a Bad_String token; the latter occurs when the
- * close quote is missing. Always returns true (for convenience in Next()).
- */
- ScanString(aToken) {
- const aStop = this.Peek();
- aToken.mType = eCSSToken_String;
- aToken.mSymbol = aStop; // Remember how it's quoted.
- this.Advance();
-
- for (;;) {
- this.GatherText(IS_STRING, aToken.mIdent);
-
- const ch = this.Peek();
- if (ch == -1) {
- this.AddEOFCharacters(
- aStop == QUOTATION_MARK
- ? eEOFCharacters_DoubleQuote
- : eEOFCharacters_SingleQuote
- );
- break; // EOF ends a string token with no error.
- }
- if (ch == aStop) {
- this.Advance();
- break;
- }
- // Both " and ' are excluded from IS_STRING.
- if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
- aToken.mIdent.push(ch);
- this.Advance();
- continue;
- }
-
- aToken.mType = eCSSToken_Bad_String;
- break;
- }
- return true;
- },
-
- /**
- * Scan a unicode-range token. These match the regular expression
- *
- * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
- *
- * However, some such tokens are "invalid". There are three valid forms:
- *
- * u+[0-9a-f]{x} 1 <= x <= 6
- * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
- * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
- *
- * All unicode-range tokens have their text recorded in mIdent; valid ones
- * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
- * Note that this does not validate the numeric range, only the syntactic
- * form.
- */
- ScanURange(aResult) {
- const intro1 = this.Peek();
- const intro2 = this.Peek(1);
- let ch = this.Peek(2);
-
- aResult.mIdent.push(intro1);
- aResult.mIdent.push(intro2);
- this.Advance(2);
-
- let valid = true;
- let haveQues = false;
- let low = 0;
- let high = 0;
- let i = 0;
-
- do {
- aResult.mIdent.push(ch);
- if (IsHexDigit(ch)) {
- if (haveQues) {
- valid = false; // All question marks should be at the end.
- }
- low = low * 16 + HexDigitValue(ch);
- high = high * 16 + HexDigitValue(ch);
- } else {
- haveQues = true;
- low = low * 16 + 0x0;
- high = high * 16 + 0xf;
- }
-
- i++;
- this.Advance();
- ch = this.Peek();
- } while (i < 6 && (IsHexDigit(ch) || ch == QUESTION_MARK));
-
- if (ch == HYPHEN_MINUS && IsHexDigit(this.Peek(1))) {
- if (haveQues) {
- valid = false;
- }
-
- aResult.mIdent.push(ch);
- this.Advance();
- ch = this.Peek();
- high = 0;
- i = 0;
- do {
- aResult.mIdent.push(ch);
- high = high * 16 + HexDigitValue(ch);
-
- i++;
- this.Advance();
- ch = this.Peek();
- } while (i < 6 && IsHexDigit(ch));
- }
-
- aResult.mInteger = low;
- aResult.mInteger2 = high;
- aResult.mIntegerValid = valid;
- aResult.mType = eCSSToken_URange;
- return true;
- },
-
- SetEOFCharacters(aEOFCharacters) {
- this.mEOFCharacters = aEOFCharacters;
- },
-
- AddEOFCharacters(aEOFCharacters) {
- this.mEOFCharacters = this.mEOFCharacters | aEOFCharacters;
- },
-
- AppendImpliedEOFCharacters(aEOFCharacters, aResult) {
- // First, ignore eEOFCharacters_DropBackslash.
- let c = aEOFCharacters >> 1;
+ // First, ignore EEOFCHARACTERS_DROPBACKSLASH.
+ let c = eofChars >> 1;
// All of the remaining EOFCharacters bits represent appended characters,
// and the bits are in the order that they need appending.
for (const p of kImpliedEOFCharacters) {
if (c & 1) {
- aResult.push(p);
+ result += String.fromCharCode(p);
}
c >>= 1;
}
- },
-
- /**
- * Consume the part of an URL token after the initial 'url('. Caller
- * is assumed to have consumed 'url(' already. Will always produce
- * either an URL or a Bad_URL token.
- *
- * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
- * the special lexical rules for URL tokens in a nonstandard context.
- */
- NextURL(aToken) {
- this.SkipWhitespace();
-
- // aToken.mIdent may be "url" at this point; clear that out
- aToken.mIdent.length = 0;
-
- let hasString = false;
- let ch = this.Peek();
- // Do we have a string?
- if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
- this.ScanString(aToken);
- if (aToken.mType == eCSSToken_Bad_String) {
- aToken.mType = eCSSToken_Bad_URL;
- return;
- }
- hasString = true;
- } else {
- // Otherwise, this is the start of a non-quoted url (which may be empty).
- aToken.mSymbol = 0;
- this.GatherText(IS_URL_CHAR, aToken.mIdent);
- }
-
- // Consume trailing whitespace and then look for a close parenthesis.
- this.SkipWhitespace();
- ch = this.Peek();
- // ch can be less than zero indicating EOF
- if (ch < 0 || ch == RIGHT_PARENTHESIS) {
- this.Advance();
- aToken.mType = eCSSToken_URL;
- if (ch < 0) {
- this.AddEOFCharacters(eEOFCharacters_CloseParen);
- }
- } else {
- aToken.mType = eCSSToken_Bad_URL;
- if (!hasString) {
- // Consume until before the next right parenthesis, which follows
- // how <bad-url-token> is consumed in CSS Syntax 3 spec.
- // Note that, we only do this when "url(" is not followed by a
- // string, because in the spec, "url(" followed by a string is
- // handled as a url function rather than a <url-token>, so the
- // rest of content before ")" should be consumed in balance,
- // which will be done by the parser.
- // The closing ")" is not consumed here. It is left to the parser
- // so that the parser can handle both cases.
- do {
- if (IsVertSpace(ch)) {
- this.AdvanceLine();
- } else {
- this.Advance();
- }
- ch = this.Peek();
- } while (ch >= 0 && ch != RIGHT_PARENTHESIS);
- }
- }
- },
-
- /**
- * Primary scanner entry point. Consume one token and fill in
- * |aToken| accordingly. Will skip over any number of comments first,
- * and will also skip over rather than return whitespace and comment
- * tokens.
- *
- * Returns true if it successfully consumed a token, false if EOF has
- * been reached. Will always advance the current read position by at
- * least one character unless called when already at EOF.
- */
- Next(aToken) {
- // do this here so we don't have to do it in dozens of other places
- aToken.mIdent = [];
- aToken.mType = eCSSToken_Symbol;
-
- this.mTokenOffset = this.mOffset;
- this.mTokenLineOffset = this.mLineOffset;
- this.mTokenLineNumber = this.mLineNumber;
-
- const ch = this.Peek();
- if (IsWhitespace(ch)) {
- this.SkipWhitespace();
- aToken.mType = eCSSToken_Whitespace;
- return true;
- }
- if (
- ch == SOLIDUS && // !IsSVGMode() &&
- this.Peek(1) == ASTERISK
- ) {
- this.SkipComment();
- aToken.mType = eCSSToken_Comment;
- return true;
- }
-
- // EOF
- if (ch < 0) {
- return false;
- }
-
- // 'u' could be UNICODE-RANGE or an identifier-family token
- if (ch == LATIN_SMALL_LETTER_U || ch == LATIN_CAPITAL_LETTER_U) {
- const c2 = this.Peek(1);
- const c3 = this.Peek(2);
- if (c2 == PLUS_SIGN && (IsHexDigit(c3) || c3 == QUESTION_MARK)) {
- return this.ScanURange(aToken);
- }
- return this.ScanIdent(aToken);
- }
- // identifier family
- if (IsIdentStart(ch)) {
- return this.ScanIdent(aToken);
- }
-
- // number family
- if (IsDigit(ch)) {
- return this.ScanNumber(aToken);
- }
-
- if (ch == FULL_STOP && IsDigit(this.Peek(1))) {
- return this.ScanNumber(aToken);
- }
-
- if (ch == PLUS_SIGN) {
- const c2 = this.Peek(1);
- if (IsDigit(c2) || (c2 == FULL_STOP && IsDigit(this.Peek(2)))) {
- return this.ScanNumber(aToken);
- }
- }
-
- // HYPHEN_MINUS can start an identifier-family token, a number-family token,
- // or an HTML-comment
- if (ch == HYPHEN_MINUS) {
- const c2 = this.Peek(1);
- const c3 = this.Peek(2);
- if (IsIdentStart(c2) || (c2 == HYPHEN_MINUS && c3 != GREATER_THAN_SIGN)) {
- return this.ScanIdent(aToken);
- }
- if (IsDigit(c2) || (c2 == FULL_STOP && IsDigit(c3))) {
- return this.ScanNumber(aToken);
- }
- if (c2 == HYPHEN_MINUS && c3 == GREATER_THAN_SIGN) {
- this.Advance(3);
- aToken.mType = eCSSToken_HTMLComment;
- aToken.mIdent = stringToCodes("-->");
- return true;
- }
- }
-
- // the other HTML-comment token
- if (
- ch == LESS_THAN_SIGN &&
- this.Peek(1) == EXCLAMATION_MARK &&
- this.Peek(2) == HYPHEN_MINUS &&
- this.Peek(3) == HYPHEN_MINUS
- ) {
- this.Advance(4);
- aToken.mType = eCSSToken_HTMLComment;
- aToken.mIdent = stringToCodes("<!--");
- return true;
- }
-
- // AT_KEYWORD
- if (ch == COMMERCIAL_AT) {
- return this.ScanAtKeyword(aToken);
- }
-
- // HASH
- if (ch == NUMBER_SIGN) {
- return this.ScanHash(aToken);
- }
-
- // STRING
- if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
- return this.ScanString(aToken);
- }
-
- // Match operators: ~= |= ^= $= *=
- const opType = MatchOperatorType(ch);
- if (opType != eCSSToken_Symbol && this.Peek(1) == EQUALS_SIGN) {
- aToken.mType = opType;
- this.Advance(2);
- return true;
- }
-
- // Otherwise, a symbol (DELIM).
- aToken.mSymbol = ch;
- this.Advance();
- return true;
- },
-};
-
-/**
- * Create and return a new CSS lexer.
- *
- * @param {String} input the CSS text to lex
- * @return {CSSLexer} the new lexer
- */
-function getCSSLexer(input) {
- return new Scanner(input);
+ return result;
+ }
}
-exports.getCSSLexer = getCSSLexer;
+exports.InspectorCSSParserWrapper = InspectorCSSParserWrapper;
diff --git a/devtools/shared/css/parsing-utils.js b/devtools/shared/css/parsing-utils.js
index 6234eb3255..bf16790d66 100644
--- a/devtools/shared/css/parsing-utils.js
+++ b/devtools/shared/css/parsing-utils.js
@@ -11,7 +11,9 @@
"use strict";
-const { getCSSLexer } = require("resource://devtools/shared/css/lexer.js");
+const {
+ InspectorCSSParserWrapper,
+} = require("resource://devtools/shared/css/lexer.js");
loader.lazyRequireGetter(
this,
@@ -23,7 +25,6 @@ loader.lazyRequireGetter(
const SELECTOR_ATTRIBUTE = (exports.SELECTOR_ATTRIBUTE = 1);
const SELECTOR_ELEMENT = (exports.SELECTOR_ELEMENT = 2);
const SELECTOR_PSEUDO_CLASS = (exports.SELECTOR_PSEUDO_CLASS = 3);
-const CSS_BLOCKS = { "(": ")", "[": "]" };
// When commenting out a declaration, we put this character into the
// comment opener so that future parses of the commented text know to
@@ -40,14 +41,14 @@ const COMMENT_PARSING_HEURISTIC_BYPASS_CHAR =
* @see CSSToken for details about the returned tokens
*/
function* cssTokenizer(string) {
- const lexer = getCSSLexer(string);
+ const lexer = new InspectorCSSParserWrapper(string);
while (true) {
const token = lexer.nextToken();
if (!token) {
break;
}
// None of the existing consumers want comments.
- if (token.tokenType !== "comment") {
+ if (token.tokenType !== "Comment") {
yield token;
}
}
@@ -73,7 +74,7 @@ function* cssTokenizer(string) {
* line and column information.
*/
function cssTokenizerWithLineColumn(string) {
- const lexer = getCSSLexer(string);
+ const lexer = new InspectorCSSParserWrapper(string);
const result = [];
let prevToken = undefined;
while (true) {
@@ -92,7 +93,7 @@ function cssTokenizerWithLineColumn(string) {
break;
}
- if (token.tokenType === "comment") {
+ if (token.tokenType === "Comment") {
// We've already dealt with the previous token's location.
prevToken = undefined;
} else {
@@ -296,7 +297,9 @@ function parseDeclarationsInternal(
throw new Error("empty input string");
}
- const lexer = getCSSLexer(inputString);
+ const lexer = new InspectorCSSParserWrapper(inputString, {
+ trackEOFChars: true,
+ });
let declarations = [getEmptyDeclaration()];
let lastProp = declarations[0];
@@ -340,7 +343,7 @@ function parseDeclarationsInternal(
// Update the start and end offsets of the declaration, but only
// when we see a significant token.
- if (token.tokenType !== "whitespace" && token.tokenType !== "comment") {
+ if (token.tokenType !== "WhiteSpace" && token.tokenType !== "Comment") {
if (lastProp.offsets[0] === undefined) {
lastProp.offsets[0] = token.startOffset;
}
@@ -361,9 +364,8 @@ function parseDeclarationsInternal(
if (
// If we're not already in a nested rule
!isInNested &&
- token.tokenType === "symbol" &&
// and there's an opening curly bracket
- token.text == "{" &&
+ token.tokenType === "CurlyBracketBlock" &&
// and we're not inside a function or an attribute
!currentBlocks.length
) {
@@ -373,13 +375,10 @@ function parseDeclarationsInternal(
continue;
} else if (isInNested) {
- if (token.tokenType === "symbol") {
- if (token.text == "{") {
- nestingLevel++;
- }
- if (token.text == "}") {
- nestingLevel--;
- }
+ if (token.tokenType == "CurlyBracketBlock") {
+ nestingLevel++;
+ } else if (token.tokenType == "CloseCurlyBracket") {
+ nestingLevel--;
}
// If we were in a nested rule, and we saw the last closing curly bracket,
@@ -392,21 +391,24 @@ function parseDeclarationsInternal(
}
continue;
} else if (
- token.tokenType === "symbol" &&
- CSS_BLOCKS[currentBlocks.at(-1)] === token.text
+ token.tokenType === "CloseParenthesis" ||
+ token.tokenType === "CloseSquareBracket"
) {
// Closing the last block that was opened.
currentBlocks.pop();
current += token.text;
- } else if (token.tokenType === "symbol" && CSS_BLOCKS[token.text]) {
+ } else if (
+ token.tokenType === "ParenthesisBlock" ||
+ token.tokenType === "SquareBracketBlock"
+ ) {
// Opening a new block.
currentBlocks.push(token.text);
current += token.text;
- } else if (token.tokenType === "function") {
+ } else if (token.tokenType === "Function") {
// Opening a function is like opening a new block, so push one to the stack.
currentBlocks.push("(");
- current += token.text + "(";
- } else if (token.tokenType === "symbol" && token.text === ":") {
+ current += token.text;
+ } else if (token.tokenType === "Colon") {
// Either way, a "!important" we've seen is no longer valid now.
importantState = 0;
importantWS = false;
@@ -432,11 +434,7 @@ function parseDeclarationsInternal(
// with colons)
current += ":";
}
- } else if (
- token.tokenType === "symbol" &&
- token.text === ";" &&
- !currentBlocks.length
- ) {
+ } else if (token.tokenType === "Semicolon" && !currentBlocks.length) {
lastProp.terminator = "";
// When parsing a comment, if the name hasn't been set, then we
// have probably just seen an ordinary semicolon used in text,
@@ -456,7 +454,7 @@ function parseDeclarationsInternal(
}
lastProp.value = cssTrim(current);
resetStateForNextDeclaration();
- } else if (token.tokenType === "ident") {
+ } else if (token.tokenType === "Ident") {
if (token.text === "important" && importantState === 1) {
importantState = 2;
} else {
@@ -471,17 +469,15 @@ function parseDeclarationsInternal(
importantState = 0;
importantWS = false;
}
- // Re-escape the token to avoid dequoting problems.
- // See bug 1287620.
- current += CSS.escape(token.text);
+ current += token.text;
}
- } else if (token.tokenType === "symbol" && token.text === "!") {
+ } else if (token.tokenType === "Delim" && token.text === "!") {
importantState = 1;
- } else if (token.tokenType === "whitespace") {
+ } else if (token.tokenType === "WhiteSpace") {
if (current !== "") {
current = current.trimEnd() + " ";
}
- } else if (token.tokenType === "comment") {
+ } else if (token.tokenType === "Comment") {
if (parseComments && !lastProp.name && !lastProp.value) {
const commentText = inputString.substring(
token.startOffset + 2,
@@ -536,7 +532,7 @@ function parseDeclarationsInternal(
current += "!";
}
lastProp.value = cssTrim(current);
- const terminator = lexer.performEOFFixup("", true);
+ const terminator = lexer.performEOFFixup("");
lastProp.terminator = terminator + ";";
// If the input was unterminated, attribute the remainder to
// this property. This avoids some bad behavior when rewriting
@@ -644,15 +640,17 @@ function parsePseudoClassesAndAttributes(value) {
throw new Error("empty input string");
}
- const tokens = cssTokenizer(value);
+ // See InspectorCSSToken dictionary in InspectorUtils.webidl for more information
+ // about the tokens.
+ const tokensIterator = cssTokenizer(value);
const result = [];
let current = "";
let functionCount = 0;
let hasAttribute = false;
let hasColon = false;
- for (const token of tokens) {
- if (token.tokenType === "ident") {
+ for (const token of tokensIterator) {
+ if (token.tokenType === "Ident") {
current += value.substring(token.startOffset, token.endOffset);
if (hasColon && !functionCount) {
@@ -663,7 +661,7 @@ function parsePseudoClassesAndAttributes(value) {
current = "";
hasColon = false;
}
- } else if (token.tokenType === "symbol" && token.text === ":") {
+ } else if (token.tokenType === "Colon") {
if (!hasColon) {
if (current) {
result.push({ value: current, type: SELECTOR_ELEMENT });
@@ -674,10 +672,10 @@ function parsePseudoClassesAndAttributes(value) {
}
current += token.text;
- } else if (token.tokenType === "function") {
+ } else if (token.tokenType === "Function") {
current += value.substring(token.startOffset, token.endOffset);
functionCount++;
- } else if (token.tokenType === "symbol" && token.text === ")") {
+ } else if (token.tokenType === "CloseParenthesis") {
current += token.text;
if (hasColon && functionCount == 1) {
@@ -691,7 +689,7 @@ function parsePseudoClassesAndAttributes(value) {
} else {
functionCount--;
}
- } else if (token.tokenType === "symbol" && token.text === "[") {
+ } else if (token.tokenType === "SquareBracketBlock") {
if (!hasAttribute && !functionCount) {
if (current) {
result.push({ value: current, type: SELECTOR_ELEMENT });
@@ -702,7 +700,7 @@ function parsePseudoClassesAndAttributes(value) {
}
current += token.text;
- } else if (token.tokenType === "symbol" && token.text === "]") {
+ } else if (token.tokenType === "CloseSquareBracket") {
current += token.text;
if (hasAttribute && !functionCount) {