summaryrefslogtreecommitdiffstats
path: root/devtools/shared/css/lexer.js
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /devtools/shared/css/lexer.js
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'devtools/shared/css/lexer.js')
-rw-r--r--devtools/shared/css/lexer.js1522
1 files changed, 1522 insertions, 0 deletions
diff --git a/devtools/shared/css/lexer.js b/devtools/shared/css/lexer.js
new file mode 100644
index 0000000000..18e78717d1
--- /dev/null
+++ b/devtools/shared/css/lexer.js
@@ -0,0 +1,1522 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// A CSS Lexer. This file is a bit unusual -- it is a more or less
+// direct translation of layout/style/nsCSSScanner.cpp and
+// layout/style/CSSLexer.cpp into JS. This implements the
+// CSSLexer.webidl interface, and the intent is to try to keep it in
+// sync with changes to the platform CSS lexer. Due to this goal,
+// this file violates some naming conventions and consequently locally
+// disables some eslint rules.
+
+/* eslint-disable camelcase, mozilla/no-aArgs, no-else-return, complexity */
+
+"use strict";
+
+// White space of any kind. No value fields are used. Note that
+// comments do *not* count as white space; this file gives them
+// their own token type (eCSSToken_Comment, below).
+const eCSSToken_Whitespace = "whitespace"; // TAB, LF, FF, CR, SPC
+// A comment.
+const eCSSToken_Comment = "comment"; // /*...*/
+
+// Identifier-like tokens. mIdent is the text of the identifier.
+// The difference between ID and Hash is: if the text after the #
+// would have been a valid Ident if the # hadn't been there, the
+// scanner produces an ID token. Otherwise it produces a Hash token.
+// (This distinction is required by css3-selectors.)
+const eCSSToken_Ident = "ident"; // word
+const eCSSToken_Function = "function"; // word(
+const eCSSToken_AtKeyword = "at"; // @word
+const eCSSToken_ID = "id"; // #word
+const eCSSToken_Hash = "hash"; // #0word
+
+// Numeric tokens. mNumber is the floating-point value of the
+// number, and mHasSign indicates whether there was an explicit sign
+// (+ or -) in front of the number. If mIntegerValid is true, the
+// number had the lexical form of an integer, and mInteger is its
+// integer value. ScanNumber clamps mInteger to the range
+// Number.MIN_SAFE_INTEGER..Number.MAX_SAFE_INTEGER; mNumber is left
+// unclamped. Percentage tokens
+// are always considered not to be integers, even if their numeric
+// value is integral (100% => mNumber = 1.0). For Dimension
+// tokens, mIdent holds the text of the unit.
+const eCSSToken_Number = "number"; // 1 -5 +2e3 3.14159 7.297352e-3
+const eCSSToken_Dimension = "dimension"; // 24px 8.5in
+const eCSSToken_Percentage = "percentage"; // 85% 1280.4%
+
+// String-like tokens. In all cases, mIdent holds the text
+// belonging to the string, and mSymbol holds the delimiter
+// character, which may be ', ", or zero (only for unquoted URLs).
+// Bad_String and Bad_URL tokens are emitted when the closing
+// delimiter or parenthesis was missing.
+const eCSSToken_String = "string"; // 'foo bar' "foo bar"
+const eCSSToken_Bad_String = "bad_string"; // 'foo bar
+const eCSSToken_URL = "url"; // url(foobar) url("foo bar")
+const eCSSToken_Bad_URL = "bad_url"; // url(foo
+
+// Any one-character symbol. mSymbol holds the character.
+const eCSSToken_Symbol = "symbol"; // . ; { } ! *
+
+// Match operators. These are single tokens rather than pairs of
+// Symbol tokens because css3-selectors forbids the presence of
+// comments between the two characters. No value fields are used;
+// the token type indicates which operator.
+const eCSSToken_Includes = "includes"; // ~=
+const eCSSToken_Dashmatch = "dashmatch"; // |=
+const eCSSToken_Beginsmatch = "beginsmatch"; // ^=
+const eCSSToken_Endsmatch = "endsmatch"; // $=
+const eCSSToken_Containsmatch = "containsmatch"; // *=
+
+// Unicode-range token: currently used only in @font-face.
+// The lexical rule for this token includes several forms that are
+// semantically invalid. Therefore, mIdent always holds the
+// complete original text of the token (so we can print it
+// accurately in diagnostics), and mIntegerValid is true iff the
+// token is semantically valid. In that case, mInteger holds the
+// lowest value included in the range, and mInteger2 holds the
+// highest value included in the range.
+const eCSSToken_URange = "urange"; // U+007e U+01?? U+2000-206F
+
+// HTML comment delimiters, ignored as a unit when they appear at
+// the top level of a style sheet, for compatibility with websites
+// written to work in pre-CSS browsers. This token type
+// subsumes the css2.1 CDO and CDC tokens, which are always treated
+// the same by the parser. mIdent holds the text of the token, for
+// diagnostics.
+const eCSSToken_HTMLComment = "htmlcomment"; // <!-- -->
+
+const eEOFCharacters_None = 0x0000; // bit flags (combined with |, tested with &); none set
+
+// to handle \<EOF> inside strings
+const eEOFCharacters_DropBackslash = 0x0001;
+
+// to handle \<EOF> outside strings
+const eEOFCharacters_ReplacementChar = 0x0002;
+
+// to close comments
+const eEOFCharacters_Asterisk = 0x0004; // the "*" of a missing "*/"
+const eEOFCharacters_Slash = 0x0008; // the "/" of a missing "*/"
+
+// to close double-quoted strings
+const eEOFCharacters_DoubleQuote = 0x0010;
+
+// to close single-quoted strings
+const eEOFCharacters_SingleQuote = 0x0020;
+
+// to close URLs
+const eEOFCharacters_CloseParen = 0x0040;
+
+// Bridge the char/string divide: UTF-16 code units to compare against charCodeAt() results.
+const APOSTROPHE = "'".charCodeAt(0);
+const ASTERISK = "*".charCodeAt(0);
+const CARRIAGE_RETURN = "\r".charCodeAt(0);
+const CIRCUMFLEX_ACCENT = "^".charCodeAt(0);
+const COMMERCIAL_AT = "@".charCodeAt(0);
+const DIGIT_NINE = "9".charCodeAt(0);
+const DIGIT_ZERO = "0".charCodeAt(0);
+const DOLLAR_SIGN = "$".charCodeAt(0);
+const EQUALS_SIGN = "=".charCodeAt(0);
+const EXCLAMATION_MARK = "!".charCodeAt(0);
+const FULL_STOP = ".".charCodeAt(0);
+const GREATER_THAN_SIGN = ">".charCodeAt(0);
+const HYPHEN_MINUS = "-".charCodeAt(0);
+const LATIN_CAPITAL_LETTER_E = "E".charCodeAt(0);
+const LATIN_CAPITAL_LETTER_U = "U".charCodeAt(0);
+const LATIN_SMALL_LETTER_E = "e".charCodeAt(0);
+const LATIN_SMALL_LETTER_U = "u".charCodeAt(0);
+const LEFT_PARENTHESIS = "(".charCodeAt(0);
+const LESS_THAN_SIGN = "<".charCodeAt(0);
+const LINE_FEED = "\n".charCodeAt(0);
+const NUMBER_SIGN = "#".charCodeAt(0);
+const PERCENT_SIGN = "%".charCodeAt(0);
+const PLUS_SIGN = "+".charCodeAt(0);
+const QUESTION_MARK = "?".charCodeAt(0);
+const QUOTATION_MARK = '"'.charCodeAt(0);
+const REVERSE_SOLIDUS = "\\".charCodeAt(0);
+const RIGHT_PARENTHESIS = ")".charCodeAt(0);
+const SOLIDUS = "/".charCodeAt(0);
+const TILDE = "~".charCodeAt(0);
+const VERTICAL_LINE = "|".charCodeAt(0);
+
+const UCS2_REPLACEMENT_CHAR = 0xfffd; // U+FFFD REPLACEMENT CHARACTER
+
+const kImpliedEOFCharacters = [ // NOTE(review): order appears to follow the eEOFCharacters_* bits from 0x0002 upward; the consumer is not visible here -- confirm
+ UCS2_REPLACEMENT_CHAR,
+ ASTERISK,
+ SOLIDUS,
+ QUOTATION_MARK,
+ APOSTROPHE,
+ RIGHT_PARENTHESIS,
+ 0,
+];
+
+// Largest number of arguments handed to a single Function.prototype.apply call.
+const ARGS_LENGTH_MAX = 500 * 1000;
+
+/**
+ * Call |method| with |args| spread as its arguments, working around the
+ * 500000-argument limit in Firefox (see Bug 1414361).
+ *
+ * The arguments array is cut into consecutive slices of at most
+ * ARGS_LENGTH_MAX items and |method| is applied once per slice, in order.
+ *
+ * !! The return value is NOT the result of one big call: it is an array
+ * holding the result of each per-slice call. Combining those results into
+ * something meaningful is up to the caller !!
+ *
+ * @param {Function} method
+ *        The method to apply.
+ * @param {*} scope
+ *        The scope ("this") to use when applying the method.
+ * @param {Array} args
+ *        The array of arguments to apply.
+ *
+ * @returns {Array}
+ *          One return value per slice; empty when |args| is empty.
+ */
+function safeApply(method, scope, args) {
+  // Read the length once, up front, so the number of slices is fixed
+  // before |method| runs.
+  const total = args.length;
+  const results = [];
+  for (let begin = 0; begin < total; begin += ARGS_LENGTH_MAX) {
+    const chunk = args.slice(begin, begin + ARGS_LENGTH_MAX);
+    results.push(method.apply(scope, chunk));
+  }
+  return results;
+}
+
+/**
+ * Map a numeric character value to something safe to emit: values at or
+ * above 0x110000 and values in the surrogate block 0xD800-0xDFFF are
+ * replaced by U+FFFD; anything else is returned unchanged.
+ *
+ * @param {Number} c the character to check
+ * @return {Number} the character or its replacement
+ */
+function ensureValidChar(c) {
+  const isOutOfRange = c >= 0x00110000;
+  // Masking off the low 11 bits leaves 0xd800 exactly for 0xD800..0xDFFF.
+  const isSurrogate = (c & 0xfff800) === 0xd800;
+  return isOutOfRange || isSurrogate ? UCS2_REPLACEMENT_CHAR : c;
+}
+
+/**
+ * Convert a string into the array of its UTF-16 code units.
+ *
+ * @param {String} str the input string
+ * @return {Array} one numeric code (as returned by charCodeAt) per
+ *         position in the input string, in order.
+ */
+function stringToCodes(str) {
+  // This is a hot path: a plain index loop is faster than any other means
+  // (e.g. Array#map).
+  const length = str.length;
+  const codes = [];
+  let index = 0;
+  while (index < length) {
+    codes.push(str.charCodeAt(index));
+    index++;
+  }
+  return codes;
+}
+
+// Character-class bit flags stored in gLexTable.
+const IS_HEX_DIGIT = 1 << 0;
+const IS_IDSTART = 1 << 1;
+const IS_IDCHAR = 1 << 2;
+const IS_URL_CHAR = 1 << 3;
+const IS_HSPACE = 1 << 4;
+const IS_VSPACE = 1 << 5;
+const IS_SPACE = IS_HSPACE | IS_VSPACE;
+const IS_STRING = 1 << 6;
+
+// Short aliases, used only to keep the table below readable.
+const H = IS_HSPACE;
+const V = IS_VSPACE;
+const I = IS_IDCHAR;
+const J = IS_IDSTART;
+const U = IS_URL_CHAR;
+const S = IS_STRING;
+const X = IS_HEX_DIGIT;
+
+const SH = S | H;
+const SU = S | U;
+const SUI = S | U | I;
+const SUIJ = S | U | I | J;
+const SUIX = S | U | I | X;
+const SUIJX = S | U | I | J | X;
+
+// One entry per ASCII code 0x00-0x7F, eight entries per row; the comment
+// above each row names the characters the row's entries describe.
+/* eslint-disable indent, indent-legacy, no-multi-spaces, comma-spacing, spaced-comment */
+// prettier-ignore
+const gLexTable = [
+  // 00 01 02 03 04 05 06 07
+  0, S, S, S, S, S, S, S,
+  // 08 TAB LF 0B FF CR 0E 0F
+  S, SH, V, S, V, V, S, S,
+  // 10 11 12 13 14 15 16 17
+  S, S, S, S, S, S, S, S,
+  // 18 19 1A 1B 1C 1D 1E 1F
+  S, S, S, S, S, S, S, S,
+  //SPC ! " # $ % & '
+  SH, SU, 0, SU, SU, SU, SU, 0,
+  // ( ) * + , - . /
+  S, S, SU, SU, SU, SUI, SU, SU,
+  // 0 1 2 3 4 5 6 7
+  SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
+  // 8 9 : ; < = > ?
+  SUIX, SUIX, SU, SU, SU, SU, SU, SU,
+  // @ A B C D E F G
+  SU, SUIJX, SUIJX, SUIJX, SUIJX, SUIJX, SUIJX, SUIJ,
+  // H I J K L M N O
+  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+  // P Q R S T U V W
+  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+  // X Y Z [ \ ] ^ _
+  SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
+  // ` a b c d e f g
+  SU, SUIJX, SUIJX, SUIJX, SUIJX, SUIJX, SUIJX, SUIJ,
+  // h i j k l m n o
+  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+  // p q r s t u v w
+  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
+  // x y z { | } ~ 7F
+  SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
+];
+/* eslint-enable indent, indent-legacy, no-multi-spaces, comma-spacing, spaced-comment */
+
+/**
+ * Test 'ch' against character class 'cls' (one of the IS_* constants
+ * above, or several OR-ed together), treating the class as "open":
+ * every character above U+007F counts as a member. EOF (a negative
+ * value) is never a member.
+ */
+function IsOpenCharClass(ch, cls) {
+  if (ch >= 128) {
+    return true;
+  }
+  return ch >= 0 && (gLexTable[ch] & cls) !== 0;
+}
+
+/**
+ * Test 'ch' against character class 'cls' (one of the IS_* constants
+ * above, or several OR-ed together), treating the class as "closed":
+ * no character above U+007F counts as a member. EOF (a negative value)
+ * is never a member.
+ */
+function IsClosedCharClass(ch, cls) {
+  if (ch < 0 || ch >= 128) {
+    return false;
+  }
+  return (gLexTable[ch] & cls) !== 0;
+}
+
+/**
+ * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
+ * TAB, LF, FF, CR, or SPC.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' has IS_HSPACE or IS_VSPACE set in gLexTable
+ */
+function IsWhitespace(ch) {
+ return IsClosedCharClass(ch, IS_SPACE);
+}
+
+/**
+ * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' has IS_HSPACE set in gLexTable
+ */
+function IsHorzSpace(ch) {
+ return IsClosedCharClass(ch, IS_HSPACE);
+}
+
+/**
+ * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
+ * whitespace requires special handling when consumed, see AdvanceLine.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' has IS_VSPACE set in gLexTable
+ */
+function IsVertSpace(ch) {
+ return IsClosedCharClass(ch, IS_VSPACE);
+}
+
+/**
+ * True if 'ch' is a character that can appear in the middle of an identifier.
+ * This includes U+0000 since it is handled as U+FFFD, but for purposes of
+ * GatherText it should not be included in IsOpenCharClass.
+ * Being an "open" class test, every character above U+007F also qualifies.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' may continue an identifier
+ */
+function IsIdentChar(ch) {
+ return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
+}
+
+/**
+ * True if 'ch' is a character that by itself begins an identifier.
+ * This includes U+0000 since it is handled as U+FFFD, but for purposes of
+ * GatherText it should not be included in IsOpenCharClass.
+ * (This is a subset of IsIdentChar.)
+ * Note that the table also marks the backslash as IS_IDSTART, so an
+ * escape introducer counts as an identifier start here.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' alone may begin an identifier
+ */
+function IsIdentStart(ch) {
+ return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
+}
+
+/**
+ * Decide whether the two-character sequence aFirstChar+aSecondChar can
+ * begin an identifier: either the first character is itself an
+ * identifier start, or it is "-" followed by another "-" or by an
+ * identifier start.
+ *
+ * @param {Number} aFirstChar code of the first character (negative for EOF)
+ * @param {Number} aSecondChar code of the second character (negative for EOF)
+ * @return {Boolean} true if an identifier begins here
+ */
+function StartsIdent(aFirstChar, aSecondChar) {
+  if (IsIdentStart(aFirstChar)) {
+    return true;
+  }
+  if (aFirstChar != HYPHEN_MINUS) {
+    return false;
+  }
+  return aSecondChar == HYPHEN_MINUS || IsIdentStart(aSecondChar);
+}
+
+/**
+ * True if 'ch' is a decimal digit.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' lies between the codes of "0" and "9" inclusive
+ */
+function IsDigit(ch) {
+ return ch >= DIGIT_ZERO && ch <= DIGIT_NINE;
+}
+
+/**
+ * True if 'ch' is a hexadecimal digit.
+ *
+ * @param {Number} ch a character code, or a negative value for EOF
+ * @return {Boolean} whether 'ch' has IS_HEX_DIGIT set in gLexTable
+ *         (0-9, A-F and a-f)
+ */
+function IsHexDigit(ch) {
+ return IsClosedCharClass(ch, IS_HEX_DIGIT);
+}
+
+/**
+ * Assuming that 'ch' is a decimal digit, return its numeric value.
+ * No check is made here: for any other input the result is simply the
+ * distance from the code of "0".
+ *
+ * @param {Number} ch the code of a decimal digit
+ * @return {Number} the digit's value, 0 through 9 for valid input
+ */
+function DecimalDigitValue(ch) {
+ return ch - DIGIT_ZERO;
+}
+
+/**
+ * Return the numeric value (0-15) of 'ch', which the caller guarantees
+ * is a hexadecimal digit.
+ *
+ * @param {Number} ch the code of a hexadecimal digit
+ * @return {Number} the digit's value
+ */
+function HexDigitValue(ch) {
+  // For letters, ch & 7 keeps only the low three bits, which are the same
+  // for "A" and "a" (1) through "F" and "f" (6); adding 9 therefore gives
+  // 10..15 regardless of case.
+  return IsDigit(ch) ? DecimalDigitValue(ch) : (ch & 0x7) + 9;
+}
+
+/**
+ * Look up the match-operator token that 'ch' would form when followed by
+ * "=". Returns the corresponding token type for "~", "|", "^", "$" and
+ * "*"; for any other character returns eCSSToken_Symbol, meaning that no
+ * two-character match operator starts with 'ch'.
+ *
+ * @param {Number} ch the code of the candidate first character
+ * @return {String} one of the eCSSToken_* type names
+ */
+function MatchOperatorType(ch) {
+  if (ch === TILDE) {
+    return eCSSToken_Includes;
+  }
+  if (ch === VERTICAL_LINE) {
+    return eCSSToken_Dashmatch;
+  }
+  if (ch === CIRCUMFLEX_ACCENT) {
+    return eCSSToken_Beginsmatch;
+  }
+  if (ch === DOLLAR_SIGN) {
+    return eCSSToken_Endsmatch;
+  }
+  if (ch === ASTERISK) {
+    return eCSSToken_Containsmatch;
+  }
+  return eCSSToken_Symbol;
+}
+
+/**
+ * Scanner state for one piece of CSS text.
+ *
+ * @param {String} buffer the text to scan; a falsy value is treated as
+ *        the empty string.
+ */
+function Scanner(buffer) {
+  const source = buffer || "";
+  Object.assign(this, {
+    // Text being scanned, the read position in it, and its length.
+    mBuffer: source,
+    mOffset: 0,
+    mCount: source.length,
+    // Current line (starts at 1) and the offset at which that line begins.
+    mLineNumber: 1,
+    mLineOffset: 0,
+    // Position bookkeeping for the most recently returned token.
+    mTokenLineOffset: 0,
+    mTokenOffset: 0,
+    mTokenLineNumber: 1,
+    // eEOFCharacters_* flags accumulated when input ends inside a token.
+    mEOFCharacters: eEOFCharacters_None,
+  });
+}
+
+Scanner.prototype = {
+ /**
+ * The line number of the most recently returned token. Line
+ * numbers are 0-based.
+ *
+ * @return {Number} mTokenLineNumber (which the constructor starts at 1)
+ *         minus one.
+ */
+ get lineNumber() {
+ return this.mTokenLineNumber - 1;
+ },
+
+ /**
+ * The column number of the most recently returned token. Column
+ * numbers are 0-based.
+ *
+ * @return {Number} mTokenOffset minus mTokenLineOffset; presumably the
+ *         latter is the buffer offset at which the token's line starts
+ *         (it is maintained outside this getter -- confirm in Next()).
+ */
+ get columnNumber() {
+ return this.mTokenOffset - this.mTokenLineOffset;
+ },
+
+ /**
+  * Complete a token that was cut short by EOF. Given the text that was
+  * lexed, return it with the terminators recorded in mEOFCharacters
+  * appended:
+  *
+  * 1. EOF inside a string: the matching quote is appended.
+  * 2. EOF inside a url token: the close paren is appended.
+  * 3. EOF inside a comment: the comment closer is appended.
+  *
+  * The input may also have ended in a backslash. How that is treated
+  * depends on the context and on aPreserveBackslash.
+  *
+  * When aPreserveBackslash is true, the trailing backslash is kept and a
+  * second backslash is appended after it: |\| becomes |\\| and |'\|
+  * becomes |'\\'|.
+  *
+  * When aPreserveBackslash is false:
+  *   - in a string context the trailing backslash is removed, so |"\|
+  *     becomes |""|;
+  *   - outside a string context U+FFFD is appended, so |\| becomes a
+  *     two-character string: backslash followed by U+FFFD.
+  *
+  * Passing false conforms to the CSS Syntax specification; passing true
+  * may give somewhat more intuitive behavior.
+  *
+  * @param {String} aInputString the input string
+  * @param {Boolean} aPreserveBackslash how to handle a trailing backslash
+  * @return {String} the input string with the termination characters
+  *         appended
+  */
+ performEOFFixup(aInputString, aPreserveBackslash) {
+   const backslashFlags =
+     eEOFCharacters_DropBackslash | eEOFCharacters_ReplacementChar;
+   let pending = this.mEOFCharacters;
+   let fixed = aInputString;
+
+   if (aPreserveBackslash && (pending & backslashFlags) !== 0) {
+     // Escape the trailing backslash instead of dropping or replacing it.
+     pending &= ~backslashFlags;
+     fixed += "\\";
+   }
+
+   const mustDropBackslash = (pending & eEOFCharacters_DropBackslash) !== 0;
+   if (mustDropBackslash && fixed.endsWith("\\")) {
+     fixed = fixed.slice(0, -1);
+   }
+
+   const implied = [];
+   this.AppendImpliedEOFCharacters(pending, implied);
+   return fixed + String.fromCharCode(...implied);
+ },
+
+ /**
+  * Return the next token, or null at EOF.
+  *
+  * The token object is described by the following WebIDL definition:
+  *
+  * dictionary CSSToken {
+  *   // The token type.
+  *   CSSTokenType tokenType = "whitespace";
+  *
+  *   // Offset of the first character of the token.
+  *   unsigned long startOffset = 0;
+  *   // Offset of the character after the final character of the token.
+  *   // This is chosen so that the offsets can be passed to |substring|
+  *   // to yield the exact contents of the token.
+  *   unsigned long endOffset = 0;
+  *
+  *   // If the token is a number, percentage, or dimension, this holds
+  *   // the value. This is not present for other token types.
+  *   double number;
+  *   // If the token is a number, percentage, or dimension, this is true
+  *   // iff the number had an explicit sign. This is not present for
+  *   // other token types.
+  *   boolean hasSign;
+  *   // If the token is a number, percentage, or dimension, this is true
+  *   // iff the number was specified as an integer. This is not present
+  *   // for other token types.
+  *   boolean isInteger;
+  *
+  *   // Text associated with the token. This is not present for all
+  *   // token types. In particular it is:
+  *   //
+  *   // Token type    Meaning
+  *   // ===============================
+  *   // ident         The identifier.
+  *   // function      The function name. Note that the "(" is part
+  *   //               of the token but is not present in |text|.
+  *   // at            The word.
+  *   // id            The word.
+  *   // hash          The word.
+  *   // dimension     The dimension.
+  *   // string        The string contents after escape processing.
+  *   // bad_string    Ditto.
+  *   // url           The URL after escape processing.
+  *   // bad_url       Ditto.
+  *   // symbol        The symbol text.
+  *   DOMString text;
+  * };
+  *
+  * @return {Object|null} a CSSToken-shaped object, or null once Next()
+  *         reports that there are no more tokens.
+  */
+ nextToken() {
+   const token = {};
+   if (!this.Next(token)) {
+     return null;
+   }
+
+   const resultToken = {};
+   resultToken.tokenType = token.mType;
+   resultToken.startOffset = this.mTokenOffset;
+   resultToken.endOffset = this.mOffset;
+
+   // mIdent holds UTF-16 code units copied from the buffer, but hex
+   // escapes are stored as whole code points (up to 0x10FFFF, see
+   // GatherEscape/ensureValidChar). String.fromCharCode would truncate
+   // those to 16 bits and yield the wrong character, so use
+   // String.fromCodePoint, which handles both kinds of entry: a surrogate
+   // pair split across two entries (or across two safeApply chunks) still
+   // joins into the right character. This assumes every entry is an
+   // integer in 0..0x10FFFF, which holds for the producers visible in
+   // this file.
+   const constructText = () => {
+     return safeApply(String.fromCodePoint, null, token.mIdent).join("");
+   };
+
+   switch (token.mType) {
+     case eCSSToken_Whitespace:
+       break;
+
+     case eCSSToken_Ident:
+     case eCSSToken_Function:
+     case eCSSToken_AtKeyword:
+     case eCSSToken_ID:
+     case eCSSToken_Hash:
+       resultToken.text = constructText();
+       break;
+
+     case eCSSToken_Dimension:
+       resultToken.text = constructText();
+     /* Fall through. */
+     case eCSSToken_Number:
+     case eCSSToken_Percentage:
+       resultToken.number = token.mNumber;
+       resultToken.hasSign = token.mHasSign;
+       resultToken.isInteger = token.mIntegerValid;
+       break;
+
+     case eCSSToken_String:
+     case eCSSToken_Bad_String:
+     case eCSSToken_URL:
+     case eCSSToken_Bad_URL:
+       resultToken.text = constructText();
+       /* Don't bother emitting the delimiter, as it is readily extracted
+          from the source string when needed. */
+       break;
+
+     case eCSSToken_Symbol:
+       // mSymbol is a single code unit taken from the buffer.
+       resultToken.text = String.fromCharCode(token.mSymbol);
+       break;
+
+     case eCSSToken_Includes:
+     case eCSSToken_Dashmatch:
+     case eCSSToken_Beginsmatch:
+     case eCSSToken_Endsmatch:
+     case eCSSToken_Containsmatch:
+     case eCSSToken_URange:
+       break;
+
+     case eCSSToken_Comment:
+     case eCSSToken_HTMLComment:
+       /* The comment text is easily extracted from the source string,
+          and is rarely useful. */
+       break;
+   }
+
+   return resultToken;
+ },
+
+ /**
+  * Look at the raw UTF-16 code unit |n| positions past the current read
+  * position without consuming anything.
+  *
+  * @param {Number} n how far ahead to look; defaults to 0.
+  * @return {Number} the code unit at |this.mOffset + n|, or -1 when that
+  *         position is at or beyond the end of the buffer.
+  */
+ Peek(n = 0) {
+   const index = this.mOffset + n;
+   return index >= this.mCount ? -1 : this.mBuffer.charCodeAt(index);
+ },
+
+ /**
+  * Move the read position forward by |n| code units; Advance(0) does
+  * nothing. A request that would reach or pass the end of input (or
+  * that would move backwards) silently parks the position at the end.
+  * Must not be used to step over a line boundary: AdvanceLine() exists
+  * for that.
+  *
+  * @param {Number} n number of code units to skip; defaults to 1.
+  */
+ Advance(n = 1) {
+   const target = this.mOffset + n;
+   const overruns = target >= this.mCount || target < this.mOffset;
+   this.mOffset = overruns ? this.mCount : target;
+ },
+
+ /**
+  * Step the read position over one line boundary and update the line
+  * bookkeeping (mLineNumber, mLineOffset) to match.
+  */
+ AdvanceLine() {
+   const buffer = this.mBuffer;
+   const at = this.mOffset;
+   // A CR immediately followed by LF is consumed as a single boundary.
+   const isCRLF =
+     buffer.charCodeAt(at) == CARRIAGE_RETURN &&
+     at + 1 < this.mCount &&
+     buffer.charCodeAt(at + 1) == LINE_FEED;
+   this.mOffset = at + (isCRLF ? 2 : 1);
+
+   // 0 is a magical line number meaning that we don't know (i.e., script)
+   if (this.mLineNumber != 0) {
+     this.mLineNumber++;
+   }
+   this.mLineOffset = this.mOffset;
+ },
+
+ /**
+  * Consume the run of whitespace characters (horizontal or vertical)
+  * that starts at the current read position, if any.
+  */
+ SkipWhitespace() {
+   // EOF counts as non-whitespace, so the loop also ends at end of input.
+   let ch = this.Peek();
+   while (IsWhitespace(ch)) {
+     if (IsVertSpace(ch)) {
+       this.AdvanceLine();
+     } else {
+       this.Advance();
+     }
+     ch = this.Peek();
+   }
+ },
+
+ /**
+  * Consume one CSS comment that starts at the current read position.
+  * If input ends before the comment is closed, record which closing
+  * characters are missing via SetEOFCharacters.
+  */
+ SkipComment() {
+   // Step over the two-character opener (presumably the caller has
+   // already verified that it is there).
+   this.Advance(2);
+   for (;;) {
+     const ch = this.Peek();
+     if (ch < 0) {
+       this.SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
+       return;
+     }
+     if (ch != ASTERISK) {
+       if (IsVertSpace(ch)) {
+         this.AdvanceLine();
+       } else {
+         this.Advance();
+       }
+       continue;
+     }
+
+     // Saw "*": consume it, then see whether a "/" closes the comment.
+     // Anything else is left in place for the next iteration, so a run
+     // of asterisks before the slash is handled correctly.
+     this.Advance();
+     const afterStar = this.Peek();
+     if (afterStar < 0) {
+       this.SetEOFCharacters(eEOFCharacters_Slash);
+       return;
+     }
+     if (afterStar == SOLIDUS) {
+       this.Advance();
+       return;
+     }
+   }
+ },
+
+ /**
+ * If there is a valid escape sequence starting at the current read
+ * position, consume it, decode it, append the result to |aOutput|,
+ * and return true. Otherwise, consume nothing, leave |aOutput|
+ * unmodified, and return false. If |aInString| is true, accept the
+ * additional form of escape sequence allowed within string-like tokens.
+ *
+ * The read position is expected to be on the backslash itself: the
+ * character examined first is the one after it.
+ *
+ * Note that true can be returned without anything being appended
+ * (escaped newline in a string, or backslash-EOF in a string), and that
+ * a hex escape is appended as ONE entry holding the whole code point,
+ * which may be above 0xFFFF; whatever turns |aOutput| into a string has
+ * to cope with such entries.
+ *
+ * @param {Array} aOutput array of character codes to append to
+ * @param {Boolean} aInString whether we are inside a string-like token
+ * @return {Boolean} whether an escape sequence was consumed
+ */
+ GatherEscape(aOutput, aInString) {
+ let ch = this.Peek(1); // the character following the backslash
+ if (ch < 0) {
+ // If we are in a string (or a url() containing a string), we want to drop
+ // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
+ // character.
+ this.Advance();
+ if (aInString) {
+ this.SetEOFCharacters(eEOFCharacters_DropBackslash);
+ } else {
+ aOutput.push(UCS2_REPLACEMENT_CHAR);
+ this.SetEOFCharacters(eEOFCharacters_ReplacementChar);
+ }
+ return true;
+ }
+ if (IsVertSpace(ch)) {
+ if (aInString) {
+ // In strings (and in url() containing a string), escaped
+ // newlines are completely removed, to allow splitting over
+ // multiple lines.
+ this.Advance(); // the backslash
+ this.AdvanceLine(); // the newline (CR LF counts as one)
+ return true;
+ }
+ // Outside of strings, backslash followed by a newline is not an escape.
+ return false;
+ }
+
+ if (!IsHexDigit(ch)) {
+ // "Any character (except a hexadecimal digit, linefeed, carriage
+ // return, or form feed) can be escaped with a backslash to remove
+ // its special meaning." -- CSS2.1 section 4.1.3
+ this.Advance(2); // the backslash and the escaped character
+ if (ch == 0) {
+ aOutput.push(UCS2_REPLACEMENT_CHAR);
+ } else {
+ aOutput.push(ch);
+ }
+ return true;
+ }
+
+ // "[at most six hexadecimal digits following a backslash] stand
+ // for the ISO 10646 character with that number, which must not be
+ // zero. (It is undefined in CSS 2.1 what happens if a style sheet
+ // does contain a character with Unicode codepoint zero.)"
+ // -- CSS2.1 section 4.1.3
+
+ // At this point we know we have \ followed by at least one
+ // hexadecimal digit, therefore the escape sequence is valid and we
+ // can go ahead and consume the backslash.
+ this.Advance();
+ let val = 0;
+ let i = 0; // number of hex digits consumed so far
+ do {
+ val = val * 16 + HexDigitValue(ch);
+ i++;
+ this.Advance();
+ ch = this.Peek();
+ } while (i < 6 && IsHexDigit(ch));
+
+ // "Interpret the hex digits as a hexadecimal number. If this
+ // number is zero, or is greater than the maximum allowed
+ // codepoint, return U+FFFD REPLACEMENT CHARACTER" -- CSS Syntax
+ // Level 3
+ if (val == 0) {
+ aOutput.push(UCS2_REPLACEMENT_CHAR);
+ } else {
+ aOutput.push(ensureValidChar(val)); // also replaces surrogate values
+ }
+
+ // Consume exactly one whitespace character after a
+ // hexadecimal escape sequence.
+ if (IsVertSpace(ch)) {
+ this.AdvanceLine();
+ } else if (IsHorzSpace(ch)) {
+ this.Advance();
+ }
+ return true;
+ },
+
+ /**
+ * Consume a run of "text" beginning with the current read position,
+ * consisting of characters in the class |aClass| (which must be a
+ * suitable argument to IsOpenCharClass) plus escape sequences.
+ * Append the text to |aText|, after decoding escape sequences.
+ * A U+0000 in the input is consumed and appended as U+FFFD.
+ *
+ * Returns true if any input was consumed, false otherwise. This is
+ * usually, but not always, the same as "something was appended to
+ * |aText|": in a string, an escaped newline or a backslash at EOF is
+ * consumed without appending anything.
+ *
+ * @param {Number} aClass one of the IS_* character-class constants;
+ *        IS_STRING additionally enables the in-string escape forms
+ * @param {Array} aText array of character codes to append to
+ * @return {Boolean} whether the read position moved
+ */
+ GatherText(aClass, aText) {
+ const start = this.mOffset;
+ const inString = aClass == IS_STRING;
+
+ for (;;) {
+ // Consume runs of unescaped characters in one go.
+ let n = this.mOffset;
+ while (
+ n < this.mCount &&
+ IsOpenCharClass(this.mBuffer.charCodeAt(n), aClass)
+ ) {
+ n++;
+ }
+ if (n > this.mOffset) {
+ const codes = stringToCodes(this.mBuffer.slice(this.mOffset, n));
+ safeApply(Array.prototype.push, aText, codes); // chunked: the run may exceed the argument limit
+ this.mOffset = n;
+ }
+ if (n == this.mCount) {
+ break;
+ }
+
+ const ch = this.Peek(); // first character that is not in aClass
+ if (ch == 0) {
+ this.Advance();
+ aText.push(UCS2_REPLACEMENT_CHAR);
+ continue;
+ }
+
+ if (ch != REVERSE_SOLIDUS) {
+ break;
+ }
+ if (!this.GatherEscape(aText, inString)) {
+ break;
+ }
+ }
+
+ return this.mOffset > start;
+ },
+
+ /**
+  * Scan an Ident token. This also handles Function and URL tokens,
+  * both of which begin indistinguishably from an identifier. When no
+  * identifier text can be gathered (an apparent identifier that led
+  * into an invalid escape sequence), the current character is stored in
+  * mSymbol and consumed, and mType is left as the caller set it --
+  * presumably eCSSToken_Symbol; confirm in Next().
+  *
+  * @param {Object} aToken token to fill in (mIdent, mType, mSymbol)
+  * @return {Boolean} always true
+  */
+ ScanIdent(aToken) {
+   if (!this.GatherText(IS_IDCHAR, aToken.mIdent)) {
+     aToken.mSymbol = this.Peek();
+     this.Advance();
+     return true;
+   }
+
+   if (this.Peek() != LEFT_PARENTHESIS) {
+     aToken.mType = eCSSToken_Ident;
+     return true;
+   }
+
+   this.Advance();
+   aToken.mType = eCSSToken_Function;
+
+   // Only a three-character name can be "url", so convert just those
+   // three entries. Spreading the whole identifier into fromCharCode
+   // could exceed the engine's argument limit for a very long name (the
+   // problem safeApply exists for), and fromCharCode would also truncate
+   // escaped code points above 0xFFFF, letting e.g. U+10075 pass for "u".
+   const name = aToken.mIdent;
+   if (
+     name.length === 3 &&
+     String.fromCodePoint(name[0], name[1], name[2]).toLowerCase() === "url"
+   ) {
+     this.NextURL(aToken);
+   }
+   return true;
+ },
+
+ /**
+  * Scan an AtKeyword token. The "@" is always consumed and recorded in
+  * mSymbol; mType is only changed to eCSSToken_AtKeyword when an
+  * identifier follows and its text could be gathered, so otherwise the
+  * token stays whatever the caller initialised it to (the Symbol
+  * fallback).
+  *
+  * @param {Object} aToken token to fill in (mSymbol, mIdent, mType)
+  * @return {Boolean} always true
+  */
+ ScanAtKeyword(aToken) {
+   // Fall back for when '@' isn't followed by an identifier.
+   aToken.mSymbol = COMMERCIAL_AT;
+   this.Advance();
+
+   const first = this.Peek();
+   const second = this.Peek(1);
+   if (
+     StartsIdent(first, second) &&
+     this.GatherText(IS_IDCHAR, aToken.mIdent)
+   ) {
+     aToken.mType = eCSSToken_AtKeyword;
+   }
+   return true;
+ },
+
+ /**
+  * Scan a Hash token. The "#" is always consumed and recorded in
+  * mSymbol. If identifier characters (or an escape) follow, their text
+  * is gathered into mIdent and the token becomes eCSSToken_ID when that
+  * text would be a valid identifier on its own, eCSSToken_Hash
+  * otherwise. If nothing usable follows, mType is left untouched (the
+  * Symbol fallback).
+  *
+  * @param {Object} aToken token to fill in (mSymbol, mIdent, mType)
+  * @return {Boolean} always true
+  */
+ ScanHash(aToken) {
+   // Fall back for when '#' isn't followed by identifier characters.
+   aToken.mSymbol = NUMBER_SIGN;
+   this.Advance();
+
+   const first = this.Peek();
+   if (!IsIdentChar(first) && first != REVERSE_SOLIDUS) {
+     return true;
+   }
+
+   // Decide ID vs. Hash before consuming anything.
+   const looksLikeIdent = StartsIdent(first, this.Peek(1));
+   aToken.mIdent.length = 0;
+   if (this.GatherText(IS_IDCHAR, aToken.mIdent)) {
+     aToken.mType = looksLikeIdent ? eCSSToken_ID : eCSSToken_Hash;
+   }
+   return true;
+ },
+
+ /**
+ * Scan a Number, Percentage, or Dimension token (all of which begin
+ * like a Number) and store the result in |aToken|. Always returns true.
+ *
+ * As written, this method does not itself check that a number starts at
+ * the read position: the digit loops are do/while, so the first character
+ * after an optional sign (or after a '.') is consumed as a digit
+ * unconditionally. Presumably the caller only dispatches here after
+ * seeing a digit, or a sign and/or '.' followed by a digit -- TODO
+ * confirm in Next(). No Symbol or HTMLComment token is produced here.
+ *
+ * @param {Object} aToken token to fill in: mType, mNumber, mHasSign,
+ *        mIntegerValid, mInteger (integer form only) and mIdent (the
+ *        unit text of a Dimension).
+ * @return {Boolean} always true
+ */
+ ScanNumber(aToken) {
+ let c = this.Peek();
+
+ // Sign of the mantissa (-1 or 1).
+ const sign = c == HYPHEN_MINUS ? -1 : 1;
+ // Absolute value of the integer part of the mantissa. This is a double so
+ // we don't run into overflow issues for consumers that only care about our
+ // floating-point value while still being able to express the full int32_t
+ // range for consumers who want integers.
+ let intPart = 0;
+ // Fractional part of the mantissa. This is a double so that when
+ // we convert to float at the end we'll end up rounding to nearest
+ // float instead of truncating down (as we would if fracPart were
+ // a float and we just effectively lost the last several digits).
+ let fracPart = 0;
+ // Absolute value of the power of 10 that we should multiply by
+ // (only relevant for numbers in scientific notation). Has to be
+ // a signed integer, because multiplication of signed by unsigned
+ // converts the unsigned to signed, so if we plan to actually
+ // multiply by expSign...
+ let exponent = 0;
+ // Sign of the exponent.
+ let expSign = 1;
+
+ aToken.mHasSign = c == PLUS_SIGN || c == HYPHEN_MINUS;
+ if (aToken.mHasSign) {
+ this.Advance();
+ c = this.Peek();
+ }
+
+ let gotDot = c == FULL_STOP;
+
+ if (!gotDot) {
+ // Scan the integer part of the mantissa.
+ do {
+ intPart = 10 * intPart + DecimalDigitValue(c);
+ this.Advance();
+ c = this.Peek();
+ } while (IsDigit(c));
+
+ gotDot = c == FULL_STOP && IsDigit(this.Peek(1)); // a '.' only counts when a digit follows it
+ }
+
+ if (gotDot) {
+ // Scan the fractional part of the mantissa.
+ this.Advance();
+ c = this.Peek();
+ // Power of ten by which we need to divide our next digit
+ let divisor = 10;
+ do {
+ fracPart += DecimalDigitValue(c) / divisor;
+ divisor *= 10;
+ this.Advance();
+ c = this.Peek();
+ } while (IsDigit(c));
+ }
+
+ let gotE = false;
+ if (c == LATIN_SMALL_LETTER_E || c == LATIN_CAPITAL_LETTER_E) {
+ const expSignChar = this.Peek(1);
+ const nextChar = this.Peek(2);
+ if (
+ IsDigit(expSignChar) ||
+ ((expSignChar == HYPHEN_MINUS || expSignChar == PLUS_SIGN) &&
+ IsDigit(nextChar))
+ ) {
+ gotE = true;
+ if (expSignChar == HYPHEN_MINUS) {
+ expSign = -1;
+ }
+ this.Advance(); // consumes the E
+ if (expSignChar == HYPHEN_MINUS || expSignChar == PLUS_SIGN) {
+ this.Advance(); // consumes the exponent's sign
+ c = nextChar;
+ } else {
+ c = expSignChar;
+ }
+ do {
+ exponent = 10 * exponent + DecimalDigitValue(c);
+ this.Advance();
+ c = this.Peek();
+ } while (IsDigit(c));
+ }
+ }
+
+ let type = eCSSToken_Number;
+
+ // Set mIntegerValid for all cases (except %, below) because we need
+ // it for the "2n" in :nth-child(2n).
+ aToken.mIntegerValid = false;
+
+ // Time to reassemble our number.
+ // Do all the math in double precision so it's truncated only once.
+ let value = sign * (intPart + fracPart);
+ if (gotE) {
+ // (The C++ original needed an explicit cast to double here; in JS
+ // both Math.pow operands are already doubles.)
+ value *= Math.pow(10.0, expSign * exponent);
+ } else if (!gotDot) {
+ // Integer form: clamp mInteger to the safe-integer range.
+ if (sign > 0) {
+ aToken.mInteger = Math.min(intPart, Number.MAX_SAFE_INTEGER);
+ } else {
+ aToken.mInteger = Math.max(-intPart, Number.MIN_SAFE_INTEGER);
+ }
+ aToken.mIntegerValid = true;
+ }
+
+ const ident = aToken.mIdent;
+
+ // Check for Dimension and Percentage tokens.
+ if (c >= 0) {
+ if (StartsIdent(c, this.Peek(1))) {
+ if (this.GatherText(IS_IDCHAR, ident)) {
+ type = eCSSToken_Dimension;
+ }
+ } else if (c == PERCENT_SIGN) {
+ this.Advance();
+ type = eCSSToken_Percentage;
+ value = value / 100.0;
+ aToken.mIntegerValid = false;
+ }
+ }
+ aToken.mNumber = value;
+ aToken.mType = type;
+ return true;
+ },
+
+ /**
+ * Scan a string constant ('foo' or "foo"). Will always produce
+ * either a String or a Bad_String token; the latter occurs when the
+ * close quote is missing. Always returns true (for convenience in Next()).
+ */
+ ScanString(aToken) {
+ const aStop = this.Peek();
+ aToken.mType = eCSSToken_String;
+ aToken.mSymbol = aStop; // Remember how it's quoted.
+ this.Advance();
+
+ for (;;) {
+ this.GatherText(IS_STRING, aToken.mIdent);
+
+ const ch = this.Peek();
+ if (ch == -1) {
+ this.AddEOFCharacters(
+ aStop == QUOTATION_MARK
+ ? eEOFCharacters_DoubleQuote
+ : eEOFCharacters_SingleQuote
+ );
+ break; // EOF ends a string token with no error.
+ }
+ if (ch == aStop) {
+ this.Advance();
+ break;
+ }
+ // Both " and ' are excluded from IS_STRING.
+ if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
+ aToken.mIdent.push(ch);
+ this.Advance();
+ continue;
+ }
+
+ aToken.mType = eCSSToken_Bad_String;
+ break;
+ }
+ return true;
+ },
+
+ /**
+ * Scan a unicode-range token. These match the regular expression
+ *
+ * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
+ *
+ * However, some such tokens are "invalid". There are three valid forms:
+ *
+ * u+[0-9a-f]{x} 1 <= x <= 6
+ * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
+ * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
+ *
+ * All unicode-range tokens have their text recorded in mIdent; valid ones
+ * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
+ * Note that this does not validate the numeric range, only the syntactic
+ * form.
+ */
+ ScanURange(aResult) {
+ const intro1 = this.Peek();
+ const intro2 = this.Peek(1);
+ let ch = this.Peek(2);
+
+ aResult.mIdent.push(intro1);
+ aResult.mIdent.push(intro2);
+ this.Advance(2);
+
+ let valid = true;
+ let haveQues = false;
+ let low = 0;
+ let high = 0;
+ let i = 0;
+
+ do {
+ aResult.mIdent.push(ch);
+ if (IsHexDigit(ch)) {
+ if (haveQues) {
+ valid = false; // All question marks should be at the end.
+ }
+ low = low * 16 + HexDigitValue(ch);
+ high = high * 16 + HexDigitValue(ch);
+ } else {
+ haveQues = true;
+ low = low * 16 + 0x0;
+ high = high * 16 + 0xf;
+ }
+
+ i++;
+ this.Advance();
+ ch = this.Peek();
+ } while (i < 6 && (IsHexDigit(ch) || ch == QUESTION_MARK));
+
+ if (ch == HYPHEN_MINUS && IsHexDigit(this.Peek(1))) {
+ if (haveQues) {
+ valid = false;
+ }
+
+ aResult.mIdent.push(ch);
+ this.Advance();
+ ch = this.Peek();
+ high = 0;
+ i = 0;
+ do {
+ aResult.mIdent.push(ch);
+ high = high * 16 + HexDigitValue(ch);
+
+ i++;
+ this.Advance();
+ ch = this.Peek();
+ } while (i < 6 && IsHexDigit(ch));
+ }
+
+ aResult.mInteger = low;
+ aResult.mInteger2 = high;
+ aResult.mIntegerValid = valid;
+ aResult.mType = eCSSToken_URange;
+ return true;
+ },
+
+ SetEOFCharacters(aEOFCharacters) { // Replace the stored EOF-character bits with the given value.
+ this.mEOFCharacters = aEOFCharacters; // plain overwrite; AddEOFCharacters ORs bits in instead
+ },
+
+ AddEOFCharacters(aEOFCharacters) {
+ this.mEOFCharacters = this.mEOFCharacters | aEOFCharacters;
+ },
+
+ AppendImpliedEOFCharacters(aEOFCharacters, aResult) {
+ // First, ignore eEOFCharacters_DropBackslash.
+ let c = aEOFCharacters >> 1;
+
+ // All of the remaining EOFCharacters bits represent appended characters,
+ // and the bits are in the order that they need appending.
+ for (const p of kImpliedEOFCharacters) {
+ if (c & 1) {
+ aResult.push(p);
+ }
+ c >>= 1;
+ }
+ },
+
+ /**
+ * Consume the part of an URL token after the initial 'url('. Caller
+ * is assumed to have consumed 'url(' already. Will always produce
+ * either an URL or a Bad_URL token.
+ *
+ * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
+ * the special lexical rules for URL tokens in a nonstandard context.
+ */
+ NextURL(aToken) {
+ this.SkipWhitespace();
+
+ // aToken.mIdent may be "url" at this point; clear that out
+ aToken.mIdent.length = 0;
+
+ let hasString = false;
+ let ch = this.Peek();
+ // Do we have a string?
+ if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
+ this.ScanString(aToken);
+ if (aToken.mType == eCSSToken_Bad_String) {
+ aToken.mType = eCSSToken_Bad_URL;
+ return;
+ }
+ hasString = true;
+ } else {
+ // Otherwise, this is the start of a non-quoted url (which may be empty).
+ aToken.mSymbol = 0;
+ this.GatherText(IS_URL_CHAR, aToken.mIdent);
+ }
+
+ // Consume trailing whitespace and then look for a close parenthesis.
+ this.SkipWhitespace();
+ ch = this.Peek();
+ // ch can be less than zero indicating EOF
+ if (ch < 0 || ch == RIGHT_PARENTHESIS) {
+ this.Advance();
+ aToken.mType = eCSSToken_URL;
+ if (ch < 0) {
+ this.AddEOFCharacters(eEOFCharacters_CloseParen);
+ }
+ } else {
+ aToken.mType = eCSSToken_Bad_URL;
+ if (!hasString) {
+ // Consume until before the next right parenthesis, which follows
+ // how <bad-url-token> is consumed in CSS Syntax 3 spec.
+ // Note that, we only do this when "url(" is not followed by a
+ // string, because in the spec, "url(" followed by a string is
+ // handled as a url function rather than a <url-token>, so the
+ // rest of content before ")" should be consumed in balance,
+ // which will be done by the parser.
+ // The closing ")" is not consumed here. It is left to the parser
+ // so that the parser can handle both cases.
+ do {
+ if (IsVertSpace(ch)) {
+ this.AdvanceLine();
+ } else {
+ this.Advance();
+ }
+ ch = this.Peek();
+ } while (ch >= 0 && ch != RIGHT_PARENTHESIS);
+ }
+ }
+ },
+
+ /**
+ * Primary scanner entry point. Consume one token and fill in
+ * |aToken| accordingly. Whitespace and comments are not skipped:
+ * each is returned as its own token (eCSSToken_Whitespace or
+ * eCSSToken_Comment). |aSkip| is accepted but is not consulted by
+ * this implementation.
+ *
+ * Returns true if it successfully consumed a token, false if EOF has
+ * been reached. Will always advance the current read position by at
+ * least one character unless called when already at EOF.
+ */
+ Next(aToken, aSkip) { // NOTE: aSkip is never read anywhere in this body.
+ // do this here so we don't have to do it in dozens of other places
+ aToken.mIdent = [];
+ aToken.mType = eCSSToken_Symbol; // default type; the final fall-through below relies on it
+
+ this.mTokenOffset = this.mOffset; // snapshot the current position before anything is consumed
+ this.mTokenLineOffset = this.mLineOffset;
+ this.mTokenLineNumber = this.mLineNumber;
+
+ const ch = this.Peek(); // every branch below dispatches on this character
+ if (IsWhitespace(ch)) {
+ this.SkipWhitespace();
+ aToken.mType = eCSSToken_Whitespace; // whitespace is returned as a token of its own
+ return true;
+ }
+ if (
+ ch == SOLIDUS && // !IsSVGMode() &&
+ this.Peek(1) == ASTERISK
+ ) {
+ this.SkipComment();
+ aToken.mType = eCSSToken_Comment; // comments are likewise returned as tokens
+ return true;
+ }
+
+ // EOF
+ if (ch < 0) { // a negative Peek() result is the EOF indicator
+ return false;
+ }
+
+ // 'u' could be UNICODE-RANGE or an identifier-family token
+ if (ch == LATIN_SMALL_LETTER_U || ch == LATIN_CAPITAL_LETTER_U) {
+ const c2 = this.Peek(1);
+ const c3 = this.Peek(2);
+ if (c2 == PLUS_SIGN && (IsHexDigit(c3) || c3 == QUESTION_MARK)) { // "u+" then a hex digit or "?"
+ return this.ScanURange(aToken);
+ }
+ return this.ScanIdent(aToken); // any other 'u' is scanned as an identifier
+ }
+
+ // identifier family
+ if (IsIdentStart(ch)) {
+ return this.ScanIdent(aToken);
+ }
+
+ // number family
+ if (IsDigit(ch)) {
+ return this.ScanNumber(aToken);
+ }
+
+ if (ch == FULL_STOP && IsDigit(this.Peek(1))) { // ".5"-style number with no integer part
+ return this.ScanNumber(aToken);
+ }
+
+ if (ch == PLUS_SIGN) {
+ const c2 = this.Peek(1);
+ if (IsDigit(c2) || (c2 == FULL_STOP && IsDigit(this.Peek(2)))) { // "+5" or "+.5"
+ return this.ScanNumber(aToken);
+ } // otherwise falls through to the later checks
+ }
+
+ // HYPHEN_MINUS can start an identifier-family token, a number-family token,
+ // or an HTML-comment
+ if (ch == HYPHEN_MINUS) {
+ const c2 = this.Peek(1);
+ const c3 = this.Peek(2);
+ if (IsIdentStart(c2) || (c2 == HYPHEN_MINUS && c3 != GREATER_THAN_SIGN)) { // "-x", or "--" not followed by ">"
+ return this.ScanIdent(aToken);
+ }
+ if (IsDigit(c2) || (c2 == FULL_STOP && IsDigit(c3))) { // "-5" or "-.5"
+ return this.ScanNumber(aToken);
+ }
+ if (c2 == HYPHEN_MINUS && c3 == GREATER_THAN_SIGN) {
+ this.Advance(3); // consume all three characters of "-->"
+ aToken.mType = eCSSToken_HTMLComment;
+ aToken.mIdent = stringToCodes("-->");
+ return true;
+ }
+ }
+
+ // the other HTML-comment token
+ if (
+ ch == LESS_THAN_SIGN &&
+ this.Peek(1) == EXCLAMATION_MARK &&
+ this.Peek(2) == HYPHEN_MINUS &&
+ this.Peek(3) == HYPHEN_MINUS
+ ) {
+ this.Advance(4); // consume all four characters of "<!--"
+ aToken.mType = eCSSToken_HTMLComment;
+ aToken.mIdent = stringToCodes("<!--");
+ return true;
+ }
+
+ // AT_KEYWORD
+ if (ch == COMMERCIAL_AT) {
+ return this.ScanAtKeyword(aToken);
+ }
+
+ // HASH
+ if (ch == NUMBER_SIGN) {
+ return this.ScanHash(aToken);
+ }
+
+ // STRING
+ if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
+ return this.ScanString(aToken);
+ }
+
+ // Match operators: ~= |= ^= $= *=
+ const opType = MatchOperatorType(ch); // presumably eCSSToken_Symbol when ch starts no operator -- confirm in MatchOperatorType
+ if (opType != eCSSToken_Symbol && this.Peek(1) == EQUALS_SIGN) {
+ aToken.mType = opType;
+ this.Advance(2); // consume the operator character and the "="
+ return true;
+ }
+
+ // Otherwise, a symbol (DELIM).
+ aToken.mSymbol = ch; // mType is still the eCSSToken_Symbol assigned at the top
+ this.Advance();
+ return true;
+ },
+};
+
+/**
+ * Create and return a new CSS lexer.
+ *
+ * @param {String} input the CSS text to lex
+ * @return {CSSLexer} the new lexer
+ */
+function getCSSLexer(input) {
+ return new Scanner(input);
+}
+
+exports.getCSSLexer = getCSSLexer; // expose the factory function on this module's exports object