Diffstat (limited to 'third_party/rust/cssparser/src/tokenizer.rs')
-rw-r--r-- third_party/rust/cssparser/src/tokenizer.rs 1397
1 file changed, 1397 insertions(+), 0 deletions(-)
diff --git a/third_party/rust/cssparser/src/tokenizer.rs b/third_party/rust/cssparser/src/tokenizer.rs
new file mode 100644
index 0000000000..62f3868362
--- /dev/null
+++ b/third_party/rust/cssparser/src/tokenizer.rs
@@ -0,0 +1,1397 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// https://drafts.csswg.org/css-syntax/#tokenization
+
+use self::Token::*;
+use crate::cow_rc_str::CowRcStr;
+use crate::parser::ParserState;
+use matches::matches;
+use std::char;
+use std::i32;
+use std::ops::Range;
+
+/// One of the pieces the CSS input is broken into.
+///
+/// Some components use `Cow` in order to borrow from the original input string
+/// and avoid allocating/copying when possible.
+#[derive(PartialEq, Debug, Clone)]
+pub enum Token<'a> {
+ /// A [`<ident-token>`](https://drafts.csswg.org/css-syntax/#ident-token-diagram)
+ Ident(CowRcStr<'a>),
+
+ /// A [`<at-keyword-token>`](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram)
+ ///
+ /// The value does not include the `@` marker.
+ AtKeyword(CowRcStr<'a>),
+
+ /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "unrestricted"
+ ///
+ /// The value does not include the `#` marker.
+ Hash(CowRcStr<'a>),
+
+ /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "id"
+ ///
+ /// The value does not include the `#` marker.
+ IDHash(CowRcStr<'a>), // Hash that is a valid ID selector.
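+ // For example, `#foo` produces `IDHash("foo")` (a valid ID selector),
+ // while `#0f0` starts with a digit and so produces `Hash("0f0")`.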
+
+ /// A [`<string-token>`](https://drafts.csswg.org/css-syntax/#string-token-diagram)
+ ///
+ /// The value does not include the quotes.
+ QuotedString(CowRcStr<'a>),
+
+ /// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram)
+ ///
+ /// The value does not include the `url(` `)` markers. Note that `url( <string-token> )` is represented by a
+ /// `Function` token.
+ UnquotedUrl(CowRcStr<'a>),
+
+ /// A `<delim-token>`
+ Delim(char),
+
+ /// A [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram)
+ Number {
+ /// Whether the number had a `+` or `-` sign.
+ ///
+ /// This is used in some cases like the <An+B> micro syntax. (See the `parse_nth` function.)
+ has_sign: bool,
+
+ /// The value as a float
+ value: f32,
+
+ /// If the original source did not include a fractional part, the value as an integer.
+ int_value: Option<i32>,
+ },
+
+ /// A [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram)
+ Percentage {
+ /// Whether the number had a `+` or `-` sign.
+ has_sign: bool,
+
+ /// The value as a float, divided by 100 so that the nominal range is 0.0 to 1.0.
+ unit_value: f32,
+
+ /// If the original source did not include a fractional part, the value as an integer.
+ /// It is **not** divided by 100.
+ int_value: Option<i32>,
+ },
+
+ /// A [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram)
+ Dimension {
+ /// Whether the number had a `+` or `-` sign.
+ ///
+ /// This is used in some cases like the <An+B> micro syntax. (See the `parse_nth` function.)
+ has_sign: bool,
+
+ /// The value as a float
+ value: f32,
+
+ /// If the original source did not include a fractional part, the value as an integer.
+ int_value: Option<i32>,
+
+ /// The unit, e.g. "px" in `12px`
+ unit: CowRcStr<'a>,
+ },
+
+ /// A [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram)
+ WhiteSpace(&'a str),
+
+ /// A comment.
+ ///
+ /// The CSS Syntax spec does not generate tokens for comments,
+ /// but we do, because we can (borrowed &str makes it cheap).
+ ///
+ /// The value does not include the `/*` `*/` markers.
+ Comment(&'a str),
+
+ /// A `:` `<colon-token>`
+ Colon, // :
+
+ /// A `;` `<semicolon-token>`
+ Semicolon, // ;
+
+ /// A `,` `<comma-token>`
+ Comma, // ,
+
+ /// A `~=` [`<include-match-token>`](https://drafts.csswg.org/css-syntax/#include-match-token-diagram)
+ IncludeMatch,
+
+ /// A `|=` [`<dash-match-token>`](https://drafts.csswg.org/css-syntax/#dash-match-token-diagram)
+ DashMatch,
+
+ /// A `^=` [`<prefix-match-token>`](https://drafts.csswg.org/css-syntax/#prefix-match-token-diagram)
+ PrefixMatch,
+
+ /// A `$=` [`<suffix-match-token>`](https://drafts.csswg.org/css-syntax/#suffix-match-token-diagram)
+ SuffixMatch,
+
+ /// A `*=` [`<substring-match-token>`](https://drafts.csswg.org/css-syntax/#substring-match-token-diagram)
+ SubstringMatch,
+
+ /// A `<!--` [`<CDO-token>`](https://drafts.csswg.org/css-syntax/#CDO-token-diagram)
+ CDO,
+
+ /// A `-->` [`<CDC-token>`](https://drafts.csswg.org/css-syntax/#CDC-token-diagram)
+ CDC,
+
+ /// A [`<function-token>`](https://drafts.csswg.org/css-syntax/#function-token-diagram)
+ ///
+ /// The value (name) does not include the `(` marker.
+ Function(CowRcStr<'a>),
+
+ /// A `<(-token>`
+ ParenthesisBlock,
+
+ /// A `<[-token>`
+ SquareBracketBlock,
+
+ /// A `<{-token>`
+ CurlyBracketBlock,
+
+ /// A `<bad-url-token>`
+ ///
+ /// This token always indicates a parse error.
+ BadUrl(CowRcStr<'a>),
+
+ /// A `<bad-string-token>`
+ ///
+ /// This token always indicates a parse error.
+ BadString(CowRcStr<'a>),
+
+ /// A `<)-token>`
+ ///
+ /// When obtained from one of the `Parser::next*` methods,
+ /// this token is always unmatched and indicates a parse error.
+ CloseParenthesis,
+
+ /// A `<]-token>`
+ ///
+ /// When obtained from one of the `Parser::next*` methods,
+ /// this token is always unmatched and indicates a parse error.
+ CloseSquareBracket,
+
+ /// A `<}-token>`
+ ///
+ /// When obtained from one of the `Parser::next*` methods,
+ /// this token is always unmatched and indicates a parse error.
+ CloseCurlyBracket,
+}
+
+impl<'a> Token<'a> {
+ /// Return whether this token represents a parse error.
+ ///
+ /// `BadUrl` and `BadString` are tokenizer-level parse errors.
+ ///
+ /// `CloseParenthesis`, `CloseSquareBracket`, and `CloseCurlyBracket` are *unmatched*
+ /// and therefore parse errors when returned by one of the `Parser::next*` methods.
+ pub fn is_parse_error(&self) -> bool {
+ matches!(
+ *self,
+ BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
+ )
+ }
+}
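+// A minimal usage sketch (illustrative only; external callers normally drive
+// tokenization through the crate's `Parser` type rather than constructing a
+// `Tokenizer` directly):
+//
+//     let mut tokenizer = Tokenizer::new("color: red; width: 12px");
+//     while let Ok(token) = tokenizer.next() {
+//         assert!(!token.is_parse_error());
+//     }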
+
+#[derive(Clone)]
+pub struct Tokenizer<'a> {
+ input: &'a str,
+ /// Counted in bytes, not code points. From 0.
+ position: usize,
+ /// The position at the start of the current line; but adjusted to
+ /// ensure that computing the column will give the result in units
+ /// of UTF-16 characters.
+ current_line_start_position: usize,
+ current_line_number: u32,
+ var_or_env_functions: SeenStatus,
+ source_map_url: Option<&'a str>,
+ source_url: Option<&'a str>,
+}
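+// Example of the column adjustment above: consuming "é" (2 UTF-8 bytes, one
+// UTF-16 unit) advances `position` by 2 but bumps
+// `current_line_start_position` by 1, so their difference counts one column;
+// a 4-byte scalar such as "𝄞" (2 UTF-16 units) advances `position` by 4 and
+// the start position by 2.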
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+enum SeenStatus {
+ DontCare,
+ LookingForThem,
+ SeenAtLeastOne,
+}
+
+impl<'a> Tokenizer<'a> {
+ #[inline]
+ pub fn new(input: &str) -> Tokenizer {
+ Tokenizer::with_first_line_number(input, 0)
+ }
+
+ #[inline]
+ pub fn with_first_line_number(input: &str, first_line_number: u32) -> Tokenizer {
+ Tokenizer {
+ input: input,
+ position: 0,
+ current_line_start_position: 0,
+ current_line_number: first_line_number,
+ var_or_env_functions: SeenStatus::DontCare,
+ source_map_url: None,
+ source_url: None,
+ }
+ }
+
+ #[inline]
+ pub fn look_for_var_or_env_functions(&mut self) {
+ self.var_or_env_functions = SeenStatus::LookingForThem;
+ }
+
+ #[inline]
+ pub fn seen_var_or_env_functions(&mut self) -> bool {
+ let seen = self.var_or_env_functions == SeenStatus::SeenAtLeastOne;
+ self.var_or_env_functions = SeenStatus::DontCare;
+ seen
+ }
+
+ #[inline]
+ pub fn see_function(&mut self, name: &str) {
+ if self.var_or_env_functions == SeenStatus::LookingForThem {
+ if name.eq_ignore_ascii_case("var") || name.eq_ignore_ascii_case("env") {
+ self.var_or_env_functions = SeenStatus::SeenAtLeastOne;
+ }
+ }
+ }
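+ // Typical flow, as a sketch: call `look_for_var_or_env_functions()` before
+ // tokenizing a value, then `seen_var_or_env_functions()` afterwards to learn
+ // whether a `var(` or `env(` function was seen; that check also resets the
+ // status to `DontCare`.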
+
+ #[inline]
+ pub fn next(&mut self) -> Result<Token<'a>, ()> {
+ next_token(self)
+ }
+
+ #[inline]
+ pub fn position(&self) -> SourcePosition {
+ SourcePosition(self.position)
+ }
+
+ #[inline]
+ pub fn current_source_location(&self) -> SourceLocation {
+ SourceLocation {
+ line: self.current_line_number,
+ column: (self.position - self.current_line_start_position + 1) as u32,
+ }
+ }
+
+ #[inline]
+ pub fn current_source_map_url(&self) -> Option<&'a str> {
+ self.source_map_url
+ }
+
+ #[inline]
+ pub fn current_source_url(&self) -> Option<&'a str> {
+ self.source_url
+ }
+
+ #[inline]
+ pub fn state(&self) -> ParserState {
+ ParserState {
+ position: self.position,
+ current_line_start_position: self.current_line_start_position,
+ current_line_number: self.current_line_number,
+ at_start_of: None,
+ }
+ }
+
+ #[inline]
+ pub fn reset(&mut self, state: &ParserState) {
+ self.position = state.position;
+ self.current_line_start_position = state.current_line_start_position;
+ self.current_line_number = state.current_line_number;
+ }
+
+ #[inline]
+ pub fn slice_from(&self, start_pos: SourcePosition) -> &'a str {
+ &self.input[start_pos.0..self.position]
+ }
+
+ #[inline]
+ pub fn slice(&self, range: Range<SourcePosition>) -> &'a str {
+ &self.input[range.start.0..range.end.0]
+ }
+
+ pub fn current_source_line(&self) -> &'a str {
+ let current = self.position;
+ let start = self.input[0..current]
+ .rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
+ .map_or(0, |start| start + 1);
+ let end = self.input[current..]
+ .find(|c| matches!(c, '\r' | '\n' | '\x0C'))
+ .map_or(self.input.len(), |end| current + end);
+ &self.input[start..end]
+ }
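+ // e.g. for the input "a{\nb{" with the current position anywhere on the
+ // second line, `current_source_line` returns "b{".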
+
+ #[inline]
+ pub fn next_byte(&self) -> Option<u8> {
+ if self.is_eof() {
+ None
+ } else {
+ Some(self.input.as_bytes()[self.position])
+ }
+ }
+
+ // If this returns false, `tokenizer.next_char()` will not panic.
+ #[inline]
+ fn is_eof(&self) -> bool {
+ !self.has_at_least(0)
+ }
+
+ // If true, the input has at least `n` bytes left *after* the current one.
+ // That is, `tokenizer.byte_at(n)` will not panic.
+ #[inline]
+ fn has_at_least(&self, n: usize) -> bool {
+ self.position + n < self.input.len()
+ }
+
+ // Advance over N bytes in the input. This function can advance
+ // over ASCII bytes (excluding newlines), or UTF-8 sequence
+ // leaders (excluding leaders for 4-byte sequences).
+ #[inline]
+ pub fn advance(&mut self, n: usize) {
+ if cfg!(debug_assertions) {
+ // Each byte must either be an ASCII byte or a sequence
+ // leader, but not a 4-byte leader; also newlines are
+ // rejected.
+ for i in 0..n {
+ let b = self.byte_at(i);
+ debug_assert!(b.is_ascii() || (b & 0xF0 != 0xF0 && b & 0xC0 != 0x80));
+ debug_assert!(b != b'\r' && b != b'\n' && b != b'\x0C');
+ }
+ }
+ self.position += n
+ }
+
+ // Assumes non-EOF
+ #[inline]
+ fn next_byte_unchecked(&self) -> u8 {
+ self.byte_at(0)
+ }
+
+ #[inline]
+ fn byte_at(&self, offset: usize) -> u8 {
+ self.input.as_bytes()[self.position + offset]
+ }
+
+ // Advance over a single byte; the byte must be a UTF-8 sequence
+ // leader for a 4-byte sequence.
+ #[inline]
+ fn consume_4byte_intro(&mut self) {
+ debug_assert!(self.next_byte_unchecked() & 0xF0 == 0xF0);
+ // This takes two UTF-16 characters to represent, so we
+ // actually have an undercount.
+ self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
+ self.position += 1;
+ }
+
+ // Advance over a single byte; the byte must be a UTF-8
+ // continuation byte.
+ #[inline]
+ fn consume_continuation_byte(&mut self) {
+ debug_assert!(self.next_byte_unchecked() & 0xC0 == 0x80);
+ // Continuation bytes contribute to column overcount. Note
+ // that due to the special case for the 4-byte sequence intro,
+ // we must use wrapping add here.
+ self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
+ self.position += 1;
+ }
+
+ // Advance over any kind of byte, excluding newlines.
+ #[inline(never)]
+ fn consume_known_byte(&mut self, byte: u8) {
+ debug_assert!(byte != b'\r' && byte != b'\n' && byte != b'\x0C');
+ self.position += 1;
+ // Continuation bytes contribute to column overcount.
+ if byte & 0xF0 == 0xF0 {
+ // This takes two UTF-16 characters to represent, so we
+ // actually have an undercount.
+ self.current_line_start_position = self.current_line_start_position.wrapping_sub(1);
+ } else if byte & 0xC0 == 0x80 {
+ // Note that due to the special case for the 4-byte
+ // sequence intro, we must use wrapping add here.
+ self.current_line_start_position = self.current_line_start_position.wrapping_add(1);
+ }
+ }
+
+ #[inline]
+ fn next_char(&self) -> char {
+ self.input[self.position..].chars().next().unwrap()
+ }
+
+ // Given that a newline has been seen, advance over the newline
+ // and update the state.
+ #[inline]
+ fn consume_newline(&mut self) {
+ let byte = self.next_byte_unchecked();
+ debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
+ self.position += 1;
+ if byte == b'\r' && self.next_byte() == Some(b'\n') {
+ self.position += 1;
+ }
+ self.current_line_start_position = self.position;
+ self.current_line_number += 1;
+ }
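+ // Note that `consume_newline` treats a CRLF pair ("\r\n") as a single
+ // newline, so the line number is bumped once, not twice.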
+
+ #[inline]
+ fn has_newline_at(&self, offset: usize) -> bool {
+ self.position + offset < self.input.len()
+ && matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
+ }
+
+ #[inline]
+ fn consume_char(&mut self) -> char {
+ let c = self.next_char();
+ let len_utf8 = c.len_utf8();
+ self.position += len_utf8;
+ // Note that due to the special case for the 4-byte sequence
+ // intro, we must use wrapping add here.
+ self.current_line_start_position = self
+ .current_line_start_position
+ .wrapping_add(len_utf8 - c.len_utf16());
+ c
+ }
+
+ #[inline]
+ fn starts_with(&self, needle: &[u8]) -> bool {
+ self.input.as_bytes()[self.position..].starts_with(needle)
+ }
+
+ pub fn skip_whitespace(&mut self) {
+ while !self.is_eof() {
+ match_byte! { self.next_byte_unchecked(),
+ b' ' | b'\t' => {
+ self.advance(1)
+ },
+ b'\n' | b'\x0C' | b'\r' => {
+ self.consume_newline();
+ },
+ b'/' => {
+ if self.starts_with(b"/*") {
+ consume_comment(self);
+ } else {
+ return
+ }
+ }
+ _ => {
+ return
+ }
+ }
+ }
+ }
+
+ pub fn skip_cdc_and_cdo(&mut self) {
+ while !self.is_eof() {
+ match_byte! { self.next_byte_unchecked(),
+ b' ' | b'\t' => {
+ self.advance(1)
+ },
+ b'\n' | b'\x0C' | b'\r' => {
+ self.consume_newline();
+ },
+ b'/' => {
+ if self.starts_with(b"/*") {
+ consume_comment(self);
+ } else {
+ return
+ }
+ }
+ b'<' => {
+ if self.starts_with(b"<!--") {
+ self.advance(4)
+ } else {
+ return
+ }
+ }
+ b'-' => {
+ if self.starts_with(b"-->") {
+ self.advance(3)
+ } else {
+ return
+ }
+ }
+ _ => {
+ return
+ }
+ }
+ }
+ }
+}
+
+/// A position from the start of the input, counted in UTF-8 bytes.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
+pub struct SourcePosition(pub(crate) usize);
+
+impl SourcePosition {
+ /// Returns the current byte index in the original input.
+ #[inline]
+ pub fn byte_index(&self) -> usize {
+ self.0
+ }
+}
+
+/// The line and column number for a given position within the input.
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+pub struct SourceLocation {
+ /// The line number, starting at 0 for the first line, unless `with_first_line_number` was used.
+ pub line: u32,
+
+ /// The column number within a line, starting at 1 for the first character of the line.
+ /// Column numbers are counted in UTF-16 code units.
+ pub column: u32,
+}
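+// For example, at the very start of the input the location is
+// `SourceLocation { line: 0, column: 1 }`, unless `with_first_line_number`
+// supplied a different first line number.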
+
+fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
+ if tokenizer.is_eof() {
+ return Err(());
+ }
+ let b = tokenizer.next_byte_unchecked();
+ let token = match_byte! { b,
+ b' ' | b'\t' => {
+ consume_whitespace(tokenizer, false)
+ },
+ b'\n' | b'\x0C' | b'\r' => {
+ consume_whitespace(tokenizer, true)
+ },
+ b'"' => { consume_string(tokenizer, false) },
+ b'#' => {
+ tokenizer.advance(1);
+ if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
+ else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
+ // Any other valid case here already resulted in IDHash.
+ b'0'..=b'9' | b'-' => true,
+ _ => false,
+ } { Hash(consume_name(tokenizer)) }
+ else { Delim('#') }
+ },
+ b'$' => {
+ if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
+ else { tokenizer.advance(1); Delim('$') }
+ },
+ b'\'' => { consume_string(tokenizer, true) },
+ b'(' => { tokenizer.advance(1); ParenthesisBlock },
+ b')' => { tokenizer.advance(1); CloseParenthesis },
+ b'*' => {
+ if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
+ else { tokenizer.advance(1); Delim('*') }
+ },
+ b'+' => {
+ if (
+ tokenizer.has_at_least(1)
+ && matches!(tokenizer.byte_at(1), b'0'..=b'9')
+ ) || (
+ tokenizer.has_at_least(2)
+ && tokenizer.byte_at(1) == b'.'
+ && matches!(tokenizer.byte_at(2), b'0'..=b'9')
+ ) {
+ consume_numeric(tokenizer)
+ } else {
+ tokenizer.advance(1);
+ Delim('+')
+ }
+ },
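+ // e.g. `+3` and `+.5` start a <number-token>, while a lone `+` or `+x`
+ // yields `Delim('+')`. The `-` arm below does the same lookahead, and
+ // additionally checks for `-->` (CDC) and idents such as `-moz-box`.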
+ b',' => { tokenizer.advance(1); Comma },
+ b'-' => {
+ if (
+ tokenizer.has_at_least(1)
+ && matches!(tokenizer.byte_at(1), b'0'..=b'9')
+ ) || (
+ tokenizer.has_at_least(2)
+ && tokenizer.byte_at(1) == b'.'
+ && matches!(tokenizer.byte_at(2), b'0'..=b'9')
+ ) {
+ consume_numeric(tokenizer)
+ } else if tokenizer.starts_with(b"-->") {
+ tokenizer.advance(3);
+ CDC
+ } else if is_ident_start(tokenizer) {
+ consume_ident_like(tokenizer)
+ } else {
+ tokenizer.advance(1);
+ Delim('-')
+ }
+ },
+ b'.' => {
+ if tokenizer.has_at_least(1)
+ && matches!(tokenizer.byte_at(1), b'0'..=b'9'
+ ) {
+ consume_numeric(tokenizer)
+ } else {
+ tokenizer.advance(1);
+ Delim('.')
+ }
+ }
+ b'/' => {
+ if tokenizer.starts_with(b"/*") {
+ Comment(consume_comment(tokenizer))
+ } else {
+ tokenizer.advance(1);
+ Delim('/')
+ }
+ }
+ b'0'..=b'9' => { consume_numeric(tokenizer) },
+ b':' => { tokenizer.advance(1); Colon },
+ b';' => { tokenizer.advance(1); Semicolon },
+ b'<' => {
+ if tokenizer.starts_with(b"<!--") {
+ tokenizer.advance(4);
+ CDO
+ } else {
+ tokenizer.advance(1);
+ Delim('<')
+ }
+ },
+ b'@' => {
+ tokenizer.advance(1);
+ if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
+ else { Delim('@') }
+ },
+ b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { consume_ident_like(tokenizer) },
+ b'[' => { tokenizer.advance(1); SquareBracketBlock },
+ b'\\' => {
+ if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
+ else { tokenizer.advance(1); Delim('\\') }
+ },
+ b']' => { tokenizer.advance(1); CloseSquareBracket },
+ b'^' => {
+ if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
+ else { tokenizer.advance(1); Delim('^') }
+ },
+ b'{' => { tokenizer.advance(1); CurlyBracketBlock },
+ b'|' => {
+ if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
+ else { tokenizer.advance(1); Delim('|') }
+ },
+ b'}' => { tokenizer.advance(1); CloseCurlyBracket },
+ b'~' => {
+ if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
+ else { tokenizer.advance(1); Delim('~') }
+ },
+ _ => {
+ if !b.is_ascii() {
+ consume_ident_like(tokenizer)
+ } else {
+ tokenizer.advance(1);
+ Delim(b as char)
+ }
+ },
+ };
+ Ok(token)
+}
+
+fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool) -> Token<'a> {
+ let start_position = tokenizer.position();
+ if newline {
+ tokenizer.consume_newline();
+ } else {
+ tokenizer.advance(1);
+ }
+ while !tokenizer.is_eof() {
+ let b = tokenizer.next_byte_unchecked();
+ match_byte! { b,
+ b' ' | b'\t' => {
+ tokenizer.advance(1);
+ }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ _ => {
+ break
+ }
+ }
+ }
+ WhiteSpace(tokenizer.slice_from(start_position))
+}
+
+// Check for sourceMappingURL or sourceURL comments and update the
+// tokenizer appropriately.
+fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
+ let directive = "# sourceMappingURL=";
+ let directive_old = "@ sourceMappingURL=";
+
+ // If there is a source map directive, extract the URL.
+ if contents.starts_with(directive) || contents.starts_with(directive_old) {
+ let contents = &contents[directive.len()..];
+ tokenizer.source_map_url = contents
+ .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
+ .next()
+ }
+
+ let directive = "# sourceURL=";
+ let directive_old = "@ sourceURL=";
+
+ // If there is a source URL directive, extract the URL.
+ if contents.starts_with(directive) || contents.starts_with(directive_old) {
+ let contents = &contents[directive.len()..];
+ tokenizer.source_url = contents
+ .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
+ .next()
+ }
+}
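+// For example, a comment whose contents are `# sourceMappingURL=foo.map`
+// sets `source_map_url` to `Some("foo.map")`; the `@`-prefixed spelling is
+// the legacy form of the same directive.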
+
+fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
+ tokenizer.advance(2); // consume "/*"
+ let start_position = tokenizer.position();
+ while !tokenizer.is_eof() {
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b'*' => {
+ let end_position = tokenizer.position();
+ tokenizer.advance(1);
+ if tokenizer.next_byte() == Some(b'/') {
+ tokenizer.advance(1);
+ let contents = tokenizer.slice(start_position..end_position);
+ check_for_source_map(tokenizer, contents);
+ return contents
+ }
+ }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+ b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
+ _ => {
+ // ASCII or other leading byte.
+ tokenizer.advance(1);
+ }
+ }
+ }
+ let contents = tokenizer.slice_from(start_position);
+ check_for_source_map(tokenizer, contents);
+ contents
+}
+
+fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
+ match consume_quoted_string(tokenizer, single_quote) {
+ Ok(value) => QuotedString(value),
+ Err(value) => BadString(value),
+ }
+}
+
+/// Return `Err(..)` carrying the partial string on syntax error (i.e. an unescaped newline).
+fn consume_quoted_string<'a>(
+ tokenizer: &mut Tokenizer<'a>,
+ single_quote: bool,
+) -> Result<CowRcStr<'a>, CowRcStr<'a>> {
+ tokenizer.advance(1); // Skip the initial quote
+ // start_pos is at code point boundary, after " or '
+ let start_pos = tokenizer.position();
+ let mut string_bytes;
+ loop {
+ if tokenizer.is_eof() {
+ return Ok(tokenizer.slice_from(start_pos).into());
+ }
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b'"' => {
+ if !single_quote {
+ let value = tokenizer.slice_from(start_pos);
+ tokenizer.advance(1);
+ return Ok(value.into())
+ }
+ tokenizer.advance(1);
+ }
+ b'\'' => {
+ if single_quote {
+ let value = tokenizer.slice_from(start_pos);
+ tokenizer.advance(1);
+ return Ok(value.into())
+ }
+ tokenizer.advance(1);
+ }
+ b'\\' | b'\0' => {
+ // * The tokenizer’s input is UTF-8 since it’s `&str`.
+ // * start_pos is at a code point boundary
+ // * so is the current position (which is before '\\' or '\0')
+ //
+ // So `string_bytes` is well-formed UTF-8.
+ string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
+ break
+ }
+ b'\n' | b'\r' | b'\x0C' => {
+ return Err(tokenizer.slice_from(start_pos).into())
+ },
+ b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+ b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
+ _ => {
+ // ASCII or other leading byte.
+ tokenizer.advance(1);
+ }
+ }
+ }
+
+ while !tokenizer.is_eof() {
+ let b = tokenizer.next_byte_unchecked();
+ match_byte! { b,
+ b'\n' | b'\r' | b'\x0C' => {
+ return Err(
+ // string_bytes is well-formed UTF-8, see other comments.
+ unsafe {
+ from_utf8_release_unchecked(string_bytes)
+ }.into()
+ );
+ }
+ b'"' => {
+ tokenizer.advance(1);
+ if !single_quote {
+ break;
+ }
+ }
+ b'\'' => {
+ tokenizer.advance(1);
+ if single_quote {
+ break;
+ }
+ }
+ b'\\' => {
+ tokenizer.advance(1);
+ if !tokenizer.is_eof() {
+ match tokenizer.next_byte_unchecked() {
+ // Escaped newline
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ // This pushes one well-formed code point
+ _ => consume_escape_and_write(tokenizer, &mut string_bytes)
+ }
+ }
+ // else: escaped EOF, do nothing.
+ continue;
+ }
+ b'\0' => {
+ tokenizer.advance(1);
+ string_bytes.extend("\u{FFFD}".as_bytes());
+ continue;
+ }
+ b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+ b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
+ _ => {
+ // ASCII or other leading byte.
+ tokenizer.advance(1);
+ },
+ }
+
+ // If this byte is part of a multi-byte code point,
+ // we’ll end up copying the whole code point before this loop does something else.
+ string_bytes.push(b);
+ }
+
+ Ok(
+ // string_bytes is well-formed UTF-8, see other comments.
+ unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
+ )
+}
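+// An example of the copy-on-escape strategy above: for the input `"a\62 c"`,
+// the first loop borrows "a" until the backslash is seen, then switches to
+// the owned `string_bytes` buffer; the second loop decodes the escape and
+// appends "b" and then "c", yielding `QuotedString("abc")`.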
+
+#[inline]
+fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
+ !tokenizer.is_eof()
+ && match_byte! { tokenizer.next_byte_unchecked(),
+ b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { true },
+ b'-' => {
+ tokenizer.has_at_least(1) && match_byte! { tokenizer.byte_at(1),
+ b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => {
+ true
+ }
+ b'\\' => { !tokenizer.has_newline_at(1) }
+ b => { !b.is_ascii() },
+ }
+ },
+ b'\\' => { !tokenizer.has_newline_at(1) },
+ b => { !b.is_ascii() },
+ }
+}
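+// e.g. `foo`, `-foo`, `--bar`, `_x`, `\66 oo`, and names starting with a
+// non-ASCII code point all count as ident starts; `7up` and `-2` do not.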
+
+fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+ let value = consume_name(tokenizer);
+ if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'(' {
+ tokenizer.advance(1);
+ if value.eq_ignore_ascii_case("url") {
+ consume_unquoted_url(tokenizer).unwrap_or(Function(value))
+ } else {
+ tokenizer.see_function(&value);
+ Function(value)
+ }
+ } else {
+ Ident(value)
+ }
+}
+
+fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
+ // start_pos is the end of the previous token, therefore at a code point boundary
+ let start_pos = tokenizer.position();
+ let mut value_bytes;
+ loop {
+ if tokenizer.is_eof() {
+ return tokenizer.slice_from(start_pos).into();
+ }
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => { tokenizer.advance(1) },
+ b'\\' | b'\0' => {
+ // * The tokenizer’s input is UTF-8 since it’s `&str`.
+ // * start_pos is at a code point boundary
+ // * so is the current position (which is before '\\' or '\0')
+ //
+ // So `value_bytes` is well-formed UTF-8.
+ value_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
+ break
+ }
+ b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+ b'\xC0'..=b'\xEF' => { tokenizer.advance(1); }
+ b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
+ _b => {
+ return tokenizer.slice_from(start_pos).into();
+ }
+ }
+ }
+
+ while !tokenizer.is_eof() {
+ let b = tokenizer.next_byte_unchecked();
+ match_byte! { b,
+ b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {
+ tokenizer.advance(1);
+ value_bytes.push(b) // ASCII
+ }
+ b'\\' => {
+ if tokenizer.has_newline_at(1) { break }
+ tokenizer.advance(1);
+ // This pushes one well-formed code point
+ consume_escape_and_write(tokenizer, &mut value_bytes)
+ }
+ b'\0' => {
+ tokenizer.advance(1);
+ value_bytes.extend("\u{FFFD}".as_bytes());
+ },
+ b'\x80'..=b'\xBF' => {
+ // This byte *is* part of a multi-byte code point,
+ // we’ll end up copying the whole code point before this loop does something else.
+ tokenizer.consume_continuation_byte();
+ value_bytes.push(b)
+ }
+ b'\xC0'..=b'\xEF' => {
+ // This byte *is* part of a multi-byte code point,
+ // we’ll end up copying the whole code point before this loop does something else.
+ tokenizer.advance(1);
+ value_bytes.push(b)
+ }
+ b'\xF0'..=b'\xFF' => {
+ tokenizer.consume_4byte_intro();
+ value_bytes.push(b)
+ }
+ _ => {
+ // ASCII
+ break;
+ }
+ }
+ }
+ // value_bytes is well-formed UTF-8, see other comments.
+ unsafe { from_utf8_release_unchecked(value_bytes) }.into()
+}
+
+fn byte_to_hex_digit(b: u8) -> Option<u32> {
+ Some(match_byte! { b,
+ b'0' ..= b'9' => { b - b'0' },
+ b'a' ..= b'f' => { b - b'a' + 10 },
+ b'A' ..= b'F' => { b - b'A' + 10 },
+ _ => {
+ return None
+ }
+ } as u32)
+}
+
+fn byte_to_decimal_digit(b: u8) -> Option<u32> {
+ if b >= b'0' && b <= b'9' {
+ Some((b - b'0') as u32)
+ } else {
+ None
+ }
+}
+
+fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+ // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)?
+ // This is only ever called when there is at least one digit in \d*(\.\d+)?
+
+ // Do all the math in f64 so that large numbers overflow to +/-inf
+ // and i32::{MIN, MAX} are within range.
+
+ let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
+ b'-' => (true, -1.),
+ b'+' => (true, 1.),
+ _ => (false, 1.),
+ };
+ if has_sign {
+ tokenizer.advance(1);
+ }
+
+ let mut integral_part: f64 = 0.;
+ while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
+ integral_part = integral_part * 10. + digit as f64;
+ tokenizer.advance(1);
+ if tokenizer.is_eof() {
+ break;
+ }
+ }
+
+ let mut is_integer = true;
+
+ let mut fractional_part: f64 = 0.;
+ if tokenizer.has_at_least(1)
+ && tokenizer.next_byte_unchecked() == b'.'
+ && matches!(tokenizer.byte_at(1), b'0'..=b'9')
+ {
+ is_integer = false;
+ tokenizer.advance(1); // Consume '.'
+ let mut factor = 0.1;
+ while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
+ fractional_part += digit as f64 * factor;
+ factor *= 0.1;
+ tokenizer.advance(1);
+ if tokenizer.is_eof() {
+ break;
+ }
+ }
+ }
+
+ let mut value = sign * (integral_part + fractional_part);
+
+ if tokenizer.has_at_least(1) && matches!(tokenizer.next_byte_unchecked(), b'e' | b'E') {
+ if matches!(tokenizer.byte_at(1), b'0'..=b'9')
+ || (tokenizer.has_at_least(2)
+ && matches!(tokenizer.byte_at(1), b'+' | b'-')
+ && matches!(tokenizer.byte_at(2), b'0'..=b'9'))
+ {
+ is_integer = false;
+ tokenizer.advance(1);
+ let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
+ b'-' => (true, -1.),
+ b'+' => (true, 1.),
+ _ => (false, 1.),
+ };
+ if has_sign {
+ tokenizer.advance(1);
+ }
+ let mut exponent: f64 = 0.;
+ while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
+ exponent = exponent * 10. + digit as f64;
+ tokenizer.advance(1);
+ if tokenizer.is_eof() {
+ break;
+ }
+ }
+ value *= f64::powf(10., sign * exponent);
+ }
+ }
+
+ let int_value = if is_integer {
+ Some(if value >= i32::MAX as f64 {
+ i32::MAX
+ } else if value <= i32::MIN as f64 {
+ i32::MIN
+ } else {
+ value as i32
+ })
+ } else {
+ None
+ };
+
+ if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'%' {
+ tokenizer.advance(1);
+ return Percentage {
+ unit_value: (value / 100.) as f32,
+ int_value: int_value,
+ has_sign: has_sign,
+ };
+ }
+ let value = value as f32;
+ if is_ident_start(tokenizer) {
+ let unit = consume_name(tokenizer);
+ Dimension {
+ value: value,
+ int_value: int_value,
+ has_sign: has_sign,
+ unit: unit,
+ }
+ } else {
+ Number {
+ value: value,
+ int_value: int_value,
+ has_sign: has_sign,
+ }
+ }
+}
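+// Worked example: for `-12.5e2px`, `sign` is -1.0, the integral part is 12,
+// the fractional part is 0.5, and the exponent is 2, so `value` is -1250.0;
+// `is_integer` is false, and the trailing ident start produces
+// `Dimension { has_sign: true, value: -1250.0, int_value: None, unit: "px" }`.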
+
+#[inline]
+unsafe fn from_utf8_release_unchecked(string_bytes: Vec<u8>) -> String {
+ if cfg!(debug_assertions) {
+ String::from_utf8(string_bytes).unwrap()
+ } else {
+ String::from_utf8_unchecked(string_bytes)
+ }
+}
+
+fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
+ // This is only called after "url(", so the current position is a code point boundary.
+ let start_position = tokenizer.position;
+ let from_start = &tokenizer.input[tokenizer.position..];
+ let mut newlines = 0;
+ let mut last_newline = 0;
+ let mut found_printable_char = false;
+ let mut iter = from_start.bytes().enumerate();
+ loop {
+ let (offset, b) = match iter.next() {
+ Some(item) => item,
+ None => {
+ tokenizer.position = tokenizer.input.len();
+ break;
+ }
+ };
+ match_byte! { b,
+ b' ' | b'\t' => {},
+ b'\n' | b'\x0C' => {
+ newlines += 1;
+ last_newline = offset;
+ }
+ b'\r' => {
+ if from_start.as_bytes().get(offset + 1) != Some(&b'\n') {
+ newlines += 1;
+ last_newline = offset;
+ }
+ }
+ b'"' | b'\'' => { return Err(()) }, // Do not advance
+ b')' => {
+ // Don't use advance, because we may be skipping
+ // newlines here, and we want to avoid the assert.
+ tokenizer.position += offset + 1;
+ break
+ }
+ _ => {
+ // Don't use advance, because we may be skipping
+ // newlines here, and we want to avoid the assert.
+ tokenizer.position += offset;
+ found_printable_char = true;
+ break
+ }
+ }
+ }
+
+ if newlines > 0 {
+ tokenizer.current_line_number += newlines;
+ // No need for wrapping_add here, because there's no possible
+ // way to wrap.
+ tokenizer.current_line_start_position = start_position + last_newline + 1;
+ }
+
+ if found_printable_char {
+ // This function only consumed ASCII (whitespace) bytes,
+ // so the current position is a code point boundary.
+ return Ok(consume_unquoted_url_internal(tokenizer));
+ } else {
+ return Ok(UnquotedUrl("".into()));
+ }
+
+ fn consume_unquoted_url_internal<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+ // This function is only called with start_pos at a code point boundary.
+ let start_pos = tokenizer.position();
+ let mut string_bytes: Vec<u8>;
+ loop {
+ if tokenizer.is_eof() {
+ return UnquotedUrl(tokenizer.slice_from(start_pos).into());
+ }
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
+ let value = tokenizer.slice_from(start_pos);
+ return consume_url_end(tokenizer, start_pos, value.into())
+ }
+ b')' => {
+ let value = tokenizer.slice_from(start_pos);
+ tokenizer.advance(1);
+ return UnquotedUrl(value.into())
+ }
+ b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
+ | b'"' | b'\'' | b'(' => {
+ tokenizer.advance(1);
+ return consume_bad_url(tokenizer, start_pos)
+ },
+ b'\\' | b'\0' => {
+ // * The tokenizer’s input is UTF-8 since it’s `&str`.
+ // * start_pos is at a code point boundary
+ // * so is the current position (which is before '\\' or '\0')
+ //
+ // So `string_bytes` is well-formed UTF-8.
+ string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
+ break
+ }
+ b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+ b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
+ _ => {
+ // ASCII or other leading byte.
+ tokenizer.advance(1);
+ }
+ }
+ }
+ while !tokenizer.is_eof() {
+ let b = tokenizer.next_byte_unchecked();
+ match_byte! { b,
+ b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
+ // string_bytes is well-formed UTF-8, see other comments.
+ let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
+ return consume_url_end(tokenizer, start_pos, string)
+ }
+ b')' => {
+ tokenizer.advance(1);
+ break;
+ }
+ b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
+ | b'"' | b'\'' | b'(' => {
+ tokenizer.advance(1);
+ return consume_bad_url(tokenizer, start_pos);
+ }
+ b'\\' => {
+ tokenizer.advance(1);
+ if tokenizer.has_newline_at(0) {
+ return consume_bad_url(tokenizer, start_pos)
+ }
+
+ // This pushes one well-formed code point to string_bytes
+ consume_escape_and_write(tokenizer, &mut string_bytes)
+ },
+ b'\0' => {
+ tokenizer.advance(1);
+ string_bytes.extend("\u{FFFD}".as_bytes());
+ }
+ b'\x80'..=b'\xBF' => {
+ // We’ll end up copying the whole code point
+ // before this loop does something else.
+ tokenizer.consume_continuation_byte();
+ string_bytes.push(b);
+ }
+ b'\xF0'..=b'\xFF' => {
+ // We’ll end up copying the whole code point
+ // before this loop does something else.
+ tokenizer.consume_4byte_intro();
+ string_bytes.push(b);
+ }
+ // If this byte is part of a multi-byte code point,
+ // we’ll end up copying the whole code point before this loop does something else.
+ b => {
+ // ASCII or other leading byte.
+ tokenizer.advance(1);
+ string_bytes.push(b)
+ }
+ }
+ }
+ UnquotedUrl(
+ // string_bytes is well-formed UTF-8, see other comments.
+ unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
+ )
+ }
+
+ fn consume_url_end<'a>(
+ tokenizer: &mut Tokenizer<'a>,
+ start_pos: SourcePosition,
+ string: CowRcStr<'a>,
+ ) -> Token<'a> {
+ while !tokenizer.is_eof() {
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b')' => {
+ tokenizer.advance(1);
+ break
+ }
+ b' ' | b'\t' => { tokenizer.advance(1); }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ b => {
+ tokenizer.consume_known_byte(b);
+ return consume_bad_url(tokenizer, start_pos);
+ }
+ }
+ }
+ UnquotedUrl(string)
+ }
+
+ fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
+ // Consume up to the closing )
+ while !tokenizer.is_eof() {
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b')' => {
+ let contents = tokenizer.slice_from(start_pos).into();
+ tokenizer.advance(1);
+ return BadUrl(contents)
+ }
+ b'\\' => {
+ tokenizer.advance(1);
+ if matches!(tokenizer.next_byte(), Some(b')') | Some(b'\\')) {
+ tokenizer.advance(1); // Skip an escaped ')' or '\'
+ }
+ }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ b => {
+ tokenizer.consume_known_byte(b);
+ }
+ }
+ }
+ BadUrl(tokenizer.slice_from(start_pos).into())
+ }
+}
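+// e.g. `url(foo.png)` yields `UnquotedUrl("foo.png")`. For `url("foo.png")`
+// the quote makes this return `Err(())`, and the caller then emits
+// `Function("url")` instead; `url(foo bar)` yields `BadUrl("foo bar")`.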
+
+// Returns (value, number of hex digits consumed); at most 6 digits are consumed.
+fn consume_hex_digits<'a>(tokenizer: &mut Tokenizer<'a>) -> (u32, u32) {
+ let mut value = 0;
+ let mut digits = 0;
+ while digits < 6 && !tokenizer.is_eof() {
+ match byte_to_hex_digit(tokenizer.next_byte_unchecked()) {
+ Some(digit) => {
+ value = value * 16 + digit;
+ digits += 1;
+ tokenizer.advance(1);
+ }
+ None => break,
+ }
+ }
+ (value, digits)
+}
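+// e.g. on the input `fFx`, `consume_hex_digits` returns (255, 2) and leaves
+// the tokenizer positioned at `x`.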
+
+// Same constraints as consume_escape except it writes into `bytes` the result
+// instead of returning it.
+fn consume_escape_and_write(tokenizer: &mut Tokenizer, bytes: &mut Vec<u8>) {
+ bytes.extend(
+ consume_escape(tokenizer)
+ .encode_utf8(&mut [0; 4])
+ .as_bytes(),
+ )
+}
+
+// Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed
+// and that the next input character has already been verified
+// to not be a newline.
+fn consume_escape(tokenizer: &mut Tokenizer) -> char {
+ if tokenizer.is_eof() {
+ return '\u{FFFD}'; // Escaped EOF
+ }
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
+ let (c, _) = consume_hex_digits(tokenizer);
+ if !tokenizer.is_eof() {
+ match_byte! { tokenizer.next_byte_unchecked(),
+ b' ' | b'\t' => {
+ tokenizer.advance(1)
+ }
+ b'\n' | b'\x0C' | b'\r' => {
+ tokenizer.consume_newline();
+ }
+ _ => {}
+ }
+ }
+ static REPLACEMENT_CHAR: char = '\u{FFFD}';
+ if c != 0 {
+ let c = char::from_u32(c);
+ c.unwrap_or(REPLACEMENT_CHAR)
+ } else {
+ REPLACEMENT_CHAR
+ }
+ },
+ b'\0' => {
+ tokenizer.advance(1);
+ '\u{FFFD}'
+ }
+ _ => { tokenizer.consume_char() }
+ }
+}
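+// e.g. for the escape in `\26 B`, this reads the hex digits "26", consumes
+// the single whitespace terminator, and returns '&'; zero, surrogate, and
+// out-of-range values (such as `\0` or `\110000`) all yield U+FFFD.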