summary refs log tree commit diff stats
path: root/third_party/rust/jsparagus-parser/src/lexer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/jsparagus-parser/src/lexer.rs')
-rw-r--r-- third_party/rust/jsparagus-parser/src/lexer.rs 2325
1 files changed, 2325 insertions, 0 deletions
diff --git a/third_party/rust/jsparagus-parser/src/lexer.rs b/third_party/rust/jsparagus-parser/src/lexer.rs
new file mode 100644
index 0000000000..109558c5c2
--- /dev/null
+++ b/third_party/rust/jsparagus-parser/src/lexer.rs
@@ -0,0 +1,2325 @@
+//! JavaScript lexer.
+
+use crate::numeric_value::{parse_float, parse_int, NumericLiteralBase};
+use crate::parser::Parser;
+use crate::unicode::{is_id_continue, is_id_start};
+use ast::arena;
+use ast::source_atom_set::{CommonSourceAtomSetIndices, SourceAtomSet};
+use ast::source_slice_list::SourceSliceList;
+use ast::SourceLocation;
+use bumpalo::{collections::String, Bump};
+use generated_parser::{ParseError, Result, TerminalId, Token, TokenValue};
+use std::cell::RefCell;
+use std::convert::TryFrom;
+use std::rc::Rc;
+use std::str::Chars;
+
+pub struct Lexer<'alloc> { // State of a JavaScript lexer over one part of a source text.
+ allocator: &'alloc Bump, // Bump arena in which tokens are allocated (see `next`).
+
+ /// Token being filled in; `next` returns it and swaps in a fresh placeholder.
+ token: arena::Box<'alloc, Token>,
+
+ /// Offset of the end of the input: the starting offset given to `with_offset` plus the length of the input text, in UTF-8 bytes.
+ source_length: usize,
+
+ /// Iterator over the remaining not-yet-parsed input.
+ chars: Chars<'alloc>,
+
+ atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, // Atom set handed in by, and shared with, the creator of the lexer.
+
+ slices: Rc<RefCell<SourceSliceList<'alloc>>>, // Slice list handed in by, and shared with, the creator of the lexer.
+}
+
+enum NumericResult { // Kind of numeric literal recognized by the numeric-literal scanning methods.
+ Int { // Integer literal without a BigInt suffix, written in `base`.
+ base: NumericLiteralBase,
+ },
+ Float, // Literal containing a decimal point and/or an exponent part.
+ BigInt { // Literal ending in the BigInt suffix `n`, written in `base`.
+ #[allow(dead_code)]
+ base: NumericLiteralBase,
+ },
+}
+
+impl<'alloc> Lexer<'alloc> {
+    /// Create a lexer over `chars` whose source locations start counting at 0.
+    pub fn new(
+        allocator: &'alloc Bump,
+        chars: Chars<'alloc>,
+        atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
+        slices: Rc<RefCell<SourceSliceList<'alloc>>>,
+    ) -> Lexer<'alloc> {
+        Lexer::with_offset(allocator, chars, 0, atoms, slices)
+    }
+
+    /// Create a lexer for a part of a JS script or module. `offset` is the
+    /// total length of all previous parts, in bytes; source locations for
+    /// tokens created by the new lexer start counting from this number.
+    pub fn with_offset(
+        allocator: &'alloc Bump,
+        chars: Chars<'alloc>,
+        offset: usize,
+        atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
+        slices: Rc<RefCell<SourceSliceList<'alloc>>>,
+    ) -> Lexer<'alloc> {
+        // The placeholder for the first token is flagged as starting a line.
+        let mut token = arena::alloc(allocator, new_token());
+        token.is_on_new_line = true;
+
+        // End of this part, measured from the start of the first part.
+        let source_length = offset + chars.as_str().len();
+
+        Lexer {
+            allocator,
+            token,
+            source_length,
+            chars,
+            atoms,
+            slices,
+        }
+    }
+
+    /// True if the unread input begins with `s`.
+    fn is_looking_at(&self, s: &str) -> bool {
+        let rest = self.chars.as_str();
+        rest.starts_with(s)
+    }
+
+    /// Byte offset of the next unread character, including the starting
+    /// offset the lexer was created with.
+    pub fn offset(&self) -> usize {
+        let unread = self.chars.as_str().len();
+        self.source_length - unread
+    }
+
+    /// Look at the next character without consuming it.
+    fn peek(&self) -> Option<char> {
+        self.chars.clone().next()
+    }
+
+    /// Look at the character after the next one without consuming anything.
+    fn double_peek(&self) -> Option<char> {
+        self.chars.clone().nth(1)
+    }
+
+    /// Store the terminal, location and value of the token just scanned
+    /// into the pending token. Always returns `Ok(())`.
+    fn set_result(
+        &mut self,
+        terminal_id: TerminalId,
+        loc: SourceLocation,
+        value: TokenValue,
+    ) -> Result<'alloc, ()> {
+        let token = &mut self.token;
+        token.terminal_id = terminal_id;
+        token.loc = loc;
+        token.value = value;
+        Ok(())
+    }
+
+    /// Scan one token and return it.
+    ///
+    /// A fresh placeholder token is allocated first; once `advance_impl`
+    /// succeeds, the placeholder takes the place of the pending token and
+    /// the previously pending token is handed to the caller.
+    #[inline]
+    pub fn next<'parser>(
+        &mut self,
+        parser: &Parser<'parser>,
+    ) -> Result<'alloc, arena::Box<'alloc, Token>> {
+        let placeholder = arena::alloc_with(self.allocator, || new_token());
+        self.advance_impl(parser)?;
+        Ok(std::mem::replace(&mut self.token, placeholder))
+    }
+
+    /// Build the error for an unexpected position in the input:
+    /// `IllegalCharacter` for the next character, or `UnexpectedEnd` when
+    /// the input is exhausted. Nothing is consumed.
+    fn unexpected_err(&mut self) -> ParseError<'alloc> {
+        match self.peek() {
+            Some(ch) => ParseError::IllegalCharacter(ch),
+            None => ParseError::UnexpectedEnd,
+        }
+    }
+}
+
+/// Build a placeholder token (terminal `End`, default location) that the
+/// lexer overwrites once a real token has been scanned.
+fn new_token() -> Token {
+    let loc = SourceLocation::default();
+    Token::basic_token(TerminalId::End, loc)
+}
+
+// ----------------------------------------------------------------------------
+// 11.1 Unicode Format-Control Characters
+
+/// U+200C ZERO WIDTH NON-JOINER, abbreviated in the spec as <ZWNJ>.
+/// Specially permitted in identifiers (accepted by `is_identifier_part`).
+const ZWNJ: char = '\u{200c}';
+
+/// U+200D ZERO WIDTH JOINER, abbreviated as <ZWJ>.
+/// Specially permitted in identifiers (accepted by `is_identifier_part`).
+const ZWJ: char = '\u{200d}';
+
+/// U+FEFF ZERO WIDTH NO-BREAK SPACE, abbreviated <ZWNBSP>.
+/// Considered a whitespace character in JS.
+const ZWNBSP: char = '\u{feff}';
+
+// ----------------------------------------------------------------------------
+// 11.2 White Space
+
+/// U+0009 CHARACTER TABULATION, abbreviated <TAB>. Also the value of the `\t` string escape.
+const TAB: char = '\u{9}';
+
+/// U+000B VERTICAL TAB, abbreviated <VT>. Also the value of the `\v` string escape.
+const VT: char = '\u{b}';
+
+/// U+000C FORM FEED, abbreviated <FF>. Also the value of the `\f` string escape.
+const FF: char = '\u{c}';
+
+/// U+0020 SPACE, abbreviated <SP>.
+const SP: char = '\u{20}';
+
+/// U+00A0 NON-BREAKING SPACE, abbreviated <NBSP>.
+const NBSP: char = '\u{a0}';
+
+// ----------------------------------------------------------------------------
+// 11.3 Line Terminators
+
+/// U+000A LINE FEED, abbreviated in the spec as <LF>. Also the value of the `\n` string escape.
+const LF: char = '\u{a}';
+
+/// U+000D CARRIAGE RETURN, abbreviated in the spec as <CR>. Also the value of the `\r` string escape.
+const CR: char = '\u{d}';
+
+/// U+2028 LINE SEPARATOR, abbreviated <LS>.
+const LS: char = '\u{2028}';
+
+/// U+2029 PARAGRAPH SEPARATOR, abbreviated <PS>.
+const PS: char = '\u{2029}';
+
+// ----------------------------------------------------------------------------
+// 11.4 Comments
+//
+// Comment::
+// MultiLineComment
+// SingleLineComment
+
+impl<'alloc> Lexer<'alloc> {
+    /// Skip the remainder of a *MultiLineComment*, up to and including the
+    /// closing `*/`.
+    ///
+    /// ```text
+    /// MultiLineComment ::
+    ///     `/*` MultiLineCommentChars? `*/`
+    ///
+    /// MultiLineCommentChars ::
+    ///     MultiLineNotAsteriskChar MultiLineCommentChars?
+    ///     `*` PostAsteriskCommentChars?
+    ///
+    /// PostAsteriskCommentChars ::
+    ///     MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars?
+    ///     `*` PostAsteriskCommentChars?
+    ///
+    /// MultiLineNotAsteriskChar ::
+    ///     SourceCharacter but not `*`
+    ///
+    /// MultiLineNotForwardSlashOrAsteriskChar ::
+    ///     SourceCharacter but not one of `/` or `*`
+    /// ```
+    ///
+    /// (B.1.3 splits MultiLineComment into two nonterminals: MultiLineComment
+    /// and SingleLineDelimitedComment. The point of that is to help specify
+    /// that a SingleLineHTMLCloseComment must occur at the start of a line. We
+    /// use `is_on_new_line` for that.)
+    ///
+    /// Any line terminator inside the comment sets `is_on_new_line` on the
+    /// pending token. On success `builder` is replaced by a fresh `AutoCow`
+    /// created after the comment. If the input ends before `*/` is found,
+    /// `UnterminatedMultiLineComment` is returned.
+    fn skip_multi_line_comment(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
+        loop {
+            match self.chars.next() {
+                None => return Err(ParseError::UnterminatedMultiLineComment.into()),
+                Some('*') if self.peek() == Some('/') => {
+                    // Consume the `/` that closes the comment.
+                    self.chars.next();
+                    *builder = AutoCow::new(&self);
+                    return Ok(());
+                }
+                Some(CR) | Some(LF) | Some(PS) | Some(LS) => {
+                    self.token.is_on_new_line = true;
+                }
+                Some(_) => {}
+            }
+        }
+    }
+
+    /// Skip the remainder of a *SingleLineComment*, together with the first
+    /// line terminator character that ends it, if any.
+    ///
+    /// ```text
+    /// SingleLineComment ::
+    ///     `//` SingleLineCommentChars?
+    ///
+    /// SingleLineCommentChars ::
+    ///     SingleLineCommentChar SingleLineCommentChars?
+    ///
+    /// SingleLineCommentChar ::
+    ///     SourceCharacter but not LineTerminator
+    /// ```
+    ///
+    /// Afterwards `builder` is replaced by a fresh `AutoCow` and the pending
+    /// token is flagged as being on a new line.
+    fn skip_single_line_comment(&mut self, builder: &mut AutoCow<'alloc>) {
+        // `find` consumes everything up to and including the first match.
+        self.chars
+            .find(|&ch| ch == CR || ch == LF || ch == LS || ch == PS);
+        *builder = AutoCow::new(&self);
+        self.token.is_on_new_line = true;
+    }
+}
+
+// ----------------------------------------------------------------------------
+// 11.6 Names and Keywords
+
+/// Decide whether the single character `c` can begin an identifier.
+///
+/// ```text
+/// IdentifierStart ::
+///     UnicodeIDStart
+///     `$`
+///     `_`
+///     `\` UnicodeEscapeSequence
+///
+/// UnicodeIDStart ::
+///     > any Unicode code point with the Unicode property "ID_Start"
+/// ```
+///
+/// The `\` UnicodeEscapeSequence form is not recognized here; callers deal
+/// with it themselves.
+fn is_identifier_start(c: char) -> bool {
+    match c {
+        '$' | '_' | 'a'..='z' | 'A'..='Z' => true,
+        // No other ASCII character starts an identifier.
+        _ if c.is_ascii() => false,
+        _ => is_id_start(c),
+    }
+}
+
+/// Decide whether the single character `c` can continue an identifier.
+///
+/// ```text
+/// IdentifierPart ::
+///     UnicodeIDContinue
+///     `$`
+///     `\` UnicodeEscapeSequence
+///     <ZWNJ>
+///     <ZWJ>
+///
+/// UnicodeIDContinue ::
+///     > any Unicode code point with the Unicode property "ID_Continue"
+/// ```
+///
+/// The `\` UnicodeEscapeSequence form is not recognized here; callers deal
+/// with it themselves.
+fn is_identifier_part(c: char) -> bool {
+    match c {
+        '$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => true,
+        // No other ASCII character continues an identifier.
+        _ if c.is_ascii() => false,
+        _ => is_id_continue(c) || c == ZWNJ || c == ZWJ,
+    }
+}
+
+impl<'alloc> Lexer<'alloc> {
+ /// Scan everything after the IdentifierStart of an IdentifierName. The
+ /// IdentifierStart has already been consumed and recorded in `builder`.
+ ///
+ /// Returns `Ok((has_escapes, str))`: `has_escapes` is true if the
+ /// identifier contained any UnicodeEscapeSequence, and `str` is the
+ /// un-escaped IdentifierName, including the IdentifierStart.
+ ///
+ /// An escape that does not decode to an IdentifierPart yields
+ /// `InvalidEscapeSequence`.
+ ///
+ /// ```text
+ /// IdentifierName ::
+ ///     IdentifierStart
+ ///     IdentifierName IdentifierPart
+ /// ```
+ fn identifier_name_tail(
+     &mut self,
+     mut builder: AutoCow<'alloc>,
+ ) -> Result<'alloc, (bool, &'alloc str)> {
+     loop {
+         match self.peek() {
+             Some(ch) if is_identifier_part(ch) => {
+                 self.chars.next();
+                 builder.push_matching(ch);
+             }
+             Some('\\') => {
+                 self.chars.next();
+                 builder.force_allocation_without_current_ascii_char(&self);
+
+                 let escaped = self.unicode_escape_sequence_after_backslash()?;
+                 if !is_identifier_part(escaped) {
+                     return Err(ParseError::InvalidEscapeSequence.into());
+                 }
+                 builder.push_different(escaped);
+             }
+             // End of input, or a character that cannot be part of a name.
+             _ => break,
+         }
+     }
+
+     let has_escapes = builder.has_different();
+     let name = builder.finish(&self);
+     Ok((has_escapes, name))
+ }
+
+ /// Scan a whole IdentifierName, starting with its IdentifierStart, and
+ /// return the un-escaped name.
+ ///
+ /// Fails with `UnexpectedEnd` at end of input, and with
+ /// `IllegalCharacter` when the first character (or the character an
+ /// initial escape decodes to) is not an IdentifierStart.
+ fn identifier_name(&mut self, mut builder: AutoCow<'alloc>) -> Result<'alloc, &'alloc str> {
+     let first = match self.chars.next() {
+         Some(c) => c,
+         None => return Err(ParseError::UnexpectedEnd.into()),
+     };
+
+     if first == '\\' {
+         builder.force_allocation_without_current_ascii_char(&self);
+
+         let escaped = self.unicode_escape_sequence_after_backslash()?;
+         if !is_identifier_start(escaped) {
+             return Err(ParseError::IllegalCharacter(escaped).into());
+         }
+         builder.push_different(escaped);
+     } else if is_identifier_start(first) {
+         builder.push_matching(first);
+     } else {
+         return Err(ParseError::IllegalCharacter(first).into());
+     }
+
+     let (_has_escapes, name) = self.identifier_name_tail(builder)?;
+     Ok(name)
+ }
+
+ /// Finish scanning an *IdentifierName* or keyword whose *IdentifierStart*
+ /// has already been scanned and pushed to `builder`, and store the
+ /// resulting token.
+ ///
+ /// `start` is the offset of the *IdentifierStart*.
+ ///
+ /// The lexer has no knowledge of the syntactic context, so every possible
+ /// keyword (contextual ones included) is reported as such. It is the
+ /// parser's job to understand that, for example, `TerminalId::If` is not
+ /// a keyword when used as a property or method name.
+ ///
+ /// * Text without escapes that spells a known keyword produces that
+ ///   keyword's `TerminalId` (`null`, `true` and `false` produce the
+ ///   corresponding literal terminals). At present `async`, `await`, `let`
+ ///   and `yield` are rejected with `ParseError::NotImplemented` instead.
+ /// * Text without escapes that spells no keyword produces
+ ///   `TerminalId::Name`.
+ /// * Text containing at least one escape sequence always produces
+ ///   `TerminalId::NameWithEscape`, whatever its StringValue is. For
+ ///   example `\u{79}ield` is a `NameWithEscape` whose StringValue is
+ ///   "yield".
+ fn identifier_tail(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> {
+     let (has_escapes, text) = self.identifier_name_tail(builder)?;
+
+     // Pairs a keyword terminal with its `CommonSourceAtomSetIndices` atom.
+     let keyword = |id: TerminalId, atom| (id, TokenValue::Atom(atom));
+
+     // https://tc39.es/ecma262/#sec-keywords-and-reserved-words
+     //
+     // Keywords in the grammar match literal sequences of specific
+     // SourceCharacter elements; a code point in a keyword cannot be
+     // expressed by a `\` UnicodeEscapeSequence.
+     let (id, value) = if has_escapes {
+         // Never a keyword. Checking the name against reserved words is
+         // left to the consumer.
+         (TerminalId::NameWithEscape, self.string_to_token_value(text))
+     } else {
+         match text {
+             "as" => keyword(TerminalId::As, CommonSourceAtomSetIndices::as_()),
+             "async" => {
+                 // Would be (TerminalId::Async, async_()) once supported.
+                 return Err(ParseError::NotImplemented(
+                     "async cannot be handled in parser due to multiple lookahead",
+                 )
+                 .into());
+             }
+             "await" => {
+                 // Would be (TerminalId::Await, await_()) once supported.
+                 return Err(
+                     ParseError::NotImplemented("await cannot be handled in parser").into(),
+                 );
+             }
+             "break" => keyword(TerminalId::Break, CommonSourceAtomSetIndices::break_()),
+             "case" => keyword(TerminalId::Case, CommonSourceAtomSetIndices::case()),
+             "catch" => keyword(TerminalId::Catch, CommonSourceAtomSetIndices::catch()),
+             "class" => keyword(TerminalId::Class, CommonSourceAtomSetIndices::class()),
+             "const" => keyword(TerminalId::Const, CommonSourceAtomSetIndices::const_()),
+             "continue" => keyword(
+                 TerminalId::Continue,
+                 CommonSourceAtomSetIndices::continue_(),
+             ),
+             "debugger" => keyword(
+                 TerminalId::Debugger,
+                 CommonSourceAtomSetIndices::debugger(),
+             ),
+             "default" => keyword(TerminalId::Default, CommonSourceAtomSetIndices::default()),
+             "delete" => keyword(TerminalId::Delete, CommonSourceAtomSetIndices::delete()),
+             "do" => keyword(TerminalId::Do, CommonSourceAtomSetIndices::do_()),
+             "else" => keyword(TerminalId::Else, CommonSourceAtomSetIndices::else_()),
+             "enum" => keyword(TerminalId::Enum, CommonSourceAtomSetIndices::enum_()),
+             "export" => keyword(TerminalId::Export, CommonSourceAtomSetIndices::export()),
+             "extends" => keyword(TerminalId::Extends, CommonSourceAtomSetIndices::extends()),
+             "finally" => keyword(TerminalId::Finally, CommonSourceAtomSetIndices::finally()),
+             "for" => keyword(TerminalId::For, CommonSourceAtomSetIndices::for_()),
+             "from" => keyword(TerminalId::From, CommonSourceAtomSetIndices::from()),
+             "function" => keyword(
+                 TerminalId::Function,
+                 CommonSourceAtomSetIndices::function(),
+             ),
+             "get" => keyword(TerminalId::Get, CommonSourceAtomSetIndices::get()),
+             "if" => keyword(TerminalId::If, CommonSourceAtomSetIndices::if_()),
+             "implements" => keyword(
+                 TerminalId::Implements,
+                 CommonSourceAtomSetIndices::implements(),
+             ),
+             "import" => keyword(TerminalId::Import, CommonSourceAtomSetIndices::import()),
+             "in" => keyword(TerminalId::In, CommonSourceAtomSetIndices::in_()),
+             "instanceof" => keyword(
+                 TerminalId::Instanceof,
+                 CommonSourceAtomSetIndices::instanceof(),
+             ),
+             "interface" => keyword(
+                 TerminalId::Interface,
+                 CommonSourceAtomSetIndices::interface(),
+             ),
+             "let" => {
+                 // Would be (TerminalId::Let, let_()) once supported.
+                 return Err(ParseError::NotImplemented(
+                     "let cannot be handled in parser due to multiple lookahead",
+                 )
+                 .into());
+             }
+             "new" => keyword(TerminalId::New, CommonSourceAtomSetIndices::new_()),
+             "of" => keyword(TerminalId::Of, CommonSourceAtomSetIndices::of()),
+             "package" => keyword(TerminalId::Package, CommonSourceAtomSetIndices::package()),
+             "private" => keyword(TerminalId::Private, CommonSourceAtomSetIndices::private()),
+             "protected" => keyword(
+                 TerminalId::Protected,
+                 CommonSourceAtomSetIndices::protected(),
+             ),
+             "public" => keyword(TerminalId::Public, CommonSourceAtomSetIndices::public()),
+             "return" => keyword(TerminalId::Return, CommonSourceAtomSetIndices::return_()),
+             "set" => keyword(TerminalId::Set, CommonSourceAtomSetIndices::set()),
+             "static" => keyword(TerminalId::Static, CommonSourceAtomSetIndices::static_()),
+             "super" => keyword(TerminalId::Super, CommonSourceAtomSetIndices::super_()),
+             "switch" => keyword(TerminalId::Switch, CommonSourceAtomSetIndices::switch()),
+             "target" => keyword(TerminalId::Target, CommonSourceAtomSetIndices::target()),
+             "this" => keyword(TerminalId::This, CommonSourceAtomSetIndices::this()),
+             "throw" => keyword(TerminalId::Throw, CommonSourceAtomSetIndices::throw()),
+             "try" => keyword(TerminalId::Try, CommonSourceAtomSetIndices::try_()),
+             "typeof" => keyword(TerminalId::Typeof, CommonSourceAtomSetIndices::typeof_()),
+             "var" => keyword(TerminalId::Var, CommonSourceAtomSetIndices::var()),
+             "void" => keyword(TerminalId::Void, CommonSourceAtomSetIndices::void()),
+             "while" => keyword(TerminalId::While, CommonSourceAtomSetIndices::while_()),
+             "with" => keyword(TerminalId::With, CommonSourceAtomSetIndices::with()),
+             "yield" => {
+                 // Would be (TerminalId::Yield, yield_()) once supported.
+                 return Err(
+                     ParseError::NotImplemented("yield cannot be handled in parser").into(),
+                 );
+             }
+             "null" => keyword(TerminalId::NullLiteral, CommonSourceAtomSetIndices::null()),
+             "true" => keyword(
+                 TerminalId::BooleanLiteral,
+                 CommonSourceAtomSetIndices::true_(),
+             ),
+             "false" => keyword(
+                 TerminalId::BooleanLiteral,
+                 CommonSourceAtomSetIndices::false_(),
+             ),
+             _ => (TerminalId::Name, self.string_to_token_value(text)),
+         }
+     };
+
+     let loc = SourceLocation::new(start, self.offset());
+     self.set_result(id, loc, value)
+ }
+
+ /// Scan the IdentifierName of a PrivateIdentifier and store the resulting
+ /// `PrivateIdentifier` token, whose location begins at `start`.
+ ///
+ /// ```text
+ /// PrivateIdentifier::
+ ///     `#` IdentifierName
+ /// ```
+ fn private_identifier(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> {
+     let name = self.identifier_name(builder)?;
+     let value = self.string_to_token_value(name);
+     let loc = SourceLocation::new(start, self.offset());
+     self.set_result(TerminalId::PrivateIdentifier, loc, value)
+ }
+
+ /// Scan a UnicodeEscapeSequence whose leading backslash has already been
+ /// consumed, and return the character it denotes. Anything other than a
+ /// `u` as the next character is an `InvalidEscapeSequence`.
+ ///
+ /// ```text
+ /// UnicodeEscapeSequence::
+ ///     `u` Hex4Digits
+ ///     `u{` CodePoint `}`
+ /// ```
+ fn unicode_escape_sequence_after_backslash(&mut self) -> Result<'alloc, char> {
+     if self.chars.next() != Some('u') {
+         return Err(ParseError::InvalidEscapeSequence.into());
+     }
+     self.unicode_escape_sequence_after_backslash_and_u()
+ }
+
+ /// Scan the part of a UnicodeEscapeSequence that follows `\u`: either
+ /// `{` CodePoint `}` or exactly four hex digits. Returns the denoted
+ /// character; a missing `}` is an `InvalidEscapeSequence`.
+ fn unicode_escape_sequence_after_backslash_and_u(&mut self) -> Result<'alloc, char> {
+     if self.peek() != Some('{') {
+         return self.hex_4_digits();
+     }
+
+     // Braced form: `{` CodePoint `}`.
+     self.chars.next();
+     let value = self.code_point()?;
+     if self.chars.next() != Some('}') {
+         return Err(ParseError::InvalidEscapeSequence.into());
+     }
+     Ok(value)
+ }
+}
+
+impl<'alloc> Lexer<'alloc> {
+ // ------------------------------------------------------------------------
+ // 11.8.3 Numeric Literals
+
+ /// Consume a mandatory run of decimal digits, which may contain numeric
+ /// separators between digits. Fails if the next character is not a digit.
+ ///
+ /// ```text
+ /// NumericLiteralSeparator::
+ ///     `_`
+ ///
+ /// DecimalDigits ::
+ ///     DecimalDigit
+ ///     DecimalDigits NumericLiteralSeparator? DecimalDigit
+ ///
+ /// DecimalDigit :: one of
+ ///     `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
+ /// ```
+ fn decimal_digits(&mut self) -> Result<'alloc, ()> {
+     match self.peek() {
+         Some('0'..='9') => {
+             self.chars.next();
+         }
+         _ => return Err(self.unexpected_err().into()),
+     }
+
+     self.decimal_digits_after_first_digit()
+ }
+
+ /// Like `decimal_digits`, but consumes nothing and succeeds when the next
+ /// character is not a decimal digit.
+ fn optional_decimal_digits(&mut self) -> Result<'alloc, ()> {
+     match self.peek() {
+         Some('0'..='9') => {
+             self.chars.next();
+             self.decimal_digits_after_first_digit()
+         }
+         _ => Ok(()),
+     }
+ }
+
+ /// Consume the rest of a DecimalDigits run after its first digit. Every
+ /// `_` separator must be followed directly by a digit, otherwise an
+ /// error for the offending position is returned.
+ fn decimal_digits_after_first_digit(&mut self) -> Result<'alloc, ()> {
+     loop {
+         match self.peek() {
+             Some('0'..='9') => {
+                 self.chars.next();
+             }
+             Some('_') => {
+                 self.chars.next();
+
+                 // A separator is only legal between two digits.
+                 match self.peek() {
+                     Some('0'..='9') => {
+                         self.chars.next();
+                     }
+                     _ => return Err(self.unexpected_err().into()),
+                 }
+             }
+             _ => return Ok(()),
+         }
+     }
+ }
+
+ /// Consume an ExponentPart if one starts at the current position.
+ /// Returns `Ok(true)` if an exponent was consumed and `Ok(false)` if the
+ /// next character is not an exponent indicator.
+ ///
+ /// ```text
+ /// ExponentPart ::
+ ///     ExponentIndicator SignedInteger
+ ///
+ /// ExponentIndicator :: one of
+ ///     `e` `E`
+ ///
+ /// SignedInteger ::
+ ///     DecimalDigits
+ ///     `+` DecimalDigits
+ ///     `-` DecimalDigits
+ /// ```
+ fn optional_exponent(&mut self) -> Result<'alloc, bool> {
+     match self.peek() {
+         Some('e') | Some('E') => {
+             self.chars.next();
+             self.decimal_exponent()?;
+             Ok(true)
+         }
+         _ => Ok(false),
+     }
+ }
+
+ /// Consume a SignedInteger (an optional `+` or `-` followed by mandatory
+ /// decimal digits), i.e. what follows the exponent indicator.
+ fn decimal_exponent(&mut self) -> Result<'alloc, ()> {
+     match self.peek() {
+         Some('+') | Some('-') => {
+             self.chars.next();
+         }
+         _ => {}
+     }
+
+     self.decimal_digits()
+ }
+
+ /// Consume one character and return its value as a hexadecimal digit.
+ /// End of input yields `InvalidEscapeSequence`; any other character
+ /// yields `IllegalCharacter`.
+ ///
+ /// ```text
+ /// HexDigit :: one of
+ ///     `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F`
+ /// ```
+ fn hex_digit(&mut self) -> Result<'alloc, u32> {
+     match self.chars.next() {
+         None => Err(ParseError::InvalidEscapeSequence.into()),
+         Some(c) => c
+             .to_digit(16)
+             .ok_or_else(|| ParseError::IllegalCharacter(c).into()),
+     }
+ }
+
+ /// Convert a numeric code point into a `char`. Surrogate code points
+ /// (0xD800 through 0xDFFF) are reported as `NotImplemented`; any other
+ /// value that is not a valid `char` is an `InvalidEscapeSequence`.
+ fn code_point_to_char(value: u32) -> Result<'alloc, char> {
+     match value {
+         0xd800..=0xdfff => {
+             Err(ParseError::NotImplemented("unicode escape sequences (surrogates)").into())
+         }
+         _ => char::try_from(value).map_err(|_| ParseError::InvalidEscapeSequence.into()),
+     }
+ }
+
+ /// Consume exactly four hex digits and return the character whose code
+ /// point they spell.
+ ///
+ /// ```text
+ /// Hex4Digits ::
+ ///     HexDigit HexDigit HexDigit HexDigit
+ /// ```
+ fn hex_4_digits(&mut self) -> Result<'alloc, char> {
+     let mut code = self.hex_digit()?;
+     for _ in 1..4 {
+         code = (code << 4) | self.hex_digit()?;
+     }
+     Self::code_point_to_char(code)
+ }
+
+ /// Consume one or more hex digits, stopping in front of the first
+ /// character that is not a hex digit, and return the character they
+ /// denote. Running out of input or exceeding 0x10FFFF is an
+ /// `InvalidEscapeSequence`.
+ ///
+ /// ```text
+ /// CodePoint ::
+ ///     HexDigits but only if MV of HexDigits ≤ 0x10FFFF
+ ///
+ /// HexDigits ::
+ ///     HexDigit
+ ///     HexDigits HexDigit
+ /// ```
+ fn code_point(&mut self) -> Result<'alloc, char> {
+     let mut code = self.hex_digit()?;
+
+     loop {
+         let digit = match self.peek() {
+             None => return Err(ParseError::InvalidEscapeSequence.into()),
+             Some(c) => match c.to_digit(16) {
+                 Some(d) => d,
+                 // Leave the terminating character for the caller.
+                 None => break,
+             },
+         };
+         self.chars.next();
+
+         // Checked after every digit, so `code` cannot overflow.
+         code = (code << 4) | digit;
+         if code > 0x10FFFF {
+             return Err(ParseError::InvalidEscapeSequence.into());
+         }
+     }
+
+     Self::code_point_to_char(code)
+ }
+
+ /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after
+ /// having already consumed the first character, which was `0`.
+ ///
+ /// Returns the kind of literal that was scanned. Every successful path
+ /// verifies, via `check_after_numeric_literal`, that the literal is not
+ /// immediately followed by an IdentifierStart or a decimal digit; this
+ /// includes the `0e…` form. Legacy octal-like literals (`0` followed by
+ /// another digit) are reported as `NotImplemented`.
+ ///
+ /// ```text
+ /// NumericLiteral ::
+ ///     DecimalLiteral
+ ///     DecimalBigIntegerLiteral
+ ///     NonDecimalIntegerLiteral
+ ///     NonDecimalIntegerLiteral BigIntLiteralSuffix
+ ///
+ /// DecimalBigIntegerLiteral ::
+ ///     `0` BigIntLiteralSuffix
+ ///     NonZeroDigit DecimalDigits? BigIntLiteralSuffix
+ ///
+ /// NonDecimalIntegerLiteral ::
+ ///     BinaryIntegerLiteral
+ ///     OctalIntegerLiteral
+ ///     HexIntegerLiteral
+ ///
+ /// BigIntLiteralSuffix ::
+ ///     `n`
+ /// ```
+ fn numeric_literal_starting_with_zero(&mut self) -> Result<'alloc, NumericResult> {
+     let base = match self.peek() {
+         // BinaryIntegerLiteral ::
+         //     `0b` BinaryDigits
+         //     `0B` BinaryDigits
+         //
+         // BinaryDigits ::
+         //     BinaryDigit
+         //     BinaryDigits NumericLiteralSeparator? BinaryDigit
+         //
+         // BinaryDigit :: one of
+         //     `0` `1`
+         Some('b') | Some('B') => {
+             self.chars.next();
+             self.digits_in_radix(2)?;
+             NumericLiteralBase::Binary
+         }
+
+         // OctalIntegerLiteral ::
+         //     `0o` OctalDigits
+         //     `0O` OctalDigits
+         //
+         // OctalDigits ::
+         //     OctalDigit
+         //     OctalDigits NumericLiteralSeparator? OctalDigit
+         //
+         // OctalDigit :: one of
+         //     `0` `1` `2` `3` `4` `5` `6` `7`
+         Some('o') | Some('O') => {
+             self.chars.next();
+             self.digits_in_radix(8)?;
+             NumericLiteralBase::Octal
+         }
+
+         // HexIntegerLiteral ::
+         //     `0x` HexDigits
+         //     `0X` HexDigits
+         //
+         // HexDigits ::
+         //     HexDigit
+         //     HexDigits NumericLiteralSeparator? HexDigit
+         //
+         // HexDigit :: one of
+         //     `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F`
+         Some('x') | Some('X') => {
+             self.chars.next();
+             self.digits_in_radix(16)?;
+             NumericLiteralBase::Hex
+         }
+
+         Some('.') => {
+             self.chars.next();
+             return self.decimal_literal_after_decimal_point_after_digits();
+         }
+
+         Some('e') | Some('E') => {
+             self.chars.next();
+             self.decimal_exponent()?;
+             // `0e1x` must be rejected just like `1e1x`.
+             self.check_after_numeric_literal()?;
+             return Ok(NumericResult::Float);
+         }
+
+         Some('n') => {
+             self.chars.next();
+             self.check_after_numeric_literal()?;
+             return Ok(NumericResult::BigInt {
+                 base: NumericLiteralBase::Decimal,
+             });
+         }
+
+         Some('0'..='9') => {
+             // In nonstrict code, as a legacy feature, other numbers
+             // starting with `0` are allowed. If /0[0-7]+/ matches, it's a
+             // LegacyOctalIntegerLiteral; but if we see an `8` or `9` in
+             // the number, it's decimal. Decimal numbers can have a decimal
+             // point and/or ExponentPart; octals can't.
+             //
+             // Neither is allowed with a BigIntLiteralSuffix `n`.
+             //
+             // LegacyOctalIntegerLiteral ::
+             //     `0` OctalDigit
+             //     LegacyOctalIntegerLiteral OctalDigit
+             //
+             // NonOctalDecimalIntegerLiteral ::
+             //     `0` NonOctalDigit
+             //     LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit
+             //     NonOctalDecimalIntegerLiteral DecimalDigit
+             //
+             // LegacyOctalLikeDecimalIntegerLiteral ::
+             //     `0` OctalDigit
+             //     LegacyOctalLikeDecimalIntegerLiteral OctalDigit
+             //
+             // NonOctalDigit :: one of
+             //     `8` `9`
+             //
+             // TODO: implement a `strict_mode` check; in non-strict code,
+             // distinguish Octal from NonOctalDecimal and support a
+             // NonOctalDecimal followed by a decimal point and/or
+             // ExponentPart.
+             return Err(ParseError::NotImplemented("LegacyOctalIntegerLiteral").into());
+         }
+
+         // This is almost always the token `0` in practice.
+         _ => {
+             self.check_after_numeric_literal()?;
+             return Ok(NumericResult::Int {
+                 base: NumericLiteralBase::Decimal,
+             });
+         }
+     };
+
+     // Only NonDecimalIntegerLiterals reach this point; they may carry a
+     // BigIntLiteralSuffix.
+     if let Some('n') = self.peek() {
+         self.chars.next();
+         self.check_after_numeric_literal()?;
+         return Ok(NumericResult::BigInt { base });
+     }
+
+     self.check_after_numeric_literal()?;
+     Ok(NumericResult::Int { base })
+ }
+
+ /// Consume one or more digits of the given `radix` (2, 8 or 16), allowing
+ /// a single `_` separator between two digits. A missing first digit, or a
+ /// separator not followed by a digit, produces the error for the
+ /// offending position.
+ fn digits_in_radix(&mut self, radix: u32) -> Result<'alloc, ()> {
+     // True at the start and right after a separator, where a digit is
+     // mandatory.
+     let mut need_digit = true;
+     loop {
+         match self.peek() {
+             Some(c) if c.is_digit(radix) => {
+                 self.chars.next();
+                 need_digit = false;
+             }
+             Some('_') if !need_digit => {
+                 self.chars.next();
+                 need_digit = true;
+             }
+             _ if need_digit => return Err(self.unexpected_err().into()),
+             _ => return Ok(()),
+         }
+     }
+ }
+
+ /// Scan the remainder of a NumericLiteral (defined in 11.8.3, extended by
+ /// B.1.1) whose first character, a decimal digit, has already been
+ /// consumed, and report what kind of literal it is.
+ fn decimal_literal_after_first_digit(&mut self) -> Result<'alloc, NumericResult> {
+     // DecimalLiteral ::
+     //     DecimalIntegerLiteral `.` DecimalDigits? ExponentPart?
+     //     `.` DecimalDigits ExponentPart?
+     //     DecimalIntegerLiteral ExponentPart?
+     //
+     // DecimalIntegerLiteral ::
+     //     `0`   #see `numeric_literal_starting_with_zero`
+     //     NonZeroDigit
+     //     NonZeroDigit NumericLiteralSeparator? DecimalDigits
+     //     NonOctalDecimalIntegerLiteral  #see `numeric_literal_
+     //                                    #     starting_with_zero`
+     //
+     // NonZeroDigit :: one of
+     //     `1` `2` `3` `4` `5` `6` `7` `8` `9`
+
+     self.decimal_digits_after_first_digit()?;
+
+     match self.peek() {
+         Some('.') => {
+             self.chars.next();
+             self.decimal_literal_after_decimal_point_after_digits()
+         }
+         Some('n') => {
+             self.chars.next();
+             self.check_after_numeric_literal()?;
+             Ok(NumericResult::BigInt {
+                 base: NumericLiteralBase::Decimal,
+             })
+         }
+         _ => {
+             // An exponent turns an otherwise integral literal into a float.
+             let is_float = self.optional_exponent()?;
+             self.check_after_numeric_literal()?;
+
+             if is_float {
+                 Ok(NumericResult::Float)
+             } else {
+                 Ok(NumericResult::Int {
+                     base: NumericLiteralBase::Decimal,
+                 })
+             }
+         }
+     }
+ }
+
+ /// Scan what follows the leading `.` of a literal of the form
+ ///
+ /// ```text
+ ///     `.` DecimalDigits ExponentPart?
+ /// ```
+ ///
+ /// At least one digit is required. The result is always `Float`.
+ fn decimal_literal_after_decimal_point(&mut self) -> Result<'alloc, NumericResult> {
+     self.decimal_digits()?;
+     let _has_exponent = self.optional_exponent()?;
+     self.check_after_numeric_literal()
+         .map(|()| NumericResult::Float)
+ }
+
+ /// Scan what follows the `.` of a literal of the form
+ ///
+ /// ```text
+ /// DecimalLiteral ::
+ ///     DecimalIntegerLiteral `.` DecimalDigits? ExponentPart?
+ /// ```
+ ///
+ /// Digits after the point are optional here. The result is always
+ /// `Float`.
+ fn decimal_literal_after_decimal_point_after_digits(
+     &mut self,
+ ) -> Result<'alloc, NumericResult> {
+     self.optional_decimal_digits()?;
+     let _has_exponent = self.optional_exponent()?;
+     self.check_after_numeric_literal()
+         .map(|()| NumericResult::Float)
+ }
+
+ /// Enforce the rule from 11.8.3 that the SourceCharacter immediately
+ /// following a NumericLiteral must not be an IdentifierStart or
+ /// DecimalDigit. Nothing is consumed; an offending character is reported
+ /// as `IllegalCharacter`.
+ fn check_after_numeric_literal(&self) -> Result<'alloc, ()> {
+     match self.peek() {
+         Some(ch) if is_identifier_start(ch) || ch.is_ascii_digit() => {
+             Err(ParseError::IllegalCharacter(ch).into())
+         }
+         _ => Ok(()),
+     }
+ }
+
+ // ------------------------------------------------------------------------
+ // 11.8.4 String Literals (as extended by B.1.2)
+
+ /// Scan a LineContinuation or EscapeSequence in a string literal, having
+ /// already consumed the initial backslash character. The character the
+ /// escape stands for, if any, is appended to `text`.
+ ///
+ /// ```text
+ /// LineContinuation ::
+ ///     `\` LineTerminatorSequence
+ ///
+ /// EscapeSequence ::
+ ///     CharacterEscapeSequence
+ ///     (in strict mode code) `0` [lookahead ∉ DecimalDigit]
+ ///     (in non-strict code) LegacyOctalEscapeSequence
+ ///     HexEscapeSequence
+ ///     UnicodeEscapeSequence
+ ///
+ /// CharacterEscapeSequence ::
+ ///     SingleEscapeCharacter
+ ///     NonEscapeCharacter
+ ///
+ /// SingleEscapeCharacter :: one of
+ ///     `'` `"` `\` `b` `f` `n` `r` `t` `v`
+ ///
+ /// LegacyOctalEscapeSequence ::
+ ///     OctalDigit [lookahead ∉ OctalDigit]
+ ///     ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
+ ///     FourToSeven OctalDigit
+ ///     ZeroToThree OctalDigit OctalDigit
+ ///
+ /// ZeroToThree :: one of
+ ///     `0` `1` `2` `3`
+ ///
+ /// FourToSeven :: one of
+ ///     `4` `5` `6` `7`
+ /// ```
+ ///
+ /// End of input right after the backslash is an `UnterminatedString`.
+ /// Legacy octal escapes, and `\0` followed by a digit, are reported as
+ /// `NotImplemented`.
+ fn escape_sequence(&mut self, text: &mut String<'alloc>) -> Result<'alloc, ()> {
+     let c = match self.chars.next() {
+         Some(c) => c,
+         None => return Err(ParseError::UnterminatedString.into()),
+     };
+
+     let unescaped = match c {
+         // LineContinuation: contributes nothing to the string value.
+         //
+         // is_on_new_line is deliberately left alone, because this
+         // LineContinuation has no bearing on whether the current string
+         // literal was the first token on the line where it started.
+         LF | LS | PS => return Ok(()),
+
+         // LineContinuation again; `\r\n` counts as a single terminator.
+         CR => {
+             if self.peek() == Some(LF) {
+                 self.chars.next();
+             }
+             return Ok(());
+         }
+
+         '\'' | '"' | '\\' => c,
+         'b' => '\u{8}',
+         'f' => FF,
+         'n' => LF,
+         'r' => CR,
+         't' => TAB,
+         'v' => VT,
+
+         // HexEscapeSequence ::
+         //     `x` HexDigit HexDigit
+         'x' => {
+             let high = self.hex_digit()?;
+             let low = self.hex_digit()?;
+             match char::try_from((high << 4) | low) {
+                 Ok(ch) => ch,
+                 Err(_) => return Err(ParseError::InvalidEscapeSequence.into()),
+             }
+         }
+
+         'u' => self.unicode_escape_sequence_after_backslash_and_u()?,
+
+         // In strict mode code and in template literals, the relevant
+         // production is
+         //
+         //     EscapeSequence ::
+         //         `0` [lookahead <! DecimalDigit]
+         //
+         // In non-strict StringLiterals, `\0` begins a
+         // LegacyOctalEscapeSequence which may contain more digits.
+         '0' => match self.peek() {
+             Some('0'..='7') => {
+                 return Err(ParseError::NotImplemented(
+                     "legacy octal escape sequence in string",
+                 )
+                 .into());
+             }
+             Some('8'..='9') => {
+                 return Err(ParseError::NotImplemented(
+                     "digit immediately following \\0 escape sequence",
+                 )
+                 .into());
+             }
+             _ => '\0',
+         },
+
+         '1'..='7' => {
+             return Err(
+                 ParseError::NotImplemented("legacy octal escape sequence in string").into(),
+             );
+         }
+
+         // "\8" and "\9" are invalid per spec, but SpiderMonkey and V8
+         // accept them, and JSC accepts them in non-strict mode. "\8" is
+         // "8" and "\9" is "9". Every other character stands for itself.
+         other => other,
+     };
+
+     text.push(unescaped);
+     Ok(())
+ }
+
+    /// Scan a string literal, having already consumed the starting quote
+    /// character `delimiter`.
+    ///
+    /// On success a `StringLiteral` token is recorded whose location starts
+    /// at the opening quote and whose value is the literal's text with escape
+    /// sequences decoded. A raw `\r`, `\n`, or the end of input before the
+    /// closing quote yields `ParseError::UnterminatedString`.
+    ///
+    /// ```text
+    /// StringLiteral ::
+    ///     `"` DoubleStringCharacters? `"`
+    ///     `'` SingleStringCharacters? `'`
+    ///
+    /// DoubleStringCharacters ::
+    ///     DoubleStringCharacter DoubleStringCharacters?
+    ///
+    /// SingleStringCharacters ::
+    ///     SingleStringCharacter SingleStringCharacters?
+    ///
+    /// DoubleStringCharacter ::
+    ///     SourceCharacter but not one of `"` or `\` or LineTerminator
+    ///     <LS>
+    ///     <PS>
+    ///     `\` EscapeSequence
+    ///     LineContinuation
+    ///
+    /// SingleStringCharacter ::
+    ///     SourceCharacter but not one of `'` or `\` or LineTerminator
+    ///     <LS>
+    ///     <PS>
+    ///     `\` EscapeSequence
+    ///     LineContinuation
+    /// ```
+    fn string_literal(&mut self, delimiter: char) -> Result<'alloc, ()> {
+        // The opening quote has already been consumed, so step back over it.
+        let literal_start = self.offset() - 1;
+        let mut contents = AutoCow::new(&*self);
+
+        loop {
+            match self.chars.next() {
+                None | Some('\r') | Some('\n') => {
+                    return Err(ParseError::UnterminatedString.into());
+                }
+
+                // Closing quote: the text excludes the quote just consumed.
+                Some(quote @ '"') | Some(quote @ '\'') if quote == delimiter => {
+                    let text = contents.finish_without_push(&*self);
+                    let value = self.string_to_token_value(text);
+                    let location = SourceLocation::new(literal_start, self.offset());
+                    return self.set_result(TerminalId::StringLiteral, location, value);
+                }
+
+                Some('\\') => {
+                    // From here on the text differs from the source, so it
+                    // has to be accumulated in an owned string.
+                    let decoded = contents.get_mut_string_without_current_ascii_char(&*self);
+                    self.escape_sequence(decoded)?;
+                }
+
+                // Everything else stands for itself, including the quote
+                // character that is not the delimiter.
+                //
+                // NonEscapeCharacter ::
+                //     SourceCharacter but not one of EscapeCharacter or LineTerminator
+                //
+                // EscapeCharacter ::
+                //     SingleEscapeCharacter
+                //     DecimalDigit
+                //     `x`
+                //     `u`
+                Some(plain) => contents.push_matching(plain),
+            }
+        }
+    }
+
+ // ------------------------------------------------------------------------
+ // 11.8.5 Regular Expression Literals
+
+    /// Consume the character following a backslash inside a regular
+    /// expression literal.
+    ///
+    /// Any character is accepted except a line terminator; a line terminator
+    /// or the end of input yields `ParseError::UnterminatedRegExp`.
+    fn regular_expression_backslash_sequence(&mut self) -> Result<'alloc, ()> {
+        let unterminated = match self.chars.next() {
+            None => true,
+            Some(ch) => ch == CR || ch == LF || ch == LS || ch == PS,
+        };
+
+        if unterminated {
+            Err(ParseError::UnterminatedRegExp.into())
+        } else {
+            Ok(())
+        }
+    }
+
+    // See 12.2.8 and 11.8.5 sections.
+    //
+    // Scan a regular expression literal, having already consumed the opening
+    // `/`. `builder` must have been started at the beginning of the token, so
+    // that the slice it yields covers the whole literal, flags included.
+    //
+    // On success a `RegularExpressionLiteral` token is recorded whose value
+    // is that source slice. A line terminator or the end of input inside the
+    // body yields `ParseError::UnterminatedRegExp`; unknown or repeated flags
+    // yield `ParseError::NotImplemented`.
+    fn regular_expression_literal(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
+        // The opening `/` has already been consumed, so step back over it to
+        // make the reported location start at the first character of the
+        // literal (the same adjustment `string_literal` makes for its quote).
+        let offset = self.offset() - 1;
+
+        // Body: everything up to the closing `/`. A `/` inside a character
+        // class does not terminate the literal.
+        loop {
+            match self.chars.next() {
+                None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
+                    return Err(ParseError::UnterminatedRegExp.into());
+                }
+                Some('/') => {
+                    break;
+                }
+                Some('[') => {
+                    // RegularExpressionClass.
+                    loop {
+                        match self.chars.next() {
+                            None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
+                                return Err(ParseError::UnterminatedRegExp.into());
+                            }
+                            Some(']') => {
+                                break;
+                            }
+                            Some('\\') => {
+                                self.regular_expression_backslash_sequence()?;
+                            }
+                            Some(_) => {}
+                        }
+                    }
+                }
+                Some('\\') => {
+                    self.regular_expression_backslash_sequence()?;
+                }
+                Some(_) => {}
+            }
+        }
+
+        // Flags: the run of ASCII identifier characters after the closing `/`.
+        let mut flag_text = AutoCow::new(&self);
+        while let Some(ch) = self.peek() {
+            match ch {
+                '$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => {
+                    self.chars.next();
+                    flag_text.push_matching(ch);
+                }
+                _ => break,
+            }
+        }
+
+        // 12.2.8.2.1 Assert literal is a RegularExpressionLiteral.
+        let literal = builder.finish(&self);
+
+        // 12.2.8.2.2 Check that only gimsuy flags are mentioned at most once.
+        //
+        // Each lowercase letter maps to one bit (`a` is bit 0), so both the
+        // allowed set and the set of flags seen so far fit in a `u32`.
+        let gimsuy_mask: u32 = ['g', 'i', 'm', 's', 'u', 'y']
+            .iter()
+            .map(|x| 1 << ((*x as u8) - ('a' as u8)))
+            .sum();
+        let mut flag_text_set: u32 = 0;
+        for ch in flag_text.finish(&self).chars() {
+            // Checked first so that the subtraction below cannot underflow.
+            if !ch.is_ascii_lowercase() {
+                return Err(ParseError::NotImplemented(
+                    "Unexpected flag in regular expression literal",
+                )
+                .into());
+            }
+            let ch_mask = 1 << ((ch as u8) - ('a' as u8));
+            if ch_mask & gimsuy_mask == 0 {
+                return Err(ParseError::NotImplemented(
+                    "Unexpected flag in regular expression literal",
+                )
+                .into());
+            }
+            if flag_text_set & ch_mask != 0 {
+                return Err(ParseError::NotImplemented(
+                    "Flag is mentioned twice in regular expression literal",
+                )
+                .into());
+            }
+            flag_text_set |= ch_mask;
+        }
+
+        // TODO: 12.2.8.2.4 and 12.2.8.2.5 Check that the body matches the
+        // grammar defined in 21.2.1.
+
+        let value = self.slice_to_token_value(literal);
+        self.set_result(
+            TerminalId::RegularExpressionLiteral,
+            SourceLocation::new(offset, self.offset()),
+            value,
+        )
+    }
+
+ // ------------------------------------------------------------------------
+ // 11.8.6 Template Literal Lexical Components
+
+    /// Parse a template literal component token, having already consumed the
+    /// starting `` ` `` or `}` character. On success, the `id` of the returned
+    /// `Token` is `subst` (if the token ends with `${`) or `tail` (if the
+    /// token ends with `` ` ``).
+    ///
+    /// `start` is the offset at which the token began; it becomes the start
+    /// of the token's source location. The token value is the text between
+    /// the delimiters, with escape sequences decoded by `escape_sequence`.
+    /// Reaching the end of input before a closing `${` or `` ` `` yields
+    /// `ParseError::UnterminatedString`.
+    ///
+    /// ```text
+    /// NoSubstitutionTemplate ::
+    ///     ``` TemplateCharacters? ```
+    ///
+    /// TemplateHead ::
+    ///     ``` TemplateCharacters? `${`
+    ///
+    /// TemplateMiddle ::
+    ///     `}` TemplateCharacters? `${`
+    ///
+    /// TemplateTail ::
+    ///     `}` TemplateCharacters? ```
+    ///
+    /// TemplateCharacters ::
+    ///     TemplateCharacter TemplateCharacters?
+    /// ```
+    fn template_part(
+        &mut self,
+        start: usize,
+        subst: TerminalId,
+        tail: TerminalId,
+    ) -> Result<'alloc, ()> {
+        let mut builder = AutoCow::new(&self);
+        while let Some(ch) = self.chars.next() {
+            // TemplateCharacter ::
+            //     `$` [lookahead != `{` ]
+            //     `\` EscapeSequence
+            //     `\` NotEscapeSequence
+            //     LineContinuation
+            //     LineTerminatorSequence
+            //     SourceCharacter but not one of ``` or `\` or `$` or LineTerminator
+            //
+            // NotEscapeSequence ::
+            //     `0` DecimalDigit
+            //     DecimalDigit but not `0`
+            //     `x` [lookahead <! HexDigit]
+            //     `x` HexDigit [lookahead <! HexDigit]
+            //     `u` [lookahead <! HexDigit] [lookahead != `{`]
+            //     `u` HexDigit [lookahead <! HexDigit]
+            //     `u` HexDigit HexDigit [lookahead <! HexDigit]
+            //     `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit]
+            //     `u` `{` [lookahead <! HexDigit]
+            //     `u` `{` NotCodePoint [lookahead <! HexDigit]
+            //     `u` `{` CodePoint [lookahead <! HexDigit] [lookahead != `}`]
+            //
+            // NotCodePoint ::
+            //     HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ]
+            //
+            // CodePoint ::
+            //     HexDigits [> but only if MV of |HexDigits| ≤ 0x10FFFF ]
+            if ch == '$' && self.peek() == Some('{') {
+                // Take the text while `$` is still the most recently consumed
+                // character: `finish_without_push` drops exactly one trailing
+                // byte, so calling it after also consuming `{` would leave
+                // the `$` in the text whenever no escape forced an allocation.
+                let text = builder.finish_without_push(&self);
+                self.chars.next();
+                let value = self.string_to_token_value(text);
+                return self.set_result(subst, SourceLocation::new(start, self.offset()), value);
+            }
+            if ch == '`' {
+                let value = self.string_to_token_value(builder.finish_without_push(&self));
+                return self.set_result(tail, SourceLocation::new(start, self.offset()), value);
+            }
+            // TODO: Support NotEscapeSequence. For now every backslash is
+            // handled with the same rules as in a string literal.
+            if ch == '\\' {
+                let text = builder.get_mut_string_without_current_ascii_char(&self);
+                self.escape_sequence(text)?;
+            } else {
+                builder.push_matching(ch);
+            }
+        }
+        Err(ParseError::UnterminatedString.into())
+    }
+
+    /// Record a token that carries no value, spanning from `start` to the
+    /// current offset. Used for punctuators and for `TerminalId::End`.
+    fn set_result_without_value(
+        &mut self,
+        start: usize,
+        terminal_id: TerminalId,
+    ) -> Result<'alloc, ()> {
+        self.set_result(
+            terminal_id,
+            SourceLocation::new(start, self.offset()),
+            TokenValue::None,
+        )
+    }
+
+    /// Scan the next token from the input and record it with `set_result`.
+    ///
+    /// White space, line terminators and comments are skipped first; a line
+    /// terminator additionally sets `is_on_new_line` on the pending token.
+    /// The first significant character then selects the kind of token. When
+    /// the input is exhausted, a `TerminalId::End` token is recorded.
+    ///
+    /// `parser` is consulted where the lexical grammar is ambiguous: a `/`
+    /// is a division operator only if the parser can accept
+    /// `TerminalId::Divide` (otherwise it starts a regular expression), and a
+    /// `}` continues a template literal only if the parser can accept
+    /// `TerminalId::TemplateMiddle`.
+    fn advance_impl<'parser>(&mut self, parser: &Parser<'parser>) -> Result<'alloc, ()> {
+        let mut builder = AutoCow::new(&self);
+        let mut start = self.offset();
+        while let Some(c) = self.chars.next() {
+            match c {
+                // 11.2 White Space
+                //
+                // WhiteSpace ::
+                //     <TAB>
+                //     <VT>
+                //     <FF>
+                //     <SP>
+                //     <NBSP>
+                //     <ZWNBSP>
+                //     <USP>
+                TAB |
+                VT |
+                FF |
+                SP |
+                NBSP |
+                ZWNBSP |
+                '\u{1680}' | // Ogham space mark (in <USP>)
+                '\u{2000}' ..= '\u{200a}' | // typesetting spaces (in <USP>)
+                '\u{202f}' | // Narrow no-break space (in <USP>)
+                '\u{205f}' | // Medium mathematical space (in <USP>)
+                '\u{3000}' // Ideographic space (in <USP>)
+                    => {
+                    // TODO - The spec uses <USP> to stand for any character
+                    // with category "Space_Separator" (Zs). New Unicode
+                    // standards may add characters to this set. This should therefore be
+                    // implemented using the Unicode database somehow.
+                    builder = AutoCow::new(&self);
+                    start = self.offset();
+                    continue;
+                }
+
+                // 11.3 Line Terminators
+                //
+                // LineTerminator ::
+                //     <LF>
+                //     <CR>
+                //     <LS>
+                //     <PS>
+                LF | CR | LS | PS => {
+                    self.token.is_on_new_line = true;
+                    builder = AutoCow::new(&self);
+                    start = self.offset();
+                    continue;
+                }
+
+                '0' => {
+                    let result = self.numeric_literal_starting_with_zero()?;
+                    let text = builder.finish(&self);
+                    return self.numeric_result_to_advance_result(text, start, result);
+                }
+
+                '1'..='9' => {
+                    let result = self.decimal_literal_after_first_digit()?;
+                    let text = builder.finish(&self);
+                    return self.numeric_result_to_advance_result(text, start, result);
+                }
+
+                '"' | '\'' => {
+                    return self.string_literal(c);
+                }
+
+                '`' => {
+                    return self.template_part(
+                        start,
+                        TerminalId::TemplateHead,
+                        TerminalId::NoSubstitutionTemplate,
+                    );
+                }
+
+                // `!`, `!=`, `!==`
+                '!' => {
+                    let id = match self.peek() {
+                        Some('=') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::StrictNotEqual
+                                }
+                                _ => TerminalId::LaxNotEqual,
+                            }
+                        }
+                        _ => TerminalId::LogicalNot,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `%`, `%=`
+                '%' => {
+                    let id = match self.peek() {
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::RemainderAssign
+                        }
+                        _ => TerminalId::Remainder,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `&`, `&&`, `&&=`, `&=`
+                '&' => {
+                    let id = match self.peek() {
+                        Some('&') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::LogicalAndAssign
+                                }
+                                _ => TerminalId::LogicalAnd,
+                            }
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::BitwiseAndAssign
+                        }
+                        _ => TerminalId::BitwiseAnd,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `*`, `**`, `**=`, `*=`
+                '*' => {
+                    let id = match self.peek() {
+                        Some('*') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::ExponentiateAssign
+                                }
+                                _ => TerminalId::Exponentiate,
+                            }
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::MultiplyAssign
+                        }
+                        _ => TerminalId::Star,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `+`, `++`, `+=`
+                '+' => {
+                    let id = match self.peek() {
+                        Some('+') => {
+                            self.chars.next();
+                            TerminalId::Increment
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::AddAssign
+                        }
+                        _ => TerminalId::Plus,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `-`, `--`, `-=`, and the `-->` comment
+                '-' => {
+                    let id = match self.peek() {
+                        Some('-') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('>') if self.token.is_on_new_line => {
+                                    // B.1.3 SingleLineHTMLCloseComment
+                                    // TODO: Limit this to Script (not Module).
+                                    self.skip_single_line_comment(&mut builder);
+                                    // Restart the token span after the
+                                    // comment, as every other comment-skipping
+                                    // path does; otherwise the location of the
+                                    // following token could begin at this `--`.
+                                    start = self.offset();
+                                    continue;
+                                }
+                                _ => TerminalId::Decrement,
+                            }
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::SubtractAssign
+                        }
+                        _ => TerminalId::Minus,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `.`, `...`, or a decimal literal such as `.5`
+                '.' => {
+                    let id = match self.peek() {
+                        Some('.') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('.') => {
+                                    self.chars.next();
+                                    TerminalId::Ellipsis
+                                }
+                                _ => return Err(ParseError::IllegalCharacter('.').into()),
+                            }
+                        }
+                        Some('0'..='9') => {
+                            let result = self.decimal_literal_after_decimal_point()?;
+                            let text = builder.finish(&self);
+                            return self.numeric_result_to_advance_result(text, start, result);
+                        }
+                        _ => TerminalId::Dot,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // Comments, `/`, `/=`, or a regular expression literal
+                '/' => match self.peek() {
+                    Some('/') => {
+                        // SingleLineComment :: `//` SingleLineCommentChars?
+                        self.chars.next();
+                        self.skip_single_line_comment(&mut builder);
+                        start = self.offset();
+                        continue;
+                    }
+                    Some('*') => {
+                        self.chars.next();
+                        self.skip_multi_line_comment(&mut builder)?;
+                        start = self.offset();
+                        continue;
+                    }
+                    _ => {
+                        // Only the parser state can tell a division operator
+                        // from the start of a regular expression.
+                        if parser.can_accept_terminal(TerminalId::Divide) {
+                            let id = match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::DivideAssign
+                                }
+                                _ => TerminalId::Divide,
+                            };
+                            return self.set_result_without_value(start, id);
+                        }
+                        return self.regular_expression_literal(&mut builder);
+                    }
+                },
+
+                '}' => {
+                    if parser.can_accept_terminal(TerminalId::TemplateMiddle) {
+                        return self.template_part(
+                            start,
+                            TerminalId::TemplateMiddle,
+                            TerminalId::TemplateTail,
+                        );
+                    }
+                    return self.set_result_without_value(start, TerminalId::CloseBrace);
+                }
+
+                // `<`, `<<`, `<<=`, `<=`, and the `<!--` comment
+                '<' => {
+                    let id = match self.peek() {
+                        Some('<') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::LeftShiftAssign
+                                }
+                                _ => TerminalId::LeftShift,
+                            }
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::LessThanOrEqualTo
+                        }
+                        Some('!') if self.is_looking_at("!--") => {
+                            // B.1.3 SingleLineHTMLOpenComment. Note that the above
+                            // `is_looking_at` test peeked ahead at the next three
+                            // characters of input. This lookahead is necessary
+                            // because `x<!--` has a comment but `x<!-y` does not.
+                            //
+                            // TODO: Limit this to Script (not Module).
+                            self.skip_single_line_comment(&mut builder);
+                            start = self.offset();
+                            continue;
+                        }
+                        _ => TerminalId::LessThan,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `=`, `==`, `===`, `=>`
+                '=' => {
+                    let id = match self.peek() {
+                        Some('=') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::StrictEqual
+                                }
+                                _ => TerminalId::LaxEqual,
+                            }
+                        }
+                        Some('>') => {
+                            self.chars.next();
+                            TerminalId::Arrow
+                        }
+                        _ => TerminalId::EqualSign,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `>`, `>>`, `>>>`, `>>>=`, `>>=`, `>=`
+                '>' => {
+                    let id = match self.peek() {
+                        Some('>') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('>') => {
+                                    self.chars.next();
+                                    match self.peek() {
+                                        Some('=') => {
+                                            self.chars.next();
+                                            TerminalId::UnsignedRightShiftAssign
+                                        }
+                                        _ => TerminalId::UnsignedRightShift,
+                                    }
+                                }
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::SignedRightShiftAssign
+                                }
+                                _ => TerminalId::SignedRightShift,
+                            }
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::GreaterThanOrEqualTo
+                        }
+                        _ => TerminalId::GreaterThan,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `^`, `^=`
+                '^' => {
+                    let id = match self.peek() {
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::BitwiseXorAssign
+                        }
+                        _ => TerminalId::BitwiseXor,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `|`, `||`, `||=`, `|=`
+                '|' => {
+                    let id = match self.peek() {
+                        Some('|') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::LogicalOrAssign
+                                }
+                                _ => TerminalId::LogicalOr,
+                            }
+                        }
+                        Some('=') => {
+                            self.chars.next();
+                            TerminalId::BitwiseOrAssign
+                        }
+                        _ => TerminalId::BitwiseOr,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // `?`, `??`, `??=`, `?.`
+                '?' => {
+                    let id = match self.peek() {
+                        Some('?') => {
+                            self.chars.next();
+                            match self.peek() {
+                                Some('=') => {
+                                    self.chars.next();
+                                    TerminalId::CoalesceAssign
+                                }
+                                _ => TerminalId::Coalesce,
+                            }
+                        }
+                        Some('.') => {
+                            // `?.` directly followed by a digit is a `?`
+                            // followed by a decimal literal such as `.5`, so
+                            // the `.` must be left unconsumed.
+                            if let Some('0'..='9') = self.double_peek() {
+                                TerminalId::QuestionMark
+                            } else {
+                                self.chars.next();
+                                TerminalId::OptionalChain
+                            }
+                        }
+                        _ => TerminalId::QuestionMark,
+                    };
+                    return self.set_result_without_value(start, id);
+                }
+
+                // Single-character punctuators.
+                '(' => return self.set_result_without_value(start, TerminalId::OpenParenthesis),
+                ')' => return self.set_result_without_value(start, TerminalId::CloseParenthesis),
+                ',' => return self.set_result_without_value(start, TerminalId::Comma),
+                ':' => return self.set_result_without_value(start, TerminalId::Colon),
+                ';' => return self.set_result_without_value(start, TerminalId::Semicolon),
+                '[' => return self.set_result_without_value(start, TerminalId::OpenBracket),
+                ']' => return self.set_result_without_value(start, TerminalId::CloseBracket),
+                '{' => return self.set_result_without_value(start, TerminalId::OpenBrace),
+                '~' => return self.set_result_without_value(start, TerminalId::BitwiseNot),
+
+                // Idents
+                '$' | '_' | 'a'..='z' | 'A'..='Z' => {
+                    builder.push_matching(c);
+                    return self.identifier_tail(start, builder);
+                }
+
+                // An identifier that starts with a unicode escape sequence.
+                '\\' => {
+                    // The identifier text will differ from the source, so it
+                    // has to be accumulated in an owned string.
+                    builder.force_allocation_without_current_ascii_char(&self);
+
+                    let value = self.unicode_escape_sequence_after_backslash()?;
+                    if !is_identifier_start(value) {
+                        return Err(ParseError::IllegalCharacter(value).into());
+                    }
+                    builder.push_different(value);
+
+                    return self.identifier_tail(start, builder);
+                }
+
+                '#' => {
+                    if start == 0 {
+                        // https://tc39.es/proposal-hashbang/out.html
+                        // HashbangComment ::
+                        //     `#!` SingleLineCommentChars?
+                        if let Some('!') = self.peek() {
+                            self.skip_single_line_comment(&mut builder);
+                            start = self.offset();
+                            continue;
+                        }
+                    }
+
+                    builder.push_matching(c);
+                    return self.private_identifier(start, builder);
+                }
+
+                other if is_identifier_start(other) => {
+                    builder.push_matching(other);
+                    return self.identifier_tail(start, builder);
+                }
+
+                other => {
+                    return Err(ParseError::IllegalCharacter(other).into());
+                }
+            }
+        }
+
+        // The input is exhausted.
+        self.set_result_without_value(start, TerminalId::End)
+    }
+
+    /// Insert `s` into the shared atom set and wrap the resulting index in a
+    /// `TokenValue::Atom`.
+    fn string_to_token_value(&mut self, s: &'alloc str) -> TokenValue {
+        TokenValue::Atom(self.atoms.borrow_mut().insert(s))
+    }
+
+    /// Append `s` to the shared slice list and wrap the resulting index in a
+    /// `TokenValue::Slice`.
+    fn slice_to_token_value(&mut self, s: &'alloc str) -> TokenValue {
+        TokenValue::Slice(self.slices.borrow_mut().push(s))
+    }
+
+    /// Turn a scanned numeric literal into a token and record it.
+    ///
+    /// `s` is the source text of the literal, `start` the offset at which it
+    /// began, and `result` the kind of literal that was scanned. Integer and
+    /// floating-point literals are converted to a `TokenValue::Number`; a
+    /// conversion failure is reported as `ParseError::NotImplemented`. BigInt
+    /// literals are not converted: their source text is stored as an atom.
+    fn numeric_result_to_advance_result(
+        &mut self,
+        s: &'alloc str,
+        start: usize,
+        result: NumericResult,
+    ) -> Result<'alloc, ()> {
+        let terminal_id;
+        let value;
+        match result {
+            NumericResult::Int { base } => {
+                let number = parse_int(s, base).map_err(|msg| ParseError::NotImplemented(msg))?;
+                terminal_id = TerminalId::NumericLiteral;
+                value = TokenValue::Number(number);
+            }
+            NumericResult::Float => {
+                let number = parse_float(s).map_err(|msg| ParseError::NotImplemented(msg))?;
+                terminal_id = TerminalId::NumericLiteral;
+                value = TokenValue::Number(number);
+            }
+            NumericResult::BigInt { .. } => {
+                // FIXME
+                terminal_id = TerminalId::BigIntLiteral;
+                value = self.string_to_token_value(s);
+            }
+        }
+
+        let location = SourceLocation::new(start, self.offset());
+        self.set_result(terminal_id, location, value)
+    }
+}
+
+/// Accumulates the text of a token while borrowing from the source for as
+/// long as possible.
+///
+/// While the token text is identical to the source, nothing is copied and
+/// the result is a slice of the source. Once an owned string has been forced
+/// (for example because an escape sequence was decoded), characters are
+/// appended to that string instead.
+struct AutoCow<'alloc> {
+    /// The not-yet-consumed input at the time this value was created.
+    start: &'alloc str,
+    /// The owned text, once one has been forced; `None` while the token text
+    /// still matches the source.
+    value: Option<String<'alloc>>,
+}
+
+impl<'alloc> AutoCow<'alloc> {
+    /// Start accumulating at the lexer's current position.
+    fn new(lexer: &Lexer<'alloc>) -> Self {
+        Self {
+            value: None,
+            start: lexer.chars.as_str(),
+        }
+    }
+
+    // Push a char that matches lexer.chars.next()
+    //
+    // Nothing needs to be recorded while the text is still borrowed.
+    fn push_matching(&mut self, c: char) {
+        if let Some(owned) = self.value.as_mut() {
+            owned.push(c);
+        }
+    }
+
+    // Push a different character than lexer.chars.next().
+    // force_allocation_without_current_ascii_char must be called before this.
+    fn push_different(&mut self, c: char) {
+        let owned = self.value.as_mut();
+        debug_assert!(owned.is_some());
+        owned.unwrap().push(c)
+    }
+
+    // Force allocation of a String, excluding the current ASCII character,
+    // and return the reference to it
+    fn get_mut_string_without_current_ascii_char<'b>(
+        &'b mut self,
+        lexer: &'_ Lexer<'alloc>,
+    ) -> &'b mut String<'alloc> {
+        self.force_allocation_without_current_ascii_char(lexer);
+        self.value.as_mut().unwrap()
+    }
+
+    // Force allocation of a String, excluding the current ASCII character.
+    //
+    // Does nothing if a String has already been allocated.
+    fn force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'alloc>) {
+        if self.value.is_none() {
+            // Everything consumed so far, minus the one-byte current char.
+            let consumed = self.start.len() - lexer.chars.as_str().len() - 1;
+            self.value = Some(String::from_str_in(
+                &self.start[..consumed],
+                lexer.allocator,
+            ));
+        }
+    }
+
+    // Check if the string contains a different character, such as an escape
+    // sequence
+    fn has_different(&self) -> bool {
+        self.value.is_some()
+    }
+
+    // Return the accumulated text: the owned string if one was forced,
+    // otherwise the slice of the source consumed since `new`.
+    fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
+        if let Some(owned) = self.value.take() {
+            return owned.into_bump_str();
+        }
+
+        let consumed = self.start.len() - lexer.chars.as_str().len();
+        &self.start[..consumed]
+    }
+
+    // Just like finish, but without pushing current char.
+    //
+    // In the borrowed case the last consumed byte is left out of the slice.
+    fn finish_without_push(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
+        if let Some(owned) = self.value.take() {
+            return owned.into_bump_str();
+        }
+
+        let consumed = self.start.len() - lexer.chars.as_str().len() - 1;
+        &self.start[..consumed]
+    }
+}