diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/jsparagus-parser/src | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/jsparagus-parser/src')
-rw-r--r-- | third_party/rust/jsparagus-parser/src/lexer.rs | 2325 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/lib.rs | 111 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/numeric_value.rs | 160 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/parser.rs | 262 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/queue_stack.rs | 256 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/simulator.rs | 211 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/tests.rs | 875 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/unicode.rs | 41 | ||||
-rw-r--r-- | third_party/rust/jsparagus-parser/src/unicode_data.rs | 2117 |
9 files changed, 6358 insertions, 0 deletions
diff --git a/third_party/rust/jsparagus-parser/src/lexer.rs b/third_party/rust/jsparagus-parser/src/lexer.rs new file mode 100644 index 0000000000..109558c5c2 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/lexer.rs @@ -0,0 +1,2325 @@
//! JavaScript lexer.

use crate::numeric_value::{parse_float, parse_int, NumericLiteralBase};
use crate::parser::Parser;
use crate::unicode::{is_id_continue, is_id_start};
use ast::arena;
use ast::source_atom_set::{CommonSourceAtomSetIndices, SourceAtomSet};
use ast::source_slice_list::SourceSliceList;
use ast::SourceLocation;
use bumpalo::{collections::String, Bump};
use generated_parser::{ParseError, Result, TerminalId, Token, TokenValue};
use std::cell::RefCell;
use std::convert::TryFrom;
use std::rc::Rc;
use std::str::Chars;

/// Lexer state: the remaining input, the token currently being built, and
/// the arena that tokens are allocated in.
pub struct Lexer<'alloc> {
    /// Arena used to allocate tokens.
    allocator: &'alloc Bump,

    /// Next token to be returned.
    token: arena::Box<'alloc, Token>,

    /// Length of the input text, in UTF-8 bytes.
    source_length: usize,

    /// Iterator over the remaining not-yet-parsed input.
    chars: Chars<'alloc>,

    /// Shared atom set; presumably where identifier and string text is
    /// interned -- confirm against `string_to_token_value`.
    atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,

    /// Shared slice list; presumably records source slices for tokens whose
    /// text is kept as-is -- confirm against its users.
    slices: Rc<RefCell<SourceSliceList<'alloc>>>,
}

/// Kind of numeric literal recognized by the numeric-literal scanners.
enum NumericResult {
    /// Integer literal without decimal point, exponent or `n` suffix.
    Int {
        base: NumericLiteralBase,
    },
    /// Literal with a decimal point and/or an exponent part.
    Float,
    /// Integer literal with the BigInt suffix `n`.
    BigInt {
        #[allow(dead_code)]
        base: NumericLiteralBase,
    },
}

impl<'alloc> Lexer<'alloc> {
    /// Create a lexer over `chars` whose source locations start counting
    /// from offset 0.
    pub fn new(
        allocator: &'alloc Bump,
        chars: Chars<'alloc>,
        atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
        slices: Rc<RefCell<SourceSliceList<'alloc>>>,
    ) -> Lexer<'alloc> {
        Self::with_offset(allocator, chars, 0, atoms, slices)
    }

    /// Create a lexer for a part of a JS script or module. `offset` is the
    /// total length of all previous parts, in bytes; source locations for
    /// tokens created by the new lexer start counting from this number.
pub fn with_offset(
    allocator: &'alloc Bump,
    chars: Chars<'alloc>,
    offset: usize,
    atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
    slices: Rc<RefCell<SourceSliceList<'alloc>>>,
) -> Lexer<'alloc> {
    // The placeholder token starts out flagged as being on a new line.
    let mut token = arena::alloc(allocator, new_token());
    token.is_on_new_line = true;

    Lexer {
        allocator,
        token,
        // Evaluated before `chars` is moved into the struct below.
        source_length: offset + chars.as_str().len(),
        chars,
        atoms,
        slices,
    }
}

/// True if the remaining input begins with `s`.
fn is_looking_at(&self, s: &str) -> bool {
    let rest = self.chars.as_str();
    rest.starts_with(s)
}

/// Offset of the next unread character, in bytes, including the `offset`
/// the lexer was created with.
pub fn offset(&self) -> usize {
    let remaining = self.chars.as_str().len();
    self.source_length - remaining
}

/// The next character of the input, without consuming it.
fn peek(&self) -> Option<char> {
    self.chars.clone().next()
}

/// The character after the next one, without consuming anything.
fn double_peek(&self) -> Option<char> {
    self.chars.clone().nth(1)
}

/// Store the scanned terminal, location and value in the pending token.
fn set_result(
    &mut self,
    terminal_id: TerminalId,
    loc: SourceLocation,
    value: TokenValue,
) -> Result<'alloc, ()> {
    let token = &mut self.token;
    token.terminal_id = terminal_id;
    token.loc = loc;
    token.value = value;
    Ok(())
}

/// Scan one token and return it, leaving a fresh placeholder token in the
/// lexer for the following call.
#[inline]
pub fn next<'parser>(
    &mut self,
    parser: &Parser<'parser>,
) -> Result<'alloc, arena::Box<'alloc, Token>> {
    // Allocate the replacement first, then fill in the current token.
    let placeholder = arena::alloc_with(self.allocator, || new_token());
    self.advance_impl(parser)?;
    Ok(std::mem::replace(&mut self.token, placeholder))
}

/// Error describing the character at the current position, or the end of
/// input if there is none.
fn unexpected_err(&mut self) -> ParseError<'alloc> {
    match self.peek() {
        Some(ch) => ParseError::IllegalCharacter(ch),
        None => ParseError::UnexpectedEnd,
    }
}
}

/// Returns an empty token which is meant as a placeholder to be mutated later.
fn new_token() -> Token {
    let loc = SourceLocation::default();
    Token::basic_token(TerminalId::End, loc)
}

// ----------------------------------------------------------------------------
// 11.1 Unicode Format-Control Characters

/// U+200C ZERO WIDTH NON-JOINER, abbreviated in the spec as <ZWNJ>.
/// Specially permitted in identifiers.
+const ZWNJ: char = '\u{200c}'; + +/// U+200D ZERO WIDTH JOINER, abbreviated as <ZWJ>. +/// Specially permitted in identifiers. +const ZWJ: char = '\u{200d}'; + +/// U+FEFF ZERO WIDTH NO-BREAK SPACE, abbreviated <ZWNBSP>. +/// Considered a whitespace character in JS. +const ZWNBSP: char = '\u{feff}'; + +// ---------------------------------------------------------------------------- +// 11.2 White Space + +/// U+0009 CHARACTER TABULATION, abbreviated <TAB>. +const TAB: char = '\u{9}'; + +/// U+000B VERTICAL TAB, abbreviated <VT>. +const VT: char = '\u{b}'; + +/// U+000C FORM FEED, abbreviated <FF>. +const FF: char = '\u{c}'; + +/// U+0020 SPACE, abbreviated <SP>. +const SP: char = '\u{20}'; + +/// U+00A0 NON-BREAKING SPACE, abbreviated <NBSP>. +const NBSP: char = '\u{a0}'; + +// ---------------------------------------------------------------------------- +// 11.3 Line Terminators + +/// U+000A LINE FEED, abbreviated in the spec as <LF>. +const LF: char = '\u{a}'; + +/// U+000D CARRIAGE RETURN, abbreviated in the spec as <CR>. +const CR: char = '\u{d}'; + +/// U+2028 LINE SEPARATOR, abbreviated <LS>. +const LS: char = '\u{2028}'; + +/// U+2029 PARAGRAPH SEPARATOR, abbreviated <PS>. +const PS: char = '\u{2029}'; + +// ---------------------------------------------------------------------------- +// 11.4 Comments +// +// Comment:: +// MultiLineComment +// SingleLineComment + +impl<'alloc> Lexer<'alloc> { + /// Skip a *MultiLineComment*. + /// + /// ```text + /// MultiLineComment :: + /// `/*` MultiLineCommentChars? `*/` + /// + /// MultiLineCommentChars :: + /// MultiLineNotAsteriskChar MultiLineCommentChars? + /// `*` PostAsteriskCommentChars? + /// + /// PostAsteriskCommentChars :: + /// MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars? + /// `*` PostAsteriskCommentChars? 
///
/// MultiLineNotAsteriskChar ::
///     SourceCharacter but not `*`
///
/// MultiLineNotForwardSlashOrAsteriskChar ::
///     SourceCharacter but not one of `/` or `*`
/// ```
///
/// (B.1.3 splits MultiLineComment into two nonterminals: MultiLineComment
/// and SingleLineDelimitedComment. The point of that is to help specify
/// that a SingleLineHTMLCloseComment must occur at the start of a line. We
/// use `is_on_new_line` for that.)
///
fn skip_multi_line_comment(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
    loop {
        match self.chars.next() {
            // Ran out of input before seeing the closing `*/`.
            None => return Err(ParseError::UnterminatedMultiLineComment.into()),
            Some('*') if self.peek() == Some('/') => {
                self.chars.next();
                // Restart the builder at the position after the comment.
                *builder = AutoCow::new(&self);
                return Ok(());
            }
            // A line terminator inside the comment still starts a new line.
            Some(CR) | Some(LF) | Some(PS) | Some(LS) => {
                self.token.is_on_new_line = true;
            }
            Some(_) => {}
        }
    }
}

/// Skip a *SingleLineComment* and the first line terminator character
/// following it, if any.
///
/// ```text
/// SingleLineComment ::
///     `//` SingleLineCommentChars?
///
/// SingleLineCommentChars ::
///     SingleLineCommentChar SingleLineCommentChars?
///
/// SingleLineCommentChar ::
///     SourceCharacter but not LineTerminator
/// ```
fn skip_single_line_comment(&mut self, builder: &mut AutoCow<'alloc>) {
    // Consume up to and including the first line terminator (or to the end
    // of input).
    for ch in self.chars.by_ref() {
        if ch == CR || ch == LF || ch == LS || ch == PS {
            break;
        }
    }
    *builder = AutoCow::new(&self);
    self.token.is_on_new_line = true;
}
}

// ----------------------------------------------------------------------------
// 11.6 Names and Keywords

/// True if `c` is a one-character *IdentifierStart*.
///
/// ```text
/// IdentifierStart ::
///     UnicodeIDStart
///     `$`
///     `_`
///     `\` UnicodeEscapeSequence
///
/// UnicodeIDStart ::
///     > any Unicode code point with the Unicode property "ID_Start"
/// ```
fn is_identifier_start(c: char) -> bool {
    // The `\` UnicodeEscapeSequence case is handled by the callers.
    match c {
        '$' | '_' | 'a'..='z' | 'A'..='Z' => true,
        _ if c.is_ascii() => false,
        _ => is_id_start(c),
    }
}

/// True if `c` is a one-character *IdentifierPart*.
///
/// ```text
/// IdentifierPart ::
///     UnicodeIDContinue
///     `$`
///     `\` UnicodeEscapeSequence
///     <ZWNJ>
///     <ZWJ>
///
/// UnicodeIDContinue ::
///     > any Unicode code point with the Unicode property "ID_Continue"
/// ```
fn is_identifier_part(c: char) -> bool {
    // The `\` UnicodeEscapeSequence case is handled by the callers.
    match c {
        '$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => true,
        _ if c.is_ascii() => false,
        ZWNJ | ZWJ => true,
        _ => is_id_continue(c),
    }
}

impl<'alloc> Lexer<'alloc> {
    /// Scan the rest of an IdentifierName, having already parsed the initial
    /// IdentifierStart and stored it in `builder`.
    ///
    /// On success, this returns `Ok((has_escapes, str))`, where `has_escapes`
    /// is true if the identifier contained any UnicodeEscapeSequences, and
    /// `str` is the un-escaped IdentifierName, including the IdentifierStart.
+ /// + /// ```text + /// IdentifierName :: + /// IdentifierStart + /// IdentifierName IdentifierPart + /// ``` + fn identifier_name_tail( + &mut self, + mut builder: AutoCow<'alloc>, + ) -> Result<'alloc, (bool, &'alloc str)> { + while let Some(ch) = self.peek() { + if !is_identifier_part(ch) { + if ch == '\\' { + self.chars.next(); + builder.force_allocation_without_current_ascii_char(&self); + + let value = self.unicode_escape_sequence_after_backslash()?; + if !is_identifier_part(value) { + return Err(ParseError::InvalidEscapeSequence.into()); + } + + builder.push_different(value); + continue; + } + + break; + } + self.chars.next(); + builder.push_matching(ch); + } + let has_different = builder.has_different(); + Ok((has_different, builder.finish(&self))) + } + + fn identifier_name(&mut self, mut builder: AutoCow<'alloc>) -> Result<'alloc, &'alloc str> { + match self.chars.next() { + None => { + return Err(ParseError::UnexpectedEnd.into()); + } + Some(c) => { + match c { + '$' | '_' | 'a'..='z' | 'A'..='Z' => { + builder.push_matching(c); + } + + '\\' => { + builder.force_allocation_without_current_ascii_char(&self); + + let value = self.unicode_escape_sequence_after_backslash()?; + if !is_identifier_start(value) { + return Err(ParseError::IllegalCharacter(value).into()); + } + builder.push_different(value); + } + + other if is_identifier_start(other) => { + builder.push_matching(other); + } + + other => { + return Err(ParseError::IllegalCharacter(other).into()); + } + } + self.identifier_name_tail(builder) + .map(|(_has_escapes, name)| name) + } + } + } + + /// Finish scanning an *IdentifierName* or keyword, having already scanned + /// the *IdentifierStart* and pushed it to `builder`. + /// + /// `start` is the offset of the *IdentifierStart*. + /// + /// The lexer doesn't know the syntactic context, so it always identifies + /// possible keywords. 
It's up to the parser to understand that, for + /// example, `TerminalId::If` is not a keyword when it's used as a property + /// or method name. + /// + /// If the source string contains no escape and it matches to possible + /// keywords (including contextual keywords), the result is corresponding + /// `TerminalId`. For example, if the source string is "yield", the result + /// is `TerminalId::Yield`. + /// + /// If the source string contains no escape sequence and also it doesn't + /// match to any possible keywords, the result is `TerminalId::Name`. + /// + /// If the source string contains at least one escape sequence, + /// the result is always `TerminalId::NameWithEscape`, regardless of the + /// StringValue of it. For example, if the source string is "\u{79}ield", + /// the result is `TerminalId::NameWithEscape`, and the StringValue is + /// "yield". + fn identifier_tail(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> { + let (has_different, text) = self.identifier_name_tail(builder)?; + + // https://tc39.es/ecma262/#sec-keywords-and-reserved-words + // + // keywords in the grammar match literal sequences of specific + // SourceCharacter elements. A code point in a keyword cannot be + // expressed by a `\` UnicodeEscapeSequence. + let (id, value) = if has_different { + // Always return `NameWithEscape`. + // + // Error check against reserved word should be handled in the + // consumer. 
+ (TerminalId::NameWithEscape, self.string_to_token_value(text)) + } else { + match &text as &str { + "as" => ( + TerminalId::As, + TokenValue::Atom(CommonSourceAtomSetIndices::as_()), + ), + "async" => { + /* + ( + TerminalId::Async, + TokenValue::Atom(CommonSourceAtomSetIndices::async_()), + ), + */ + return Err(ParseError::NotImplemented( + "async cannot be handled in parser due to multiple lookahead", + ) + .into()); + } + "await" => { + /* + ( + TerminalId::Await, + TokenValue::Atom(CommonSourceAtomSetIndices::await_()), + ), + */ + return Err( + ParseError::NotImplemented("await cannot be handled in parser").into(), + ); + } + "break" => ( + TerminalId::Break, + TokenValue::Atom(CommonSourceAtomSetIndices::break_()), + ), + "case" => ( + TerminalId::Case, + TokenValue::Atom(CommonSourceAtomSetIndices::case()), + ), + "catch" => ( + TerminalId::Catch, + TokenValue::Atom(CommonSourceAtomSetIndices::catch()), + ), + "class" => ( + TerminalId::Class, + TokenValue::Atom(CommonSourceAtomSetIndices::class()), + ), + "const" => ( + TerminalId::Const, + TokenValue::Atom(CommonSourceAtomSetIndices::const_()), + ), + "continue" => ( + TerminalId::Continue, + TokenValue::Atom(CommonSourceAtomSetIndices::continue_()), + ), + "debugger" => ( + TerminalId::Debugger, + TokenValue::Atom(CommonSourceAtomSetIndices::debugger()), + ), + "default" => ( + TerminalId::Default, + TokenValue::Atom(CommonSourceAtomSetIndices::default()), + ), + "delete" => ( + TerminalId::Delete, + TokenValue::Atom(CommonSourceAtomSetIndices::delete()), + ), + "do" => ( + TerminalId::Do, + TokenValue::Atom(CommonSourceAtomSetIndices::do_()), + ), + "else" => ( + TerminalId::Else, + TokenValue::Atom(CommonSourceAtomSetIndices::else_()), + ), + "enum" => ( + TerminalId::Enum, + TokenValue::Atom(CommonSourceAtomSetIndices::enum_()), + ), + "export" => ( + TerminalId::Export, + TokenValue::Atom(CommonSourceAtomSetIndices::export()), + ), + "extends" => ( + TerminalId::Extends, + 
TokenValue::Atom(CommonSourceAtomSetIndices::extends()), + ), + "finally" => ( + TerminalId::Finally, + TokenValue::Atom(CommonSourceAtomSetIndices::finally()), + ), + "for" => ( + TerminalId::For, + TokenValue::Atom(CommonSourceAtomSetIndices::for_()), + ), + "from" => ( + TerminalId::From, + TokenValue::Atom(CommonSourceAtomSetIndices::from()), + ), + "function" => ( + TerminalId::Function, + TokenValue::Atom(CommonSourceAtomSetIndices::function()), + ), + "get" => ( + TerminalId::Get, + TokenValue::Atom(CommonSourceAtomSetIndices::get()), + ), + "if" => ( + TerminalId::If, + TokenValue::Atom(CommonSourceAtomSetIndices::if_()), + ), + "implements" => ( + TerminalId::Implements, + TokenValue::Atom(CommonSourceAtomSetIndices::implements()), + ), + "import" => ( + TerminalId::Import, + TokenValue::Atom(CommonSourceAtomSetIndices::import()), + ), + "in" => ( + TerminalId::In, + TokenValue::Atom(CommonSourceAtomSetIndices::in_()), + ), + "instanceof" => ( + TerminalId::Instanceof, + TokenValue::Atom(CommonSourceAtomSetIndices::instanceof()), + ), + "interface" => ( + TerminalId::Interface, + TokenValue::Atom(CommonSourceAtomSetIndices::interface()), + ), + "let" => { + /* + ( + TerminalId::Let, + TokenValue::Atom(CommonSourceAtomSetIndices::let_()), + ), + */ + return Err(ParseError::NotImplemented( + "let cannot be handled in parser due to multiple lookahead", + ) + .into()); + } + "new" => ( + TerminalId::New, + TokenValue::Atom(CommonSourceAtomSetIndices::new_()), + ), + "of" => ( + TerminalId::Of, + TokenValue::Atom(CommonSourceAtomSetIndices::of()), + ), + "package" => ( + TerminalId::Package, + TokenValue::Atom(CommonSourceAtomSetIndices::package()), + ), + "private" => ( + TerminalId::Private, + TokenValue::Atom(CommonSourceAtomSetIndices::private()), + ), + "protected" => ( + TerminalId::Protected, + TokenValue::Atom(CommonSourceAtomSetIndices::protected()), + ), + "public" => ( + TerminalId::Public, + TokenValue::Atom(CommonSourceAtomSetIndices::public()), + 
), + "return" => ( + TerminalId::Return, + TokenValue::Atom(CommonSourceAtomSetIndices::return_()), + ), + "set" => ( + TerminalId::Set, + TokenValue::Atom(CommonSourceAtomSetIndices::set()), + ), + "static" => ( + TerminalId::Static, + TokenValue::Atom(CommonSourceAtomSetIndices::static_()), + ), + "super" => ( + TerminalId::Super, + TokenValue::Atom(CommonSourceAtomSetIndices::super_()), + ), + "switch" => ( + TerminalId::Switch, + TokenValue::Atom(CommonSourceAtomSetIndices::switch()), + ), + "target" => ( + TerminalId::Target, + TokenValue::Atom(CommonSourceAtomSetIndices::target()), + ), + "this" => ( + TerminalId::This, + TokenValue::Atom(CommonSourceAtomSetIndices::this()), + ), + "throw" => ( + TerminalId::Throw, + TokenValue::Atom(CommonSourceAtomSetIndices::throw()), + ), + "try" => ( + TerminalId::Try, + TokenValue::Atom(CommonSourceAtomSetIndices::try_()), + ), + "typeof" => ( + TerminalId::Typeof, + TokenValue::Atom(CommonSourceAtomSetIndices::typeof_()), + ), + "var" => ( + TerminalId::Var, + TokenValue::Atom(CommonSourceAtomSetIndices::var()), + ), + "void" => ( + TerminalId::Void, + TokenValue::Atom(CommonSourceAtomSetIndices::void()), + ), + "while" => ( + TerminalId::While, + TokenValue::Atom(CommonSourceAtomSetIndices::while_()), + ), + "with" => ( + TerminalId::With, + TokenValue::Atom(CommonSourceAtomSetIndices::with()), + ), + "yield" => { + /* + ( + TerminalId::Yield, + TokenValue::Atom(CommonSourceAtomSetIndices::yield_()), + ), + */ + return Err( + ParseError::NotImplemented("yield cannot be handled in parser").into(), + ); + } + "null" => ( + TerminalId::NullLiteral, + TokenValue::Atom(CommonSourceAtomSetIndices::null()), + ), + "true" => ( + TerminalId::BooleanLiteral, + TokenValue::Atom(CommonSourceAtomSetIndices::true_()), + ), + "false" => ( + TerminalId::BooleanLiteral, + TokenValue::Atom(CommonSourceAtomSetIndices::false_()), + ), + _ => (TerminalId::Name, self.string_to_token_value(text)), + } + }; + + self.set_result(id, 
SourceLocation::new(start, self.offset()), value) + } + + /// ```text + /// PrivateIdentifier:: + /// `#` IdentifierName + /// ``` + fn private_identifier(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> { + let name = self.identifier_name(builder)?; + let value = self.string_to_token_value(name); + self.set_result( + TerminalId::PrivateIdentifier, + SourceLocation::new(start, self.offset()), + value, + ) + } + + /// ```text + /// UnicodeEscapeSequence:: + /// `u` Hex4Digits + /// `u{` CodePoint `}` + /// ``` + fn unicode_escape_sequence_after_backslash(&mut self) -> Result<'alloc, char> { + match self.chars.next() { + Some('u') => {} + _ => { + return Err(ParseError::InvalidEscapeSequence.into()); + } + } + self.unicode_escape_sequence_after_backslash_and_u() + } + + fn unicode_escape_sequence_after_backslash_and_u(&mut self) -> Result<'alloc, char> { + let value = match self.peek() { + Some('{') => { + self.chars.next(); + + let value = self.code_point()?; + match self.chars.next() { + Some('}') => {} + _ => { + return Err(ParseError::InvalidEscapeSequence.into()); + } + } + value + } + _ => self.hex_4_digits()?, + }; + + Ok(value) + } +} + +impl<'alloc> Lexer<'alloc> { + // ------------------------------------------------------------------------ + // 11.8.3 Numeric Literals + + /// Advance over decimal digits in the input. + /// + /// ```text + /// NumericLiteralSeparator:: + /// `_` + /// + /// DecimalDigits :: + /// DecimalDigit + /// DecimalDigits NumericLiteralSeparator? 
DecimalDigit + /// + /// DecimalDigit :: one of + /// `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` + /// ``` + fn decimal_digits(&mut self) -> Result<'alloc, ()> { + if let Some('0'..='9') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + + self.decimal_digits_after_first_digit()?; + Ok(()) + } + + fn optional_decimal_digits(&mut self) -> Result<'alloc, ()> { + if let Some('0'..='9') = self.peek() { + self.chars.next(); + } else { + return Ok(()); + } + + self.decimal_digits_after_first_digit()?; + Ok(()) + } + + fn decimal_digits_after_first_digit(&mut self) -> Result<'alloc, ()> { + while let Some(next) = self.peek() { + match next { + '_' => { + self.chars.next(); + + if let Some('0'..='9') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + } + '0'..='9' => { + self.chars.next(); + } + _ => break, + } + } + Ok(()) + } + + /// Skip an ExponentPart, if present. + /// + /// ```text + /// ExponentPart :: + /// ExponentIndicator SignedInteger + /// + /// ExponentIndicator :: one of + /// `e` `E` + /// + /// SignedInteger :: + /// DecimalDigits + /// `+` DecimalDigits + /// `-` DecimalDigits + /// ``` + fn optional_exponent(&mut self) -> Result<'alloc, bool> { + if let Some('e') | Some('E') = self.peek() { + self.chars.next(); + self.decimal_exponent()?; + return Ok(true); + } + + Ok(false) + } + + fn decimal_exponent(&mut self) -> Result<'alloc, ()> { + if let Some('+') | Some('-') = self.peek() { + self.chars.next(); + } + + self.decimal_digits()?; + + Ok(()) + } + + /// ```text + /// HexDigit :: one of + /// `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F` + /// ``` + fn hex_digit(&mut self) -> Result<'alloc, u32> { + match self.chars.next() { + None => Err(ParseError::InvalidEscapeSequence.into()), + Some(c @ '0'..='9') => Ok(c as u32 - '0' as u32), + Some(c @ 'a'..='f') => Ok(10 + (c as u32 - 'a' as u32)), + Some(c @ 'A'..='F') => Ok(10 
+ (c as u32 - 'A' as u32)), + Some(other) => Err(ParseError::IllegalCharacter(other).into()), + } + } + + fn code_point_to_char(value: u32) -> Result<'alloc, char> { + if 0xd800 <= value && value <= 0xdfff { + Err(ParseError::NotImplemented("unicode escape sequences (surrogates)").into()) + } else { + char::try_from(value).map_err(|_| ParseError::InvalidEscapeSequence.into()) + } + } + + /// ```text + /// Hex4Digits :: + /// HexDigit HexDigit HexDigit HexDigit + /// ``` + fn hex_4_digits(&mut self) -> Result<'alloc, char> { + let mut value = 0; + for _ in 0..4 { + value = (value << 4) | self.hex_digit()?; + } + Self::code_point_to_char(value) + } + + /// ```text + /// CodePoint :: + /// HexDigits but only if MV of HexDigits ≤ 0x10FFFF + /// + /// HexDigits :: + /// HexDigit + /// HexDigits HexDigit + /// ``` + fn code_point(&mut self) -> Result<'alloc, char> { + let mut value = self.hex_digit()?; + + loop { + let next = match self.peek() { + None => { + return Err(ParseError::InvalidEscapeSequence.into()); + } + Some(c @ '0'..='9') => c as u32 - '0' as u32, + Some(c @ 'a'..='f') => 10 + (c as u32 - 'a' as u32), + Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32), + Some(_) => break, + }; + self.chars.next(); + value = (value << 4) | next; + if value > 0x10FFFF { + return Err(ParseError::InvalidEscapeSequence.into()); + } + } + + Self::code_point_to_char(value) + } + + /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after + /// having already consumed the first character, which was `0`. + /// + /// ```text + /// NumericLiteral :: + /// DecimalLiteral + /// DecimalBigIntegerLiteral + /// NonDecimalIntegerLiteral + /// NonDecimalIntegerLiteral BigIntLiteralSuffix + /// + /// DecimalBigIntegerLiteral :: + /// `0` BigIntLiteralSuffix + /// NonZeroDigit DecimalDigits? 
BigIntLiteralSuffix + /// + /// NonDecimalIntegerLiteral :: + /// BinaryIntegerLiteral + /// OctalIntegerLiteral + /// HexIntegerLiteral + /// + /// BigIntLiteralSuffix :: + /// `n` + /// ``` + fn numeric_literal_starting_with_zero(&mut self) -> Result<'alloc, NumericResult> { + let mut base = NumericLiteralBase::Decimal; + match self.peek() { + // BinaryIntegerLiteral :: + // `0b` BinaryDigits + // `0B` BinaryDigits + // + // BinaryDigits :: + // BinaryDigit + // BinaryDigits NumericLiteralSeparator? BinaryDigit + // + // BinaryDigit :: one of + // `0` `1` + Some('b') | Some('B') => { + self.chars.next(); + + base = NumericLiteralBase::Binary; + + if let Some('0'..='1') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + + while let Some(next) = self.peek() { + match next { + '_' => { + self.chars.next(); + + if let Some('0'..='1') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + } + '0'..='1' => { + self.chars.next(); + } + _ => break, + } + } + + if let Some('n') = self.peek() { + self.chars.next(); + self.check_after_numeric_literal()?; + return Ok(NumericResult::BigInt { base }); + } + } + + // OctalIntegerLiteral :: + // `0o` OctalDigits + // `0O` OctalDigits + // + // OctalDigits :: + // OctalDigit + // OctalDigits NumericLiteralSeparator? 
OctalDigit + // + // OctalDigit :: one of + // `0` `1` `2` `3` `4` `5` `6` `7` + // + Some('o') | Some('O') => { + self.chars.next(); + + base = NumericLiteralBase::Octal; + + if let Some('0'..='7') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + + while let Some(next) = self.peek() { + match next { + '_' => { + self.chars.next(); + + if let Some('0'..='7') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + } + '0'..='7' => { + self.chars.next(); + } + _ => break, + } + } + + if let Some('n') = self.peek() { + self.chars.next(); + self.check_after_numeric_literal()?; + return Ok(NumericResult::BigInt { base }); + } + } + + // HexIntegerLiteral :: + // `0x` HexDigits + // `0X` HexDigits + // + // HexDigits :: + // HexDigit + // HexDigits NumericLiteralSeparator? HexDigit + // + // HexDigit :: one of + // `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F` + Some('x') | Some('X') => { + self.chars.next(); + + base = NumericLiteralBase::Hex; + + if let Some('0'..='9') | Some('a'..='f') | Some('A'..='F') = self.peek() { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + + while let Some(next) = self.peek() { + match next { + '_' => { + self.chars.next(); + + if let Some('0'..='9') | Some('a'..='f') | Some('A'..='F') = self.peek() + { + self.chars.next(); + } else { + return Err(self.unexpected_err().into()); + } + } + '0'..='9' | 'a'..='f' | 'A'..='F' => { + self.chars.next(); + } + _ => break, + } + } + + if let Some('n') = self.peek() { + self.chars.next(); + self.check_after_numeric_literal()?; + return Ok(NumericResult::BigInt { base }); + } + } + + Some('.') => { + self.chars.next(); + return self.decimal_literal_after_decimal_point_after_digits(); + } + + Some('e') | Some('E') => { + self.chars.next(); + self.decimal_exponent()?; + return Ok(NumericResult::Float); + } + + Some('n') => { + 
self.chars.next(); + self.check_after_numeric_literal()?; + return Ok(NumericResult::BigInt { base }); + } + + Some('0'..='9') => { + // This is almost always the token `0` in practice. + // + // In nonstrict code, as a legacy feature, other numbers + // starting with `0` are allowed. If /0[0-7]+/ matches, it's a + // LegacyOctalIntegerLiteral; but if we see an `8` or `9` in + // the number, it's decimal. Decimal numbers can have a decimal + // point and/or ExponentPart; octals can't. + // + // Neither is allowed with a BigIntLiteralSuffix `n`. + // + // LegacyOctalIntegerLiteral :: + // `0` OctalDigit + // LegacyOctalIntegerLiteral OctalDigit + // + // NonOctalDecimalIntegerLiteral :: + // `0` NonOctalDigit + // LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit + // NonOctalDecimalIntegerLiteral DecimalDigit + // + // LegacyOctalLikeDecimalIntegerLiteral :: + // `0` OctalDigit + // LegacyOctalLikeDecimalIntegerLiteral OctalDigit + // + // NonOctalDigit :: one of + // `8` `9` + // + + // TODO: implement `strict_mode` check + // let strict_mode = true; + // if !strict_mode { + // // TODO: Distinguish between Octal and NonOctalDecimal. + // // TODO: Support NonOctalDecimal followed by a decimal + // // point and/or ExponentPart. + // self.decimal_digits()?; + // } + return Err(ParseError::NotImplemented("LegacyOctalIntegerLiteral").into()); + } + + _ => {} + } + + self.check_after_numeric_literal()?; + Ok(NumericResult::Int { base }) + } + + /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after + /// having already consumed the first character, which is a decimal digit. + fn decimal_literal_after_first_digit(&mut self) -> Result<'alloc, NumericResult> { + // DecimalLiteral :: + // DecimalIntegerLiteral `.` DecimalDigits? ExponentPart? + // `.` DecimalDigits ExponentPart? + // DecimalIntegerLiteral ExponentPart? 
+ // + // DecimalIntegerLiteral :: + // `0` #see `numeric_literal_starting_with_zero` + // NonZeroDigit + // NonZeroDigit NumericLiteralSeparator? DecimalDigits + // NonOctalDecimalIntegerLiteral #see `numeric_literal_ + // # starting_with_zero` + // + // NonZeroDigit :: one of + // `1` `2` `3` `4` `5` `6` `7` `8` `9` + + self.decimal_digits_after_first_digit()?; + match self.peek() { + Some('.') => { + self.chars.next(); + return self.decimal_literal_after_decimal_point_after_digits(); + } + Some('n') => { + self.chars.next(); + self.check_after_numeric_literal()?; + return Ok(NumericResult::BigInt { + base: NumericLiteralBase::Decimal, + }); + } + _ => {} + } + + let has_exponent = self.optional_exponent()?; + self.check_after_numeric_literal()?; + + let result = if has_exponent { + NumericResult::Float + } else { + NumericResult::Int { + base: NumericLiteralBase::Decimal, + } + }; + + Ok(result) + } + + fn decimal_literal_after_decimal_point(&mut self) -> Result<'alloc, NumericResult> { + // The parts after `.` in + // + // `.` DecimalDigits ExponentPart? + self.decimal_digits()?; + self.optional_exponent()?; + self.check_after_numeric_literal()?; + + Ok(NumericResult::Float) + } + + fn decimal_literal_after_decimal_point_after_digits( + &mut self, + ) -> Result<'alloc, NumericResult> { + // The parts after `.` in + // + // DecimalLiteral :: + // DecimalIntegerLiteral `.` DecimalDigits? ExponentPart? + self.optional_decimal_digits()?; + self.optional_exponent()?; + self.check_after_numeric_literal()?; + + Ok(NumericResult::Float) + } + + fn check_after_numeric_literal(&self) -> Result<'alloc, ()> { + // The SourceCharacter immediately following a + // NumericLiteral must not be an IdentifierStart or + // DecimalDigit. 
(11.8.3) + if let Some(ch) = self.peek() { + if is_identifier_start(ch) || ch.is_digit(10) { + return Err(ParseError::IllegalCharacter(ch).into()); + } + } + + Ok(()) + } + + // ------------------------------------------------------------------------ + // 11.8.4 String Literals (as extended by B.1.2) + + /// Scan an LineContinuation or EscapeSequence in a string literal, having + /// already consumed the initial backslash character. + /// + /// ```text + /// LineContinuation :: + /// `\` LineTerminatorSequence + /// + /// EscapeSequence :: + /// CharacterEscapeSequence + /// (in strict mode code) `0` [lookahead ∉ DecimalDigit] + /// (in non-strict code) LegacyOctalEscapeSequence + /// HexEscapeSequence + /// UnicodeEscapeSequence + /// + /// CharacterEscapeSequence :: + /// SingleEscapeCharacter + /// NonEscapeCharacter + /// + /// SingleEscapeCharacter :: one of + /// `'` `"` `\` `b` `f` `n` `r` `t` `v` + /// + /// LegacyOctalEscapeSequence :: + /// OctalDigit [lookahead ∉ OctalDigit] + /// ZeroToThree OctalDigit [lookahead ∉ OctalDigit] + /// FourToSeven OctalDigit + /// ZeroToThree OctalDigit OctalDigit + /// + /// ZeroToThree :: one of + /// `0` `1` `2` `3` + /// + /// FourToSeven :: one of + /// `4` `5` `6` `7` + /// ``` + fn escape_sequence(&mut self, text: &mut String<'alloc>) -> Result<'alloc, ()> { + match self.chars.next() { + None => { + return Err(ParseError::UnterminatedString.into()); + } + Some(c) => match c { + LF | LS | PS => { + // LineContinuation. Ignore it. + // + // Don't set is_on_new_line because this LineContinuation + // has no bearing on whether the current string literal was + // the first token on the line where it started. + } + + CR => { + // LineContinuation. Check for the sequence \r\n; otherwise + // ignore it. 
+ if self.peek() == Some(LF) { + self.chars.next(); + } + } + + '\'' | '"' | '\\' => { + text.push(c); + } + + 'b' => { + text.push('\u{8}'); + } + + 'f' => { + text.push(FF); + } + + 'n' => { + text.push(LF); + } + + 'r' => { + text.push(CR); + } + + 't' => { + text.push(TAB); + } + + 'v' => { + text.push(VT); + } + + 'x' => { + // HexEscapeSequence :: + // `x` HexDigit HexDigit + let mut value = self.hex_digit()?; + value = (value << 4) | self.hex_digit()?; + match char::try_from(value) { + Err(_) => { + return Err(ParseError::InvalidEscapeSequence.into()); + } + Ok(c) => { + text.push(c); + } + } + } + + 'u' => { + let c = self.unicode_escape_sequence_after_backslash_and_u()?; + text.push(c); + } + + '0' => { + // In strict mode code and in template literals, the + // relevant production is + // + // EscapeSequence :: + // `0` [lookahead <! DecimalDigit] + // + // In non-strict StringLiterals, `\0` begins a + // LegacyOctalEscapeSequence which may contain more digits. + match self.peek() { + Some('0'..='7') => { + return Err(ParseError::NotImplemented( + "legacy octal escape sequence in string", + ) + .into()); + } + Some('8'..='9') => { + return Err(ParseError::NotImplemented( + "digit immediately following \\0 escape sequence", + ) + .into()); + } + _ => {} + } + text.push('\0'); + } + + '1'..='7' => { + return Err(ParseError::NotImplemented( + "legacy octal escape sequence in string", + ) + .into()); + } + + other => { + // "\8" and "\9" are invalid per spec, but SpiderMonkey and + // V8 accept them, and JSC accepts them in non-strict mode. + // "\8" is "8" and "\9" is "9". + text.push(other); + } + }, + } + Ok(()) + } + + /// Scan a string literal, having already consumed the starting quote + /// character `delimiter`. + /// + /// ```text + /// StringLiteral :: + /// `"` DoubleStringCharacters? `"` + /// `'` SingleStringCharacters? `'` + /// + /// DoubleStringCharacters :: + /// DoubleStringCharacter DoubleStringCharacters? 
+ /// + /// SingleStringCharacters :: + /// SingleStringCharacter SingleStringCharacters? + /// + /// DoubleStringCharacter :: + /// SourceCharacter but not one of `"` or `\` or LineTerminator + /// <LS> + /// <PS> + /// `\` EscapeSequence + /// LineContinuation + /// + /// SingleStringCharacter :: + /// SourceCharacter but not one of `'` or `\` or LineTerminator + /// <LS> + /// <PS> + /// `\` EscapeSequence + /// LineContinuation + /// ``` + fn string_literal(&mut self, delimiter: char) -> Result<'alloc, ()> { + let offset = self.offset() - 1; + let mut builder = AutoCow::new(&self); + loop { + match self.chars.next() { + None | Some('\r') | Some('\n') => { + return Err(ParseError::UnterminatedString.into()); + } + + Some(c @ '"') | Some(c @ '\'') => { + if c == delimiter { + let value = self.string_to_token_value(builder.finish_without_push(&self)); + return self.set_result( + TerminalId::StringLiteral, + SourceLocation::new(offset, self.offset()), + value, + ); + } else { + builder.push_matching(c); + } + } + + Some('\\') => { + let text = builder.get_mut_string_without_current_ascii_char(&self); + self.escape_sequence(text)?; + } + + Some(other) => { + // NonEscapeCharacter :: + // SourceCharacter but not one of EscapeCharacter or LineTerminator + // + // EscapeCharacter :: + // SingleEscapeCharacter + // DecimalDigit + // `x` + // `u` + builder.push_matching(other); + } + } + } + } + + // ------------------------------------------------------------------------ + // 11.8.5 Regular Expression Literals + + fn regular_expression_backslash_sequence(&mut self) -> Result<'alloc, ()> { + match self.chars.next() { + None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => { + Err(ParseError::UnterminatedRegExp.into()) + } + Some(_) => Ok(()), + } + } + + // See 12.2.8 and 11.8.5 sections. 
+ fn regular_expression_literal(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> { + let offset = self.offset(); + + loop { + match self.chars.next() { + None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => { + return Err(ParseError::UnterminatedRegExp.into()); + } + Some('/') => { + break; + } + Some('[') => { + // RegularExpressionClass. + loop { + match self.chars.next() { + None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => { + return Err(ParseError::UnterminatedRegExp.into()); + } + Some(']') => { + break; + } + Some('\\') => { + self.regular_expression_backslash_sequence()?; + } + Some(_) => {} + } + } + } + Some('\\') => { + self.regular_expression_backslash_sequence()?; + } + Some(_) => {} + } + } + let mut flag_text = AutoCow::new(&self); + while let Some(ch) = self.peek() { + match ch { + '$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => { + self.chars.next(); + flag_text.push_matching(ch); + } + _ => break, + } + } + + // 12.2.8.2.1 Assert literal is a RegularExpressionLiteral. + let literal = builder.finish(&self); + + // 12.2.8.2.2 Check that only gimsuy flags are mentioned at most once. + let gimsuy_mask: u32 = ['g', 'i', 'm', 's', 'u', 'y'] + .iter() + .map(|x| 1 << ((*x as u8) - ('a' as u8))) + .sum(); + let mut flag_text_set: u32 = 0; + for ch in flag_text.finish(&self).chars() { + if !ch.is_ascii_lowercase() { + return Err(ParseError::NotImplemented( + "Unexpected flag in regular expression literal", + ) + .into()); + } + let ch_mask = 1 << ((ch as u8) - ('a' as u8)); + if ch_mask & gimsuy_mask == 0 { + return Err(ParseError::NotImplemented( + "Unexpected flag in regular expression literal", + ) + .into()); + } + if flag_text_set & ch_mask != 0 { + return Err(ParseError::NotImplemented( + "Flag is mentioned twice in regular expression literal", + ) + .into()); + } + flag_text_set |= ch_mask; + } + + // TODO: 12.2.8.2.4 and 12.2.8.2.5 Check that the body matches the + // grammar defined in 21.2.1. 
+ + let value = self.slice_to_token_value(literal); + self.set_result( + TerminalId::RegularExpressionLiteral, + SourceLocation::new(offset, self.offset()), + value, + ) + } + + // ------------------------------------------------------------------------ + // 11.8.6 Template Literal Lexical Components + + /// Parse a template literal component token, having already consumed the + /// starting `` ` `` or `}` character. On success, the `id` of the returned + /// `Token` is `subst` (if the token ends with `${`) or `tail` (if the + /// token ends with `` ` ``). + /// + /// ```text + /// NoSubstitutionTemplate :: + /// ``` TemplateCharacters? ``` + /// + /// TemplateHead :: + /// ``` TemplateCharacters? `${` + /// + /// TemplateMiddle :: + /// `}` TemplateCharacters? `${` + /// + /// TemplateTail :: + /// `}` TemplateCharacters? ``` + /// + /// TemplateCharacters :: + /// TemplateCharacter TemplateCharacters? + /// ``` + fn template_part( + &mut self, + start: usize, + subst: TerminalId, + tail: TerminalId, + ) -> Result<'alloc, ()> { + let mut builder = AutoCow::new(&self); + while let Some(ch) = self.chars.next() { + // TemplateCharacter :: + // `$` [lookahead != `{` ] + // `\` EscapeSequence + // `\` NotEscapeSequence + // LineContinuation + // LineTerminatorSequence + // SourceCharacter but not one of ``` or `\` or `$` or LineTerminator + // + // NotEscapeSequence :: + // `0` DecimalDigit + // DecimalDigit but not `0` + // `x` [lookahead <! HexDigit] + // `x` HexDigit [lookahead <! HexDigit] + // `u` [lookahead <! HexDigit] [lookahead != `{`] + // `u` HexDigit [lookahead <! HexDigit] + // `u` HexDigit HexDigit [lookahead <! HexDigit] + // `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit] + // `u` `{` [lookahead <! HexDigit] + // `u` `{` NotCodePoint [lookahead <! HexDigit] + // `u` `{` CodePoint [lookahead <! 
HexDigit] [lookahead != `}`] + // + // NotCodePoint :: + // HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ] + // + // CodePoint :: + // HexDigits [> but only if MV of |HexDigits| ≤ 0x10FFFF ] + if ch == '$' && self.peek() == Some('{') { + self.chars.next(); + let value = self.string_to_token_value(builder.finish_without_push(&self)); + return self.set_result(subst, SourceLocation::new(start, self.offset()), value); + } + if ch == '`' { + let value = self.string_to_token_value(builder.finish_without_push(&self)); + return self.set_result(tail, SourceLocation::new(start, self.offset()), value); + } + // TODO: Support escape sequences. + if ch == '\\' { + let text = builder.get_mut_string_without_current_ascii_char(&self); + self.escape_sequence(text)?; + } else { + builder.push_matching(ch); + } + } + Err(ParseError::UnterminatedString.into()) + } + + fn advance_impl<'parser>(&mut self, parser: &Parser<'parser>) -> Result<'alloc, ()> { + let mut builder = AutoCow::new(&self); + let mut start = self.offset(); + while let Some(c) = self.chars.next() { + match c { + // 11.2 White Space + // + // WhiteSpace :: + // <TAB> + // <VT> + // <FF> + // <SP> + // <NBSP> + // <ZWNBSP> + // <USP> + TAB | + VT | + FF | + SP | + NBSP | + ZWNBSP | + '\u{1680}' | // Ogham space mark (in <USP>) + '\u{2000}' ..= '\u{200a}' | // typesetting spaces (in <USP>) + '\u{202f}' | // Narrow no-break space (in <USP>) + '\u{205f}' | // Medium mathematical space (in <USP>) + '\u{3000}' // Ideographic space (in <USP>) + => { + // TODO - The spec uses <USP> to stand for any character + // with category "Space_Separator" (Zs). New Unicode + // standards may add characters to this set. This should therefore be + // implemented using the Unicode database somehow. 
+ builder = AutoCow::new(&self); + start = self.offset(); + continue; + } + + // 11.3 Line Terminators + // + // LineTerminator :: + // <LF> + // <CR> + // <LS> + // <PS> + LF | CR | LS | PS => { + self.token.is_on_new_line = true; + builder = AutoCow::new(&self); + start = self.offset(); + continue; + } + + '0' => { + let result = self.numeric_literal_starting_with_zero()?; + return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?); + } + + '1'..='9' => { + let result = self.decimal_literal_after_first_digit()?; + return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?); + } + + '"' | '\'' => { + return self.string_literal(c); + } + + '`' => { + return self.template_part(start, TerminalId::TemplateHead, TerminalId::NoSubstitutionTemplate); + } + + '!' => match self.peek() { + Some('=') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::StrictNotEqual, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::LaxNotEqual, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + _ => return self.set_result( + TerminalId::LogicalNot, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '%' => match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::RemainderAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Remainder, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '&' => match self.peek() { + Some('&') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::LogicalAndAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::LogicalAnd, + 
SourceLocation::new(start, self.offset()), + TokenValue::None, + ) + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::BitwiseAndAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::BitwiseAnd, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '*' => match self.peek() { + Some('*') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::ExponentiateAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Exponentiate, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::MultiplyAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Star, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '+' => match self.peek() { + Some('+') => { + self.chars.next(); + return self.set_result( + TerminalId::Increment, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::AddAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Plus, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '-' => match self.peek() { + Some('-') => { + self.chars.next(); + match self.peek() { + Some('>') if self.token.is_on_new_line => { + // B.1.3 SingleLineHTMLCloseComment + // TODO: Limit this to Script (not Module). 
+ self.skip_single_line_comment(&mut builder); + continue; + } + _ => return self.set_result( + TerminalId::Decrement, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::SubtractAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Minus, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '.' => match self.peek() { + Some('.') => { + self.chars.next(); + match self.peek() { + Some('.') => { + self.chars.next(); + return self.set_result( + TerminalId::Ellipsis, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return Err(ParseError::IllegalCharacter('.').into()), + } + } + Some('0'..='9') => { + let result = self.decimal_literal_after_decimal_point()?; + return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?); + } + _ => return self.set_result( + TerminalId::Dot, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '/' => match self.peek() { + Some('/') => { + // SingleLineComment :: `//` SingleLineCommentChars? 
+ self.chars.next(); + self.skip_single_line_comment(&mut builder); + start = self.offset(); + continue; + } + Some('*') => { + self.chars.next(); + self.skip_multi_line_comment(&mut builder)?; + start = self.offset(); + continue; + } + _ => { + if parser.can_accept_terminal(TerminalId::Divide) { + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::DivideAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Divide, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + return self.regular_expression_literal(&mut builder); + } + }, + + '}' => { + if parser.can_accept_terminal(TerminalId::TemplateMiddle) { + return self.template_part(start, TerminalId::TemplateMiddle, TerminalId::TemplateTail); + } + return self.set_result( + TerminalId::CloseBrace, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + + '<' => match self.peek() { + Some('<') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::LeftShiftAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::LeftShift, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::LessThanOrEqualTo, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + Some('!') if self.is_looking_at("!--") => { + // B.1.3 SingleLineHTMLOpenComment. Note that the above + // `is_looking_at` test peeked ahead at the next three + // characters of input. This lookahead is necessary + // because `x<!--` has a comment but `x<!-y` does not. + // + // TODO: Limit this to Script (not Module). 
+ self.skip_single_line_comment(&mut builder); + start = self.offset(); + continue; + } + _ => return self.set_result( + TerminalId::LessThan, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '=' => match self.peek() { + Some('=') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::StrictEqual, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::LaxEqual, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + Some('>') => { + self.chars.next(); + return self.set_result( + TerminalId::Arrow, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::EqualSign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '>' => match self.peek() { + Some('>') => { + self.chars.next(); + match self.peek() { + Some('>') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::UnsignedRightShiftAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::UnsignedRightShift, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::SignedRightShiftAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::SignedRightShift, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::GreaterThanOrEqualTo, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::GreaterThan, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + 
'^' => match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::BitwiseXorAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::BitwiseXor, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '|' => match self.peek() { + Some('|') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::LogicalOrAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::LogicalOr, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ) + } + } + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::BitwiseOrAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::BitwiseOr, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + }, + + '?' 
=> match self.peek() { + Some('?') => { + self.chars.next(); + match self.peek() { + Some('=') => { + self.chars.next(); + return self.set_result( + TerminalId::CoalesceAssign, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::Coalesce, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ) + } + } + Some('.') => { + if let Some('0'..='9') = self.double_peek() { + return self.set_result( + TerminalId::QuestionMark, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ) + } + self.chars.next(); + return self.set_result( + TerminalId::OptionalChain, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ); + } + _ => return self.set_result( + TerminalId::QuestionMark, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + } + + '(' => return self.set_result( + TerminalId::OpenParenthesis, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + ')' => return self.set_result( + TerminalId::CloseParenthesis, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + ',' => return self.set_result( + TerminalId::Comma, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + ':' => return self.set_result( + TerminalId::Colon, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + ';' => return self.set_result( + TerminalId::Semicolon, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + '[' => return self.set_result( + TerminalId::OpenBracket, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + ']' => return self.set_result( + TerminalId::CloseBracket, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + '{' => return self.set_result( + TerminalId::OpenBrace, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ), + '~' => return self.set_result( + TerminalId::BitwiseNot, + SourceLocation::new(start, self.offset()), + 
TokenValue::None, + ), + + // Idents + '$' | '_' | 'a'..='z' | 'A'..='Z' => { + builder.push_matching(c); + return self.identifier_tail(start, builder); + } + + '\\' => { + builder.force_allocation_without_current_ascii_char(&self); + + let value = self.unicode_escape_sequence_after_backslash()?; + if !is_identifier_start(value) { + return Err(ParseError::IllegalCharacter(value).into()); + } + builder.push_different(value); + + return self.identifier_tail(start, builder); + } + + '#' => { + if start == 0 { + // https://tc39.es/proposal-hashbang/out.html + // HashbangComment :: + // `#!` SingleLineCommentChars? + if let Some('!') = self.peek() { + self.skip_single_line_comment(&mut builder); + start = self.offset(); + continue; + } + } + + builder.push_matching(c); + return self.private_identifier(start, builder); + } + + other if is_identifier_start(other) => { + builder.push_matching(other); + return self.identifier_tail(start, builder); + } + + other => { + return Err(ParseError::IllegalCharacter(other).into()); + } + } + } + self.set_result( + TerminalId::End, + SourceLocation::new(start, self.offset()), + TokenValue::None, + ) + } + + fn string_to_token_value(&mut self, s: &'alloc str) -> TokenValue { + let index = self.atoms.borrow_mut().insert(s); + TokenValue::Atom(index) + } + + fn slice_to_token_value(&mut self, s: &'alloc str) -> TokenValue { + let index = self.slices.borrow_mut().push(s); + TokenValue::Slice(index) + } + + fn numeric_result_to_advance_result( + &mut self, + s: &'alloc str, + start: usize, + result: NumericResult, + ) -> Result<'alloc, ()> { + let (terminal_id, value) = match result { + NumericResult::Int { base } => { + let n = parse_int(s, base).map_err(|s| ParseError::NotImplemented(s))?; + (TerminalId::NumericLiteral, TokenValue::Number(n)) + } + NumericResult::Float => { + let n = parse_float(s).map_err(|s| ParseError::NotImplemented(s))?; + (TerminalId::NumericLiteral, TokenValue::Number(n)) + } + NumericResult::BigInt { .. 
} => { + // FIXME + (TerminalId::BigIntLiteral, self.string_to_token_value(s)) + } + }; + + self.set_result( + terminal_id, + SourceLocation::new(start, self.offset()), + value, + ) + } +} + +struct AutoCow<'alloc> { + start: &'alloc str, + value: Option<String<'alloc>>, +} + +impl<'alloc> AutoCow<'alloc> { + fn new(lexer: &Lexer<'alloc>) -> Self { + AutoCow { + start: lexer.chars.as_str(), + value: None, + } + } + + // Push a char that matches lexer.chars.next() + fn push_matching(&mut self, c: char) { + if let Some(text) = &mut self.value { + text.push(c); + } + } + + // Push a different character than lexer.chars.next(). + // force_allocation_without_current_ascii_char must be called before this. + fn push_different(&mut self, c: char) { + debug_assert!(self.value.is_some()); + self.value.as_mut().unwrap().push(c) + } + + // Force allocation of a String, excluding the current ASCII character, + // and return the reference to it + fn get_mut_string_without_current_ascii_char<'b>( + &'b mut self, + lexer: &'_ Lexer<'alloc>, + ) -> &'b mut String<'alloc> { + self.force_allocation_without_current_ascii_char(lexer); + self.value.as_mut().unwrap() + } + + // Force allocation of a String, excluding the current ASCII character. + fn force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'alloc>) { + if self.value.is_some() { + return; + } + + self.value = Some(String::from_str_in( + &self.start[..self.start.len() - lexer.chars.as_str().len() - 1], + lexer.allocator, + )); + } + + // Check if the string contains a different character, such as an escape + // sequence + fn has_different(&self) -> bool { + self.value.is_some() + } + + fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str { + match self.value.take() { + Some(arena_string) => arena_string.into_bump_str(), + None => &self.start[..self.start.len() - lexer.chars.as_str().len()], + } + } + + // Just like finish, but without pushing current char. 
+ fn finish_without_push(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str { + match self.value.take() { + Some(arena_string) => arena_string.into_bump_str(), + None => &self.start[..self.start.len() - lexer.chars.as_str().len() - 1], + } + } +} diff --git a/third_party/rust/jsparagus-parser/src/lib.rs b/third_party/rust/jsparagus-parser/src/lib.rs new file mode 100644 index 0000000000..e78744afa5 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/lib.rs @@ -0,0 +1,111 @@ +#![cfg_attr(feature = "unstable", feature(test))] + +mod lexer; +pub mod numeric_value; +mod parser; +mod queue_stack; +mod simulator; +mod unicode; +mod unicode_data; + +#[cfg(test)] +mod tests; + +extern crate arrayvec; +extern crate jsparagus_ast as ast; +extern crate jsparagus_generated_parser as generated_parser; +extern crate jsparagus_json_log as json_log; + +use crate::parser::Parser; +use ast::{ + arena, + source_atom_set::SourceAtomSet, + source_slice_list::SourceSliceList, + types::{Module, Script}, +}; +use bumpalo; +use generated_parser::{ + AstBuilder, StackValue, TerminalId, START_STATE_MODULE, START_STATE_SCRIPT, TABLES, +}; +pub use generated_parser::{ParseError, Result}; +use json_log::json_debug; +use lexer::Lexer; +use std::cell::RefCell; +use std::rc::Rc; + +pub struct ParseOptions {} +impl ParseOptions { + pub fn new() -> Self { + Self {} + } +} + +pub fn parse_script<'alloc>( + allocator: &'alloc bumpalo::Bump, + source: &'alloc str, + _options: &ParseOptions, + atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, + slices: Rc<RefCell<SourceSliceList<'alloc>>>, +) -> Result<'alloc, arena::Box<'alloc, Script<'alloc>>> { + json_debug!({ + "parse": "script", + }); + Ok(parse(allocator, source, START_STATE_SCRIPT, atoms, slices)?.to_ast()?) 
+} + +pub fn parse_module<'alloc>( + allocator: &'alloc bumpalo::Bump, + source: &'alloc str, + _options: &ParseOptions, + atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, + slices: Rc<RefCell<SourceSliceList<'alloc>>>, +) -> Result<'alloc, arena::Box<'alloc, Module<'alloc>>> { + json_debug!({ + "parse": "module", + }); + Ok(parse(allocator, source, START_STATE_MODULE, atoms, slices)?.to_ast()?) +} + +fn parse<'alloc>( + allocator: &'alloc bumpalo::Bump, + source: &'alloc str, + start_state: usize, + atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, + slices: Rc<RefCell<SourceSliceList<'alloc>>>, +) -> Result<'alloc, StackValue<'alloc>> { + let mut tokens = Lexer::new(allocator, source.chars(), atoms.clone(), slices.clone()); + + TABLES.check(); + + let mut parser = Parser::new(AstBuilder::new(allocator, atoms, slices), start_state); + + loop { + let t = tokens.next(&parser)?; + if t.terminal_id == TerminalId::End { + break; + } + parser.write_token(t)?; + } + parser.close(tokens.offset()) +} + +pub fn is_partial_script<'alloc>( + allocator: &'alloc bumpalo::Bump, + source: &'alloc str, + atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, + slices: Rc<RefCell<SourceSliceList<'alloc>>>, +) -> Result<'alloc, bool> { + let mut parser = Parser::new( + AstBuilder::new(allocator, atoms.clone(), slices.clone()), + START_STATE_SCRIPT, + ); + let mut tokens = Lexer::new(allocator, source.chars(), atoms, slices); + loop { + let t = tokens.next(&parser)?; + if t.terminal_id == TerminalId::End { + break; + } + parser.write_token(t)?; + } + Ok(!parser.can_close()) +} diff --git a/third_party/rust/jsparagus-parser/src/numeric_value.rs b/third_party/rust/jsparagus-parser/src/numeric_value.rs new file mode 100644 index 0000000000..2fd5cab431 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/numeric_value.rs @@ -0,0 +1,160 @@ +//! Parse NumericLiteral. 
+ +pub type ParseNumberResult = Result<f64, &'static str>; + +#[derive(Debug)] +pub enum NumericLiteralBase { + Decimal, + Binary, + Octal, + Hex, +} + +// The number of digits in 2**53 - 1 (integer part of f64). +// 9007199254740991 +const F64_INT_DIGITS_MAX_LEN: usize = 16; +// 11111111111111111111111111111111111111111111111111111 +const F64_INT_BIN_DIGITS_MAX_LEN: usize = 53; +// 377777777777777777 +const F64_INT_OCT_DIGITS_MAX_LEN: usize = 18; +// 1fffffffffffff +const F64_INT_HEX_DIGITS_MAX_LEN: usize = 14; + +// To avoid allocating extra buffer when '_' is present, integer cases are +// handled without Rust standard `parse` function, as long as the value +// won't overflow the integer part of f64. +fn parse_decimal_int(s: &str) -> ParseNumberResult { + debug_assert!(!s.is_empty()); + + // NOTE: Maximum length cannot be handled. + if s.len() >= F64_INT_DIGITS_MAX_LEN { + // Fallback to float function that can handle all cases. + return parse_float(s); + } + + let src = s.as_bytes(); + + let mut result = 0.0; + for &c in src { + match c { + b'0'..=b'9' => { + let n = c - b'0'; + result = result * 10.0 + n as f64; + } + b'_' => {} + _ => panic!("invalid syntax"), + } + } + Ok(result) +} + +fn parse_binary(s: &str) -> ParseNumberResult { + debug_assert!(!s.is_empty()); + + // NOTE: Maximum length can be handled. + if s.len() > F64_INT_BIN_DIGITS_MAX_LEN { + return Err("too long binary literal"); + } + + let src = s.as_bytes(); + + let mut result = 0.0; + for &c in src { + match c { + b'0'..=b'1' => { + let n = c - b'0'; + result = result * 2.0 + n as f64; + } + b'_' => {} + _ => panic!("invalid syntax"), + } + } + Ok(result) +} + +fn parse_octal(s: &str) -> ParseNumberResult { + debug_assert!(!s.is_empty()); + + // NOTE: Maximum length cannot be handled. 
+ if s.len() >= F64_INT_OCT_DIGITS_MAX_LEN { + return Err("too long octal literal"); + } + + let src = s.as_bytes(); + + let mut result = 0.0; + for &c in src { + match c { + b'0'..=b'7' => { + let n = c - b'0'; + result = result * 8.0 + n as f64; + } + b'_' => {} + _ => panic!("invalid syntax"), + } + } + Ok(result) +} + +fn parse_hex(s: &str) -> ParseNumberResult { + debug_assert!(!s.is_empty()); + + // NOTE: Maximum length cannot be handled. + if s.len() >= F64_INT_HEX_DIGITS_MAX_LEN { + return Err("too long hex literal"); + } + + let src = s.as_bytes(); + + let mut result = 0.0; + for &c in src { + match c { + b'0'..=b'9' => { + let n = c - b'0'; + result = result * 16.0 + n as f64; + } + b'A'..=b'F' => { + let n = c - b'A' + 10; + result = result * 16.0 + n as f64; + } + b'a'..=b'f' => { + let n = c - b'a' + 10; + result = result * 16.0 + n as f64; + } + b'_' => {} + _ => panic!("invalid syntax"), + } + } + Ok(result) +} + +/// Parse integer NumericLiteral. +/// +/// NonDecimalIntegerLiteral should contain the leading '0x' etc. +/// +/// FIXME: LegacyOctalIntegerLiteral is not supported. +pub fn parse_int<'alloc>(s: &str, kind: NumericLiteralBase) -> ParseNumberResult { + match kind { + NumericLiteralBase::Decimal => parse_decimal_int(s), + NumericLiteralBase::Binary => parse_binary(&s[2..]), + NumericLiteralBase::Octal => parse_octal(&s[2..]), + NumericLiteralBase::Hex => parse_hex(&s[2..]), + } +} + +fn parse_float_with_underscore(s: &str) -> ParseNumberResult { + let filtered: String = s.chars().filter(|c| *c != '_').collect(); + + filtered + .parse::<f64>() + .map_err(|_| "too long decimal literal") +} + +/// Parse non-integer NumericLiteral. 
+pub fn parse_float(s: &str) -> ParseNumberResult { + if s.contains('_') { + return parse_float_with_underscore(s); + } + + s.parse::<f64>().map_err(|_| "too long decimal literal") +} diff --git a/third_party/rust/jsparagus-parser/src/parser.rs b/third_party/rust/jsparagus-parser/src/parser.rs new file mode 100644 index 0000000000..c9f12a56e7 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/parser.rs @@ -0,0 +1,262 @@ +use crate::queue_stack::QueueStack; +use crate::simulator::Simulator; +use ast::arena; +use ast::SourceLocation; +use generated_parser::{ + full_actions, AstBuilder, AstBuilderDelegate, ErrorCode, ParseError, ParserTrait, Result, + StackValue, TermValue, TerminalId, Token, TABLES, +}; +use json_log::json_trace; + +pub struct Parser<'alloc> { + /// Vector of states visited in the LR parse table. + state_stack: Vec<usize>, + /// Stack and Queue of terms and their associated values. The Queue + /// corresponds to terms which are added as lookahead as well as terms which + /// are replayed, and the stack matches the state_stack. + node_stack: QueueStack<TermValue<StackValue<'alloc>>>, + /// Build the AST stored in the TermValue vectors. + handler: AstBuilder<'alloc>, +} + +impl<'alloc> AstBuilderDelegate<'alloc> for Parser<'alloc> { + fn ast_builder_refmut(&mut self) -> &mut AstBuilder<'alloc> { + &mut self.handler + } +} + +impl<'alloc> ParserTrait<'alloc, StackValue<'alloc>> for Parser<'alloc> { + fn shift(&mut self, tv: TermValue<StackValue<'alloc>>) -> Result<'alloc, bool> { + // The shift function should exit either by accepting the input or + // emptying its queue of lookahead. + debug_assert!(self.node_stack.queue_empty()); + self.node_stack.enqueue(tv); + // Shift the new terminal/nonterminal and its associated value. 
+ json_trace!({ "enter": "shift" }); + let mut state = self.state(); + debug_assert!(state < TABLES.shift_count); + while !self.node_stack.queue_empty() { + let term_index: usize = self.node_stack.next().unwrap().term.into(); + debug_assert!(term_index < TABLES.shift_width); + let index = state * TABLES.shift_width + term_index; + let goto = TABLES.shift_table[index]; + json_trace!({ + "from": state, + "to": goto, + "term": format!("{:?}", { let s: &'static str = tv.term.into(); s }), + }); + if goto < 0 { + self.node_stack.shift(); + let tv = self.node_stack.pop().unwrap(); + // Error handling is in charge of shifting an ErrorSymbol from the + // current state. + self.try_error_handling(tv)?; + continue; + } + state = goto as usize; + self.shift_replayed(state); + // Execute any actions, such as reduce actions ast builder actions. + if state >= TABLES.shift_count { + assert!(state < TABLES.action_count + TABLES.shift_count); + json_trace!({ "action": state }); + if full_actions(self, state)? 
{ + return Ok(true); + } + state = self.state(); + } + debug_assert!(state < TABLES.shift_count); + } + Ok(false) + } + #[inline(always)] + fn shift_replayed(&mut self, state: usize) { + // let term_index: usize = self.node_stack.next().unwrap().term.into(); + // assert!(term_index < TABLES.shift_width); + // let from_state = self.state(); + // let index = from_state * TABLES.shift_width + term_index; + // let goto = TABLES.shift_table[index]; + // assert!((goto as usize) == state); + self.state_stack.push(state); + self.node_stack.shift(); + } + fn unshift(&mut self) { + self.state_stack.pop().unwrap(); + self.node_stack.unshift() + } + fn pop(&mut self) -> TermValue<StackValue<'alloc>> { + self.state_stack.pop().unwrap(); + self.node_stack.pop().unwrap() + } + fn replay(&mut self, tv: TermValue<StackValue<'alloc>>) { + self.node_stack.push_next(tv) + } + fn epsilon(&mut self, state: usize) { + *self.state_stack.last_mut().unwrap() = state; + } + fn top_state(&self) -> usize { + self.state() + } + fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool> { + let sv = { + let stack = self.node_stack.stack_slice(); + &stack[stack.len() - peek].value + }; + if let StackValue::Token(ref token) = sv { + if !token.is_on_new_line { + return Ok(true); + } + self.rewind(peek - 1); + let tv = self.pop(); + self.try_error_handling(tv)?; + return Ok(false); + } + Err(ParseError::NoLineTerminatorHereExpectedToken.into()) + } +} + +impl<'alloc> Parser<'alloc> { + pub fn new(handler: AstBuilder<'alloc>, entry_state: usize) -> Self { + TABLES.check(); + assert!(entry_state < TABLES.shift_count); + let mut state_stack = Vec::with_capacity(128); + state_stack.push(entry_state); + + Self { + state_stack, + node_stack: QueueStack::with_capacity(128), + handler, + } + } + + fn state(&self) -> usize { + *self.state_stack.last().unwrap() + } + + pub fn write_token(&mut self, token: arena::Box<'alloc, Token>) -> Result<'alloc, ()> { + json_trace!({ + "method": 
"write_token", + "is_on_new_line": token.is_on_new_line, + "start": token.loc.start, + "end": token.loc.end, + }); + // Shift the token with the associated StackValue. + let term = token.terminal_id.into(); + let accept = self.shift(TermValue { + term, + value: StackValue::Token(token), + })?; + // JavaScript grammar accepts empty inputs, therefore we can never + // accept any program before receiving a TerminalId::End. + assert!(!accept); + Ok(()) + } + + pub fn close(&mut self, position: usize) -> Result<'alloc, StackValue<'alloc>> { + // Shift the End terminal with the associated StackValue. + json_trace!({ + "method": "close", + "position": position, + }); + let loc = SourceLocation::new(position, position); + let token = Token::basic_token(TerminalId::End, loc); + let accept = self.shift(TermValue { + term: TerminalId::End.into(), + value: StackValue::Token(self.handler.alloc(token)), + })?; + // Adding a TerminalId::End would either lead to a parse error, or to + // accepting the current input. In which case we return matching node + // value. + assert!(accept); + + // We can either reduce a Script/Module, or a Script/Module followed by + // an <End> terminal. + assert!(self.node_stack.stack_len() >= 1); + assert!(self.node_stack.stack_len() <= 2); + if self.node_stack.stack_len() > 1 { + self.node_stack.pop(); + } + Ok(self.node_stack.pop().unwrap().value) + } + + pub(crate) fn parse_error(t: &Token) -> ParseError<'alloc> { + if t.terminal_id == TerminalId::End { + ParseError::UnexpectedEnd + } else { + ParseError::SyntaxError(t.clone()) + } + } + + fn try_error_handling(&mut self, t: TermValue<StackValue<'alloc>>) -> Result<'alloc, bool> { + json_trace!({ + "try_error_handling_term": format!("{}", { + let s: &'static str = t.term.into(); + s + }), + }); + if let StackValue::Token(ref token) = t.value { + // Error tokens might them-self cause more errors to be reported. 
+ // This happens due to the fact that the ErrorToken can be replayed, + // and while the ErrorToken might be in the lookahead rules, it + // might not be in the shifted terms coming after the reduced + // nonterminal. + if t.term == TerminalId::ErrorToken.into() { + return Err(Self::parse_error(token).into()); + } + + // Otherwise, check if the current rule accept an Automatic + // Semi-Colon insertion (ASI). + let state = self.state(); + assert!(state < TABLES.shift_count); + let error_code = TABLES.error_codes[state]; + if let Some(error_code) = error_code { + let err_token = (*token).clone(); + Self::recover(token, error_code)?; + self.replay(t); + let err_token = self.handler.alloc(err_token); + self.replay(TermValue { + term: TerminalId::ErrorToken.into(), + value: StackValue::Token(err_token), + }); + return Ok(false); + } + // On error, don't attempt error handling again. + return Err(Self::parse_error(token).into()); + } + Err(ParseError::ParserCannotUnpackToken.into()) + } + + pub(crate) fn recover(t: &Token, error_code: ErrorCode) -> Result<'alloc, ()> { + match error_code { + ErrorCode::Asi => { + if t.is_on_new_line + || t.terminal_id == TerminalId::End + || t.terminal_id == TerminalId::CloseBrace + { + Ok(()) + } else { + Err(Self::parse_error(t).into()) + } + } + ErrorCode::DoWhileAsi => Ok(()), + } + } + + fn simulator<'a>(&'a self) -> Simulator<'alloc, 'a> { + assert_eq!(self.node_stack.queue_len(), 0); + Simulator::new(&self.state_stack, self.node_stack.stack_slice()) + } + + pub fn can_accept_terminal(&self, t: TerminalId) -> bool { + let result = self.simulator().write_token(t).is_ok(); + json_trace!({ + "can_accept": result, + "terminal": format!("{:?}", t), + }); + result + } + + /// Return true if self.close() would succeed. 
+ pub fn can_close(&self) -> bool { + self.simulator().close(0).is_ok() + } +} diff --git a/third_party/rust/jsparagus-parser/src/queue_stack.rs b/third_party/rust/jsparagus-parser/src/queue_stack.rs new file mode 100644 index 0000000000..f56e2e1b66 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/queue_stack.rs @@ -0,0 +1,256 @@ +//! This module implements a Stack, which is useful for implementing a parser +//! with variable lookahead, as it would allow to pop elements which are below +//! the top-element, and maintain a top counter which would be in charge of +//! moving these elements once shifted. +use std::ptr; + +/// This container implements a stack and a queue in a single vector: +/// - stack: buf[..top] +/// - queue: buf[top + gap..] +/// +/// This structure is meant to avoid moving data when the head of the queue is +/// transfered to the top of the stack. Also, sometimes we need to set items +/// aside from the top of a stack, and then push them back onto the stack later. +/// The queue is for storing these set-aside values. Since they live in the same +/// buffer as the stack, values can be "set aside" and "pushed back on" without +/// moving them at all. +/// +/// In the context of an LR parser, the stack contains shifted elements, and the +/// queue contains the lookahead. If the lexer is completely independent of the +/// parser, all tokens could be queued before starting the parser. +/// +/// The following statements describe how this structure is meant to be used and +/// is described as a stack and a queue displayed as follow: +/// [...stack...] <gap> [...queue...] 
+/// +/// New elements are always inserted in the queue with `enqueue`: +/// [a, b] <no gap> [] +/// * enqueue(c) +/// [a, b] <no gap> [c] +/// +/// These elements are then moved to the stack with `shift`: +/// [a, b] <no gap> [c] +/// * shift() +/// [a, b, c] <no gap> [] +/// +/// The stack top can be set aside in the queue with `unshift`: +/// [a, b, c] <no gap> [] +/// * unshift() +/// [a, b] <no gap> [c] +/// +/// The stack top can be removed with `pop`: +/// [a, b] <no gap> [c] +/// * pop() -> b +/// [a] <gap: 1> [c] +/// * pop() -> a +/// [] <gap: 2> [c] +/// +/// New elements can be added to the front of the queue with `push_next`, which +/// also moves the content of the queue to ensure that `shift` can be used +/// afterward: +/// [] <gap: 2> [c] +/// * push_next(d) +/// [] <no gap> [d, c] +/// +/// These operations are used by LR parser, to add lookahead with `enqueue`, to +/// shift tokens with `shift`, to save tokens to be replayed with `unshift`, to +/// reduce a set of tokens and replace it by a non-terminal with `pop` and +/// `push_next`. +pub struct QueueStack<T> { + /// Buffer containing the stack and the queue. + /// + /// [a, b, c, d, e, f, g, h, i, j] + /// '-----------'<------>'-----' + /// stack ^ gap queue + /// | + /// top -' + buf: Vec<T>, + /// Length of the stack, self.buf[top - 1] being the last element of the + /// stack. + top: usize, + /// Length of the gap between the stack top and the queue head. + gap: usize, +} + +impl<T> QueueStack<T> { + /// Create a queue and stack with the given number of reserved elements. + pub fn with_capacity(n: usize) -> QueueStack<T> { + QueueStack { + buf: Vec::with_capacity(n), + top: 0, + gap: 0, + } + } + + /// Add an element to the back of the queue. + pub fn enqueue(&mut self, value: T) { + self.buf.push(value); + } + + /// Add an element to the front of the queue. 
+ pub fn push_next(&mut self, value: T) { + self.compact_with_gap(1); + self.gap -= 1; + unsafe { + // Write over the gap without reading nor dropping the old entry. + let ptr = self.buf.as_mut_ptr().add(self.top + self.gap); + ptr.write(value); + } + } + + /// Whether elements can be shifted. + pub fn can_shift(&self) -> bool { + self.gap == 0 && !self.queue_empty() + } + + /// Whether elements can be unshifted. + pub fn can_unshift(&self) -> bool { + self.gap == 0 && !self.stack_empty() + } + + /// Transfer an element from the top of the stack to the front of the queue. + /// + /// The gap must be empty. This does not move the value from one address to + /// another in memory; it just adjusts the boundary between the stack and + /// the queue. + /// + /// # Panics + /// If the stack is empty or there is a gap. + pub fn unshift(&mut self) { + assert!(self.can_unshift()); + self.top -= 1; + } + + /// Transfer an element from the front of the queue to the top of the stack. + /// + /// The gap must be empty. This does not move the value from one address to + /// another in memory; it just adjusts the boundary between the stack and + /// the queue. + /// + /// # Panics + /// If the queue is empty or there is a gap. + #[inline(always)] + pub fn shift(&mut self) { + assert!(self.can_shift()); + self.top += 1; + } + + /// Remove the top element of the stack and return it, or None if the stack + /// is empty. + /// + /// This increases the gap size by 1. + pub fn pop(&mut self) -> Option<T> { + if self.top == 0 { + None + } else { + self.top -= 1; + self.gap += 1; + unsafe { + // Take ownership of the content. + let ptr = self.buf.as_mut_ptr().add(self.top); + Some(ptr.read()) + } + } + } + + /// Set the gap size to `new_gap`, memmove-ing the contents of the queue as + /// needed. 
+ fn compact_with_gap(&mut self, new_gap: usize) { + assert!(new_gap <= (std::isize::MAX as usize)); + assert!(self.gap <= (std::isize::MAX as usize)); + let diff = new_gap as isize - self.gap as isize; + if diff == 0 { + return; + } + // Ensure there is enough capacity. + if diff > 0 { + self.buf.reserve(diff as usize); + } + // Number of elements to be copied. + let count = self.queue_len(); + let new_len = self.top + new_gap + count; + assert!(new_len < self.buf.capacity()); + unsafe { + let src_ptr = self.buf.as_mut_ptr().add(self.top + self.gap); + let dst_ptr = src_ptr.offset(diff); + + // Shift everything down/up to have the expected gap. + ptr::copy(src_ptr, dst_ptr, count); + + // Update the buffer length to newly copied elements. + self.buf.set_len(new_len); + // Update the gap to the new gap value. + self.gap = new_gap; + } + debug_assert_eq!(self.queue_len(), count); + } + + /// Returns a reference to the front element of the queue. + pub fn next(&self) -> Option<&T> { + if self.queue_empty() { + None + } else { + Some(&self.buf[self.top + self.gap]) + } + } + + /// Returns a reference to the top element of the stack. + #[allow(dead_code)] + pub fn top(&self) -> Option<&T> { + if self.top == 0 { + None + } else { + Some(&self.buf[self.top - 1]) + } + } + + /// Returns a mutable reference to the top of the stack. + #[allow(dead_code)] + pub fn top_mut(&mut self) -> Option<&mut T> { + if self.top == 0 { + None + } else { + Some(&mut self.buf[self.top - 1]) + } + } + + /// Number of elements in the stack. + pub fn stack_len(&self) -> usize { + self.top + } + + /// Number of elements in the queue. + pub fn queue_len(&self) -> usize { + self.buf.len() - self.top - self.gap + } + + /// Whether the stack is empty. + pub fn stack_empty(&self) -> bool { + self.top == 0 + } + + /// Whether the queue is empty. + pub fn queue_empty(&self) -> bool { + self.top == self.buf.len() + } + + /// Create a slice which corresponds the stack. 
+ pub fn stack_slice(&self) -> &[T] { + &self.buf[..self.top] + } + + /// Create a slice which corresponds the queue. + #[allow(dead_code)] + pub fn queue_slice(&self) -> &[T] { + &self.buf[self.top + self.gap..] + } +} + +impl<T> Drop for QueueStack<T> { + fn drop(&mut self) { + // QueueStack contains a gap of non-initialized values, before releasing + // the vector, we move all initialized values from the queue into the + // remaining gap. + self.compact_with_gap(0); + } +} diff --git a/third_party/rust/jsparagus-parser/src/simulator.rs b/third_party/rust/jsparagus-parser/src/simulator.rs new file mode 100644 index 0000000000..0e060fd5c1 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/simulator.rs @@ -0,0 +1,211 @@ +//! Simulates parser execution, for a single token of input, without incurring +//! any side effects. +//! +//! This is basically a copy of the parser.rs source code with calls to +//! generated_parser::reduce, and stack bookkeeping, omitted. + +use crate::parser::Parser; +use arrayvec::ArrayVec; +use ast::SourceLocation; +use generated_parser::{ + noop_actions, ParseError, ParserTrait, Result, StackValue, TermValue, TerminalId, Token, TABLES, +}; + +/// The Simulator is used to check whether we can shift one token, either to +/// check what might be accepted, or to check whether we can End parsing now. +/// This is used by the REPL to verify whether or not we can end the input. +pub struct Simulator<'alloc, 'parser> { + /// Define the top of the immutable stack. + sp: usize, + /// Immutable state stack coming from the forked parser. + state_stack: &'parser [usize], + /// Immuatable term stack coming from the forked parser. + node_stack: &'parser [TermValue<StackValue<'alloc>>], + /// Mutable state stack used by the simulator on top of the immutable + /// parser's state stack. + /// + /// Uses a fixed-size array as the number of lookahead is bounded to a lower + /// value, panics otherwise. 
+ sim_state_stack: ArrayVec<usize, 4>, + /// Mutable term stack used by the simulator on top of the immutable + /// parser's term stack. + /// + /// Uses a fixed-size array as the number of lookahead is bounded to a lower + /// value, panics otherwise. + sim_node_stack: ArrayVec<TermValue<()>, 4>, + /// Mutable term stack used by the simulator for replaying terms when + /// reducing non-terminals are replaying lookahead terminals. + /// + /// Uses a fixed-size array as the number of lookahead is bounded to a lower + /// value, panics otherwise. + replay_stack: ArrayVec<TermValue<()>, 4>, +} + +impl<'alloc, 'parser> ParserTrait<'alloc, ()> for Simulator<'alloc, 'parser> { + fn shift(&mut self, tv: TermValue<()>) -> Result<'alloc, bool> { + // Shift the new terminal/nonterminal and its associated value. + let mut state = self.state(); + assert!(state < TABLES.shift_count); + let mut tv = tv; + loop { + let term_index: usize = tv.term.into(); + assert!(term_index < TABLES.shift_width); + let index = state * TABLES.shift_width + term_index; + let goto = TABLES.shift_table[index]; + if goto < 0 { + // Error handling is in charge of shifting an ErrorSymbol from the + // current state. + self.try_error_handling(tv)?; + tv = self.replay_stack.pop().unwrap(); + continue; + } + state = goto as usize; + self.sim_state_stack.push(state); + self.sim_node_stack.push(tv); + // Execute any actions, such as reduce actions. + if state >= TABLES.shift_count { + assert!(state < TABLES.action_count + TABLES.shift_count); + if noop_actions(self, state)? 
{ + return Ok(true); + } + state = self.state(); + } + assert!(state < TABLES.shift_count); + if let Some(tv_temp) = self.replay_stack.pop() { + tv = tv_temp; + } else { + break; + } + } + Ok(false) + } + fn shift_replayed(&mut self, state: usize) { + let tv = self.replay_stack.pop().unwrap(); + self.sim_state_stack.push(state); + self.sim_node_stack.push(tv); + } + fn unshift(&mut self) { + let tv = self.pop(); + self.replay(tv) + } + fn pop(&mut self) -> TermValue<()> { + if let Some(s) = self.sim_node_stack.pop() { + self.sim_state_stack.pop(); + return s; + } + let t = self.node_stack[self.sp - 1].term; + self.sp -= 1; + TermValue { term: t, value: () } + } + fn replay(&mut self, tv: TermValue<()>) { + self.replay_stack.push(tv) + } + fn epsilon(&mut self, state: usize) { + if self.sim_state_stack.is_empty() { + self.sim_state_stack.push(self.state_stack[self.sp]); + self.sim_node_stack.push(TermValue { + term: self.node_stack[self.sp - 1].term, + value: (), + }); + self.sp -= 1; + } + *self.sim_state_stack.last_mut().unwrap() = state; + } + fn top_state(&self) -> usize { + self.state() + } + fn check_not_on_new_line(&mut self, _peek: usize) -> Result<'alloc, bool> { + Ok(true) + } +} + +impl<'alloc, 'parser> Simulator<'alloc, 'parser> { + pub fn new( + state_stack: &'parser [usize], + node_stack: &'parser [TermValue<StackValue<'alloc>>], + ) -> Simulator<'alloc, 'parser> { + let sp = state_stack.len() - 1; + assert_eq!(state_stack.len(), node_stack.len() + 1); + Simulator { + sp, + state_stack, + node_stack, + sim_state_stack: ArrayVec::new(), + sim_node_stack: ArrayVec::new(), + replay_stack: ArrayVec::new(), + } + } + + fn state(&self) -> usize { + if let Some(res) = self.sim_state_stack.last() { + *res + } else { + self.state_stack[self.sp] + } + } + + pub fn write_token(&mut self, t: TerminalId) -> Result<'alloc, ()> { + // Shift the token with the associated StackValue. 
+ let accept = self.shift(TermValue { + term: t.into(), + value: (), + })?; + // JavaScript grammar accepts empty inputs, therefore we can never + // accept any program before receiving a TerminalId::End. + assert!(!accept); + Ok(()) + } + + pub fn close(&mut self, _position: usize) -> Result<'alloc, ()> { + // Shift the End terminal with the associated StackValue. + let accept = self.shift(TermValue { + term: TerminalId::End.into(), + value: (), + })?; + // Adding a TerminalId::End would either lead to a parse error, or to + // accepting the current input. In which case we return matching node + // value. + assert!(accept); + + // We can either reduce a Script/Module, or a Script/Module followed by + // an <End> terminal. + assert!(self.sp + self.sim_node_stack.len() >= 1); + Ok(()) + } + + // Simulate the action of Parser::try_error_handling. + fn try_error_handling(&mut self, t: TermValue<()>) -> Result<'alloc, bool> { + if t.term.is_terminal() { + let term = t.term.to_terminal(); + let bogus_loc = SourceLocation::new(0, 0); + let token = &Token::basic_token(term, bogus_loc); + + // Error tokens might them-self cause more errors to be reported. + // This happens due to the fact that the ErrorToken can be replayed, + // and while the ErrorToken might be in the lookahead rules, it + // might not be in the shifted terms coming after the reduced + // nonterminal. + if term == TerminalId::ErrorToken { + return Err(Parser::parse_error(token).into()); + } + + // Otherwise, check if the current rule accept an Automatic + // Semi-Colon insertion (ASI). + let state = self.state(); + assert!(state < TABLES.shift_count); + let error_code = TABLES.error_codes[state]; + if let Some(error_code) = error_code { + Parser::recover(token, error_code)?; + self.replay(t); + self.replay(TermValue { + term: TerminalId::ErrorToken.into(), + value: (), + }); + return Ok(false); + } + return Err(Parser::parse_error(token).into()); + } + // On error, don't attempt error handling again. 
+ Err(ParseError::ParserCannotUnpackToken.into()) + } +} diff --git a/third_party/rust/jsparagus-parser/src/tests.rs b/third_party/rust/jsparagus-parser/src/tests.rs new file mode 100644 index 0000000000..7953dd7554 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/tests.rs @@ -0,0 +1,875 @@ +use std::iter; + +use crate::lexer::Lexer; +use crate::parser::Parser; +use crate::{parse_script, ParseOptions}; +use ast::source_atom_set::SourceAtomSet; +use ast::source_slice_list::SourceSliceList; +use ast::{arena, source_location::SourceLocation, types::*}; +use bumpalo::{self, Bump}; +use generated_parser::{self, AstBuilder, ParseError, Result, TerminalId}; +use std::cell::RefCell; +use std::rc::Rc; + +#[cfg(all(feature = "unstable", test))] +mod benchmarks { + extern crate test; + + use std::fs::File; + use std::io::Read; + use test::Bencher; + + use crate::lexer::Lexer; + use crate::parse_script; + + #[bench] + fn bench_parse_grammar(b: &mut Bencher) { + let mut buffer = fs::read_to_string("../vue.js").expect("reading test file"); + b.iter(|| { + let lexer = Lexer::new(buffer.chars()); + parse_script(lexer).unwrap(); + }); + } +} + +trait IntoChunks<'a> { + type Chunks: Iterator<Item = &'a str>; + fn into_chunks(self) -> Self::Chunks; +} + +impl<'a> IntoChunks<'a> for &'a str { + type Chunks = iter::Once<&'a str>; + fn into_chunks(self) -> Self::Chunks { + iter::once(self) + } +} + +impl<'a> IntoChunks<'a> for &'a Vec<&'a str> { + type Chunks = iter::Cloned<std::slice::Iter<'a, &'a str>>; + fn into_chunks(self) -> Self::Chunks { + self.iter().cloned() + } +} + +// Glue all the chunks together. XXX TODO Once the lexer supports chunks, +// we'll reimplement this to feed the code to the lexer one chunk at a time. 
+fn chunks_to_string<'a, T: IntoChunks<'a>>(code: T) -> String { + let mut buf = String::new(); + for chunk in code.into_chunks() { + buf.push_str(chunk); + } + buf +} + +fn try_parse<'alloc, 'source, Source>( + allocator: &'alloc Bump, + code: Source, +) -> Result<'alloc, arena::Box<'alloc, Script<'alloc>>> +where + Source: IntoChunks<'source>, +{ + let buf = arena::alloc_str(allocator, &chunks_to_string(code)); + let options = ParseOptions::new(); + let atoms = Rc::new(RefCell::new(SourceAtomSet::new())); + let slices = Rc::new(RefCell::new(SourceSliceList::new())); + parse_script(allocator, &buf, &options, atoms, slices) +} + +fn assert_parses<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + try_parse(allocator, code).unwrap(); +} + +fn assert_error<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + assert!(match try_parse(allocator, code).map_err(|e| *e) { + Err(ParseError::NotImplemented(_)) => panic!("expected error, got NotImplemented"), + Err(_) => true, + Ok(ast) => panic!("assertion failed: SUCCESS error: {:?}", ast), + }); +} + +fn assert_syntax_error<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + assert!(match try_parse(allocator, code).map_err(|e| *e) { + Err(ParseError::SyntaxError(_)) => true, + Err(other) => panic!("unexpected error: {:?}", other), + Ok(ast) => panic!("assertion failed: SUCCESS error: {:?}", ast), + }); +} + +fn assert_not_implemented<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + assert!(match try_parse(allocator, code).map_err(|e| *e) { + Err(ParseError::NotImplemented(_)) => true, + Err(other) => panic!("unexpected error: {:?}", other), + Ok(ast) => panic!("assertion failed: SUCCESS error: {:?}", ast), + }); +} + +fn assert_illegal_character<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + assert!(match try_parse(allocator, code).map_err(|e| *e) { + Err(ParseError::IllegalCharacter(_)) => true, 
+ Err(other) => panic!("unexpected error: {:?}", other), + Ok(ast) => panic!("assertion failed: SUCCESS error: {:?}", ast), + }); +} + +fn assert_error_eq<'alloc, T: IntoChunks<'alloc>>(code: T, expected: ParseError) { + let allocator = &Bump::new(); + let result = try_parse(allocator, code); + assert!(result.is_err()); + assert_eq!(*result.unwrap_err(), expected); +} + +fn assert_incomplete<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + let result = try_parse(allocator, code); + assert!(result.is_err()); + assert_eq!(*result.unwrap_err(), ParseError::UnexpectedEnd); +} + +// Assert that `left` and `right`, when parsed as ES Modules, consist of the +// same sequence of tokens (although possibly at different offsets). +fn assert_same_tokens<'alloc>(left: &str, right: &str) { + let allocator = &Bump::new(); + let left_atoms = Rc::new(RefCell::new(SourceAtomSet::new())); + let left_slices = Rc::new(RefCell::new(SourceSliceList::new())); + let right_atoms = Rc::new(RefCell::new(SourceAtomSet::new())); + let right_slices = Rc::new(RefCell::new(SourceSliceList::new())); + let mut left_lexer = Lexer::new( + allocator, + left.chars(), + left_atoms.clone(), + left_slices.clone(), + ); + let mut right_lexer = Lexer::new( + allocator, + right.chars(), + right_atoms.clone(), + right_slices.clone(), + ); + + let mut left_parser = Parser::new( + AstBuilder::new(allocator, left_atoms, left_slices), + generated_parser::START_STATE_MODULE, + ); + let mut right_parser = Parser::new( + AstBuilder::new(allocator, right_atoms, right_slices), + generated_parser::START_STATE_MODULE, + ); + + loop { + let left_token = left_lexer + .next(&left_parser) + .expect("error parsing left string"); + let right_token = right_lexer + .next(&right_parser) + .expect("error parsing right string"); + assert_eq!( + left_token.terminal_id, right_token.terminal_id, + "at offset {} in {:?} / {} in {:?}", + left_token.loc.start, left, right_token.loc.start, right, + ); + 
assert_eq!( + left_token.value, right_token.value, + "at offsets {} / {}", + left_token.loc.start, right_token.loc.start + ); + + if left_token.terminal_id == TerminalId::End { + break; + } + left_parser.write_token(left_token).unwrap(); + right_parser.write_token(right_token).unwrap(); + } + left_parser.close(left_lexer.offset()).unwrap(); + right_parser.close(left_lexer.offset()).unwrap(); +} + +fn assert_can_close_after<'alloc, T: IntoChunks<'alloc>>(code: T) { + let allocator = &Bump::new(); + let buf = chunks_to_string(code); + let atoms = Rc::new(RefCell::new(SourceAtomSet::new())); + let slices = Rc::new(RefCell::new(SourceSliceList::new())); + let mut lexer = Lexer::new(allocator, buf.chars(), atoms.clone(), slices.clone()); + let mut parser = Parser::new( + AstBuilder::new(allocator, atoms, slices), + generated_parser::START_STATE_SCRIPT, + ); + loop { + let t = lexer.next(&parser).expect("lexer error"); + if t.terminal_id == TerminalId::End { + break; + } + parser.write_token(t).unwrap(); + } + assert!(parser.can_close()); +} + +fn assert_same_number(code: &str, expected: f64) { + let allocator = &Bump::new(); + let script = try_parse(allocator, code).unwrap().unbox(); + match &script.statements[0] { + Statement::ExpressionStatement(expression) => match &**expression { + Expression::LiteralNumericExpression(num) => { + assert_eq!(num.value, expected, "{}", code); + } + _ => panic!("expected LiteralNumericExpression"), + }, + _ => panic!("expected ExpressionStatement"), + } +} + +#[test] +fn test_asi_at_end() { + assert_parses("3 + 4"); + assert_syntax_error("3 4"); + assert_incomplete("3 +"); + assert_incomplete("{"); + assert_incomplete("{;"); +} + +#[test] +fn test_asi_at_block_end() { + assert_parses("{ doCrimes() }"); + assert_parses("function f() { ok }"); +} + +#[test] +fn test_asi_after_line_terminator() { + assert_parses( + "switch (value) { + case 1: break + case 2: console.log('2'); + }", + ); + assert_syntax_error("switch (value) { case 1: 
break case 2: console.log('2'); }"); + + // "[T]he presence or absence of single-line comments does not affect the + // process of automatic semicolon insertion[...]." + // <https://tc39.es/ecma262/#sec-comments> + assert_parses("x = 1 // line break here\ny = 2"); + assert_parses("x = 1 // line break here\r\ny = 2"); + assert_parses("x = 1 /* no line break in here */ //\ny = 2"); + assert_parses("x = 1<!-- line break here\ny = 2"); + + assert_syntax_error("x = 1 /* no line break in here */ y = 2"); + assert_parses("x = 1 /* line break \n there */y = 2"); +} + +#[test] +fn test_asi_suppressed() { + // The specification says ASI does not happen in the production + // EmptyStatement : `;`. + // TODO - assert_syntax_error("if (true)"); + assert_syntax_error("{ for (;;) }"); + + // ASI does not happen in for(;;) loops. + assert_syntax_error("for ( \n ; ) {}"); + assert_syntax_error("for ( ; \n ) {}"); + assert_syntax_error("for ( \n \n ) {}"); + assert_syntax_error("for (var i = 0 \n i < 9; i++) {}"); + assert_syntax_error("for (var i = 0; i < 9 \n i++) {}"); + assert_syntax_error("for (i = 0 \n i < 9; i++) {}"); + assert_syntax_error("for (i = 0; i < 9 \n i++) {}"); + assert_syntax_error("for (const i = 0 \n i < 9; i++) {}"); + + // ASI is suppressed in the production ClassElement[Yield, Await] : `;` + // to prevent an infinite loop of ASI. 
lol + assert_syntax_error("class Fail { \n +1; }"); +} + +#[test] +fn test_if_else() { + assert_parses("if (x) f();"); + assert_incomplete("if (x)"); + assert_parses("if (x) f(); else g();"); + assert_incomplete("if (x) f(); else"); + assert_parses("if (x) if (y) g(); else h();"); + assert_parses("if (x) if (y) g(); else h(); else j();"); +} + +#[test] +fn test_lexer_decimal() { + assert_parses("0."); + assert_parses(".5"); + assert_syntax_error("."); +} + +#[test] +fn test_numbers() { + assert_same_number("0", 0.0); + assert_same_number("1", 1.0); + assert_same_number("10", 10.0); + + assert_error_eq("0a", ParseError::IllegalCharacter('a')); + assert_error_eq("1a", ParseError::IllegalCharacter('a')); + + assert_error_eq("1.0a", ParseError::IllegalCharacter('a')); + assert_error_eq(".0a", ParseError::IllegalCharacter('a')); + assert_error_eq("1.a", ParseError::IllegalCharacter('a')); + + assert_same_number("1.0", 1.0); + assert_same_number("1.", 1.0); + assert_same_number("0.", 0.0); + + assert_same_number("1.0e0", 1.0); + assert_same_number("1.e0", 1.0); + assert_same_number(".0e0", 0.0); + + assert_same_number("1.0e+0", 1.0); + assert_same_number("1.e+0", 1.0); + assert_same_number(".0e+0", 0.0); + + assert_same_number("1.0e-0", 1.0); + assert_same_number("1.e-0", 1.0); + assert_same_number(".0e-0", 0.0); + + assert_error_eq("1.0e", ParseError::UnexpectedEnd); + assert_error_eq("1.e", ParseError::UnexpectedEnd); + assert_error_eq(".0e", ParseError::UnexpectedEnd); + + assert_error_eq("1.0e+", ParseError::UnexpectedEnd); + assert_error_eq("1.0e-", ParseError::UnexpectedEnd); + assert_error_eq(".0e+", ParseError::UnexpectedEnd); + assert_error_eq(".0e-", ParseError::UnexpectedEnd); + + assert_same_number("1.0E0", 1.0); + assert_same_number("1.E0", 1.0); + assert_same_number(".0E0", 0.0); + + assert_same_number("1.0E+0", 1.0); + assert_same_number("1.E+0", 1.0); + assert_same_number(".0E+0", 0.0); + + assert_same_number("1.0E-0", 1.0); + assert_same_number("1.E-0", 
1.0); + assert_same_number(".0E-0", 0.0); + + assert_error_eq("1.0E", ParseError::UnexpectedEnd); + assert_error_eq("1.E", ParseError::UnexpectedEnd); + assert_error_eq(".0E", ParseError::UnexpectedEnd); + + assert_error_eq("1.0E+", ParseError::UnexpectedEnd); + assert_error_eq("1.0E-", ParseError::UnexpectedEnd); + assert_error_eq(".0E+", ParseError::UnexpectedEnd); + assert_error_eq(".0E-", ParseError::UnexpectedEnd); + + assert_same_number(".0", 0.0); + assert_parses(""); + + assert_same_number("0b0", 0.0); + + assert_same_number("0b1", 1.0); + assert_same_number("0B01", 1.0); + assert_error_eq("0b", ParseError::UnexpectedEnd); + assert_error_eq("0b ", ParseError::IllegalCharacter(' ')); + assert_error_eq("0b2", ParseError::IllegalCharacter('2')); + + assert_same_number("0o0", 0.0); + assert_same_number("0o7", 7.0); + assert_same_number("0O01234567", 0o01234567 as f64); + assert_error_eq("0o", ParseError::UnexpectedEnd); + assert_error_eq("0o ", ParseError::IllegalCharacter(' ')); + assert_error_eq("0o8", ParseError::IllegalCharacter('8')); + + assert_same_number("0x0", 0.0); + assert_same_number("0xf", 15.0); + assert_not_implemented("0X0123456789abcdef"); + assert_not_implemented("0X0123456789ABCDEF"); + assert_error_eq("0x", ParseError::UnexpectedEnd); + assert_error_eq("0x ", ParseError::IllegalCharacter(' ')); + assert_error_eq("0xg", ParseError::IllegalCharacter('g')); + + assert_parses("1..x"); + + assert_same_number("1_1", 11.0); + assert_same_number("0b1_1", 3.0); + assert_same_number("0o1_1", 9.0); + assert_same_number("0x1_1", 17.0); + + assert_same_number("1_1.1_1", 11.11); + assert_same_number("1_1.1_1e+1_1", 11.11e11); + + assert_error_eq("1_", ParseError::UnexpectedEnd); + assert_error_eq("1._1", ParseError::IllegalCharacter('_')); + assert_error_eq("1.1_", ParseError::UnexpectedEnd); + assert_error_eq("1.1e1_", ParseError::UnexpectedEnd); + assert_error_eq("1.1e_1", ParseError::IllegalCharacter('_')); +} + +#[test] +fn test_numbers_large() { + 
assert_same_number("4294967295", 4294967295.0); + assert_same_number("4294967296", 4294967296.0); + assert_same_number("4294967297", 4294967297.0); + + assert_same_number("9007199254740991", 9007199254740991.0); + assert_same_number("9007199254740992", 9007199254740992.0); + assert_same_number("9007199254740993", 9007199254740992.0); + + assert_same_number("18446744073709553664", 18446744073709552000.0); + assert_same_number("18446744073709553665", 18446744073709556000.0); + + assert_same_number("0b11111111111111111111111111111111", 4294967295.0); + assert_same_number("0b100000000000000000000000000000000", 4294967296.0); + assert_same_number("0b100000000000000000000000000000001", 4294967297.0); + + assert_same_number( + "0b11111111111111111111111111111111111111111111111111111", + 9007199254740991.0, + ); + assert_not_implemented("0b100000000000000000000000000000000000000000000000000000"); + + assert_same_number("0o77777777777777777", 2251799813685247.0); + assert_not_implemented("0o100000000000000000"); + + assert_same_number("0xfffffffffffff", 4503599627370495.0); + assert_not_implemented("0x10000000000000"); + + assert_same_number("4.9406564584124654417656879286822e-324", 5e-324); +} + +#[test] +fn test_bigint() { + assert_not_implemented("0n"); + /* + assert_parses("0n"); + assert_parses("1n"); + assert_parses("10n"); + + assert_error_eq("0na", ParseError::IllegalCharacter('a')); + assert_error_eq("1na", ParseError::IllegalCharacter('a')); + + assert_error_eq("1.0n", ParseError::IllegalCharacter('n')); + assert_error_eq(".0n", ParseError::IllegalCharacter('n')); + assert_error_eq("1.n", ParseError::IllegalCharacter('n')); + + assert_error_eq("1e0n", ParseError::IllegalCharacter('n')); + assert_error_eq("1e+0n", ParseError::IllegalCharacter('n')); + assert_error_eq("1e-0n", ParseError::IllegalCharacter('n')); + assert_error_eq("1E0n", ParseError::IllegalCharacter('n')); + assert_error_eq("1E+0n", ParseError::IllegalCharacter('n')); + assert_error_eq("1E-0n", 
ParseError::IllegalCharacter('n')); + + assert_parses("0b0n"); + + assert_parses("0b1n"); + assert_parses("0B01n"); + assert_error_eq("0bn", ParseError::IllegalCharacter('n')); + + assert_parses("0o0n"); + assert_parses("0o7n"); + assert_parses("0O01234567n"); + assert_error_eq("0on", ParseError::IllegalCharacter('n')); + + assert_parses("0x0n"); + assert_parses("0xfn"); + assert_parses("0X0123456789abcdefn"); + assert_parses("0X0123456789ABCDEFn"); + assert_error_eq("0xn", ParseError::IllegalCharacter('n')); + + assert_parses("1_1n"); + assert_parses("0b1_1n"); + assert_parses("0o1_1n"); + assert_parses("0x1_1n"); + + assert_error_eq("1_1.1_1n", ParseError::IllegalCharacter('n')); + assert_error_eq("1_1.1_1e1_1n", ParseError::IllegalCharacter('n')); + + assert_error_eq("1_n", ParseError::IllegalCharacter('n')); + assert_error_eq("1.1_n", ParseError::IllegalCharacter('n')); + assert_error_eq("1.1e1_n", ParseError::IllegalCharacter('n')); + */ +} + +#[test] +fn test_arrow() { + assert_parses("x => x"); + assert_parses("f = x => x;"); + assert_parses("(x, y) => [y, x]"); + assert_parses("f = (x, y) => {}"); + assert_syntax_error("(x, y) => {x: x, y: y}"); +} + +#[test] +fn test_illegal_character() { + assert_illegal_character("\0"); + assert_illegal_character("—x;"); + assert_illegal_character("const ONE_THIRD = 1 ÷ 3;"); +} + +#[test] +fn test_identifier() { + // U+00B7 MIDDLE DOT is an IdentifierPart. + assert_parses("_·_ = {_·_:'·_·'};"); + + // <ZWJ> and <ZWNJ> match IdentifierPart but not IdentifierStart. + assert_parses("var x\u{200c};"); // <ZWNJ> + assert_parses("_\u{200d}();"); // <ZWJ> + assert_parses("_\u{200d}__();"); // <ZWJ> + assert_parses("_\u{200d}\u{200c}();"); // <ZWJ> + assert_illegal_character("var \u{200c};"); // <ZWNJ> + assert_illegal_character("x = \u{200d};"); // <ZWJ> + + // Other_ID_Start for backward compat. 
+ assert_parses("\u{309B}();"); + assert_parses("\u{309C}();"); + assert_parses("_\u{309B}();"); + assert_parses("_\u{309C}();"); + + // Non-BMP. + assert_parses("\u{10000}();"); + assert_parses("_\u{10000}();"); + assert_illegal_character("\u{1000c}();"); + assert_illegal_character("_\u{1000c}();"); +} + +#[test] +fn test_regexp() { + assert_parses(r"/\w/"); + assert_parses("/[A-Z]/"); + assert_parses("/[//]/"); + assert_parses("/a*a/"); + assert_parses("/**//x*/"); + assert_same_tokens("/**//x*/", "/x*/"); + assert_parses("{} /x/"); + assert_parses("of / 2"); +} + +#[test] +fn test_html_comments() { + assert_same_tokens("x<!--y;", "x"); + assert_same_tokens("x<!-y;", "x < ! - y ;"); + assert_same_tokens("x<!y", "x < ! y"); + + assert_same_tokens("--> hello world\nok", "ok"); + assert_same_tokens("/* ignore */ --> also ignore\nok", "ok"); + assert_same_tokens("/* ignore *//**/--> also ignore\nok", "ok"); + assert_same_tokens("x-->y\nz", "x -- > y\nz"); +} + +#[test] +fn test_incomplete_comments() { + assert_error("/*"); + assert_error("/* hello world"); + assert_error("/* hello world *"); + + assert_parses(&vec!["/* hello\n", " world */"]); + assert_parses(&vec!["// oawfeoiawj", "ioawefoawjie"]); + assert_parses(&vec!["// oawfeoiawj", "ioawefoawjie\n ok();"]); + assert_parses(&vec!["// oawfeoiawj", "ioawefoawjie", "jiowaeawojefiw"]); + assert_parses(&vec![ + "// oawfeoiawj", + "ioawefoawjie", + "jiowaeawojefiw\n ok();", + ]); +} + +#[test] +fn test_strings() { + assert_parses("f(\"\",\"\")"); + assert_parses("f(\"\")"); + assert_parses("(\"\")"); + assert_parses("f('','')"); + assert_parses("f('')"); + assert_parses("('')"); +} + +#[test] +fn test_awkward_chunks() { + assert_parses(&vec!["const", "ructor.length = 1;"]); + assert_parses(&vec!["const", " x = 1;"]); + + // Try feeding one character at a time to the parser. 
+ let chars: Vec<&str> = "function f() { ok(); }".split("").collect(); + assert_parses(&chars); + + // XXX TODO + //assertEqual( + // self.parse(&vec!["/xyzzy/", "g;"]), + // ('Script', + // ('ScriptBody', + // ('StatementList 0', + // ('ExpressionStatement', + // ('PrimaryExpression 10', '/xyzzy/g')))))); + + let allocator = &Bump::new(); + let actual = try_parse(allocator, &vec!["x/", "=2;"]).unwrap(); + let atoms = Rc::new(RefCell::new(SourceAtomSet::new())); + let expected = Script { + directives: arena::Vec::new_in(allocator), + statements: bumpalo::vec![ + in allocator; + Statement::ExpressionStatement(arena::alloc( + allocator, + Expression::CompoundAssignmentExpression { + operator: CompoundAssignmentOperator::Div { + loc: SourceLocation::new(1, 3), + }, + binding: SimpleAssignmentTarget::AssignmentTargetIdentifier( + AssignmentTargetIdentifier { + name: Identifier { + value: atoms.borrow_mut().insert("x"), + loc: SourceLocation::new(0, 1), + }, + loc: SourceLocation::new(0, 1), + }, + ), + expression: arena::alloc( + allocator, + Expression::LiteralNumericExpression(NumericLiteral { + value: 2.0, + loc: SourceLocation::new(3, 4), + }), + ), + loc: SourceLocation::new(0, 4), + }, + )) + ], + loc: SourceLocation::new(0, 4), + }; + assert_eq!(format!("{:?}", actual), format!("{:?}", expected)); +} + +#[test] +fn test_can_close() { + let empty: Vec<&str> = vec![]; + assert_can_close_after(&empty); + assert_can_close_after(""); + assert_can_close_after("2 + 2;\n"); + assert_can_close_after("// seems ok\n"); +} + +#[test] +fn test_regex() { + assert_parses("/x/"); + assert_parses("x = /x/"); + assert_parses("x = /x/g"); + + // FIXME: Unexpected flag + // assert_parses("x = /x/wow_flags_can_be_$$anything$$"); + assert_not_implemented("x = /x/wow_flags_can_be_$$anything$$"); + + // TODO: Should the lexer running out of input throw an incomplete error, or a lexer error? 
+ assert_error_eq("/x", ParseError::UnterminatedRegExp); + assert_incomplete("x = //"); // comment + assert_error_eq("x = /*/", ParseError::UnterminatedMultiLineComment); /*/ comment */ + assert_error_eq("x =/= 2", ParseError::UnterminatedRegExp); + assert_parses("x /= 2"); + assert_parses("x = /[]/"); + assert_parses("x = /[^x]/"); + assert_parses("x = /+=351*/"); + assert_parses("x = /^\\s*function (\\w+)/;"); + assert_parses("const regexp = /this is fine: [/] dont @ me/;"); +} + +#[test] +fn test_arrow_parameters() { + assert_error_eq( + "({a:a, ...b, c:c}) => {}", + ParseError::ObjectPatternWithNonFinalRest, + ); + assert_error_eq( + "(a, [...zero, one]) => {}", + ParseError::ArrayPatternWithNonFinalRest, + ); + assert_error_eq( + "(a, {items: [...zero, one]}) => {}", + ParseError::ArrayPatternWithNonFinalRest, + ); +} + +#[test] +fn test_invalid_assignment_targets() { + assert_syntax_error("2 + 2 = x;"); + assert_error_eq("(2 + 2) = x;", ParseError::InvalidAssignmentTarget); + assert_error_eq("++-x;", ParseError::InvalidAssignmentTarget); + assert_error_eq("(x && y)--;", ParseError::InvalidAssignmentTarget); +} + +#[test] +fn test_can_close_with_asi() { + assert_can_close_after("2 + 2\n"); +} + +#[test] +fn test_conditional_keywords() { + // property names + assert_parses("const obj = {if: 3, function: 4};"); + assert_parses("const obj = {true: 1, false: 0, null: NaN};"); + assert_parses("assert(obj.if == 3);"); + assert_parses("assert(obj.true + obj.false + obj.null == NaN);"); + + // method names + assert_parses( + " + class C { + if() {} + function() {} + } + ", + ); + + // FIXME: let (multitoken lookahead): + assert_not_implemented("let a = 1;"); + /* + // let as identifier + assert_parses("var let = [new Date];"); + // let as keyword, then identifier + assert_parses("let v = let;"); + // `let .` -> ExpressionStatement + assert_parses("let.length;"); + // `let [` -> LexicalDeclaration + assert_syntax_error("let[0].getYear();"); + */ + + assert_parses( + " 
+ var of = [1, 2, 3]; + for (of of of) console.log(of); // logs 1, 2, 3 + ", + ); + + // Not implemented: + // assert_parses("var of, let, private, target;"); + + assert_parses("class X { get y() {} }"); + + // Not implemented: + // assert_parses("async: { break async; }"); + + assert_parses("var get = { get get() {}, set get(v) {}, set: 3 };"); + + // Not implemented (requires hack; grammar is not LR(1)): + // assert_parses("for (async of => {};;) {}"); + // assert_parses("for (async of []) {}"); +} + +#[test] +fn test_async_arrows() { + // FIXME: async (multiple lookahead) + assert_not_implemented("const a = async a => 1;"); + /* + assert_parses("let f = async arg => body;"); + assert_parses("f = async (a1, a2) => {};"); + assert_parses("f = async (a1 = b + c, ...a2) => {};"); + + assert_error_eq("f = async (a, b + c) => {};", ParseError::InvalidParameter); + assert_error_eq( + "f = async (...a1, a2) => {};", + ParseError::ArrowParametersWithNonFinalRest, + ); + assert_error_eq("obj.async() => {}", ParseError::ArrowHeadInvalid); + */ + + assert_error_eq("foo(a, b) => {}", ParseError::ArrowHeadInvalid); +} + +#[test] +fn test_binary() { + assert_parses("1 == 2"); + assert_parses("1 != 2"); + assert_parses("1 === 2"); + assert_parses("1 !== 2"); + assert_parses("1 < 2"); + assert_parses("1 <= 2"); + assert_parses("1 > 2"); + assert_parses("1 >= 2"); + assert_parses("1 in 2"); + assert_parses("1 instanceof 2"); + assert_parses("1 << 2"); + assert_parses("1 >> 2"); + assert_parses("1 >>> 2"); + assert_parses("1 + 2"); + assert_parses("1 - 2"); + assert_parses("1 * 2"); + assert_parses("1 / 2"); + assert_parses("1 % 2"); + assert_parses("1 ** 2"); + assert_parses("1 , 2"); + assert_parses("1 || 2"); + assert_parses("1 && 2"); + assert_parses("1 | 2"); + assert_parses("1 ^ 2"); + assert_parses("1 & 2"); +} + +#[test] +fn test_coalesce() { + assert_parses("const f = options.prop ?? 0;"); + assert_syntax_error("if (options.prop ?? 
0 || options.prop > 1000) {}"); +} + +#[test] +fn test_no_line_terminator_here() { + // Parse `code` as a Script and compute some function of the resulting AST. + fn parse_then<F, R>(code: &str, f: F) -> R + where + F: FnOnce(&Script) -> R, + { + let allocator = &Bump::new(); + match try_parse(allocator, code) { + Err(err) => { + panic!("Failed to parse code {:?}: {}", code, err); + } + Ok(script) => f(&*script), + } + } + + // Parse `code` as a Script and return the number of top-level + // StatementListItems. + fn count_items(code: &str) -> usize { + parse_then(code, |script| script.statements.len()) + } + + // Without a newline, labelled `break` in loop. But a line break changes + // the meaning -- then it's a plain `break` statement, followed by + // ExpressionStatement `LOOP;` + assert_eq!(count_items("LOOP: while (true) break LOOP;"), 1); + assert_eq!(count_items("LOOP: while (true) break \n LOOP;"), 2); + + // The same, but for `continue`. + assert_eq!(count_items("LOOP: while (true) continue LOOP;"), 1); + assert_eq!(count_items("LOOP: while (true) continue \n LOOP;"), 2); + + // Parse `code` as a Script, expected to contain a single function + // declaration, and return the number of statements in the function body. 
+ fn count_statements_in_function(code: &str) -> usize { + parse_then(code, |script| { + assert_eq!( + script.statements.len(), + 1, + "expected function declaration, got {:?}", + script + ); + match &script.statements[0] { + Statement::FunctionDeclaration(func) => func.body.statements.len(), + _ => panic!("expected function declaration, got {:?}", script), + } + }) + } + + assert_eq!( + count_statements_in_function("function f() { return x; }"), + 1 + ); + assert_eq!( + count_statements_in_function("function f() { return\n x; }"), + 2 + ); + + assert_parses("x++"); + assert_incomplete("x\n++"); + + assert_parses("throw fit;"); + assert_syntax_error("throw\nfit;"); + + // Alternative ways of spelling LineTerminator + assert_syntax_error("throw//\nfit;"); + assert_syntax_error("throw/*\n*/fit;"); + assert_syntax_error("throw\rfit;"); + assert_syntax_error("throw\r\nfit;"); +} diff --git a/third_party/rust/jsparagus-parser/src/unicode.rs b/third_party/rust/jsparagus-parser/src/unicode.rs new file mode 100644 index 0000000000..16bfc1fe85 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/unicode.rs @@ -0,0 +1,41 @@ +use crate::unicode_data::{ + char_info, is_id_continue_non_bmp, is_id_start_non_bmp, IS_ID_CONTINUE_TABLE, IS_ID_START_TABLE, +}; + +const UTF16_MAX: char = '\u{ffff}'; + +fn is_id_start_ascii(c: char) -> bool { + IS_ID_START_TABLE[c as usize] +} + +fn is_id_continue_ascii(c: char) -> bool { + IS_ID_CONTINUE_TABLE[c as usize] +} + +fn is_id_start_bmp_non_ascii(c: char) -> bool { + char_info(c).is_id_start() +} + +fn is_id_continue_bmp_non_ascii(c: char) -> bool { + char_info(c).is_id_continue() +} + +pub fn is_id_start(c: char) -> bool { + if c > UTF16_MAX { + return is_id_start_non_bmp(c); + } + if c < '\u{80}' { + return is_id_start_ascii(c); + } + is_id_start_bmp_non_ascii(c) +} + +pub fn is_id_continue(c: char) -> bool { + if c > UTF16_MAX { + return is_id_continue_non_bmp(c); + } + if c < '\u{80}' { + return is_id_continue_ascii(c); + } + 
is_id_continue_bmp_non_ascii(c) +} diff --git a/third_party/rust/jsparagus-parser/src/unicode_data.rs b/third_party/rust/jsparagus-parser/src/unicode_data.rs new file mode 100644 index 0000000000..7b2acb773b --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/unicode_data.rs @@ -0,0 +1,2117 @@ +// Generated by update_unicode.py DO NOT MODIFY +// Unicode version: 14.0.0 + +const FLAG_ID_START: u8 = 1; +const FLAG_ID_CONTINUE: u8 = 2; + +pub struct CharInfo { + flags: u8, +} + +impl CharInfo { + pub fn is_id_start(&self) -> bool { + self.flags & FLAG_ID_START != 0 + } + + pub fn is_id_continue(&self) -> bool { + self.flags & FLAG_ID_CONTINUE != 0 + } +} + +pub const CHAR_INFO_TABLE: &'static [CharInfo] = &[ + CharInfo { flags: 0 }, + CharInfo { flags: 2 }, + CharInfo { flags: 3 }, +]; + +pub const INDEX1: &'static [u8] = &[ + 0, 0, 0, 1, 2, 3, 2, 4, 0, 0, 5, 6, 7, 8, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 0, 12, 12, 12, 12, 12, 12, 12, 13, 14, 7, 15, 7, + 7, 7, 7, 16, 7, 7, 7, 7, 7, 7, 7, 7, 17, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 18, 7, 7, 19, 20, + 12, 21, 22, 7, 23, 24, 0, 25, 7, 7, 26, 12, 27, 28, 7, 7, 7, 7, 7, 29, 30, 31, 0, 32, 7, 12, + 33, 7, 7, 7, 7, 7, 34, 35, 36, 7, 26, 37, 7, 38, 39, 0, 7, 40, 4, 7, 41, 42, 7, 7, 43, 12, 44, + 12, 45, 7, 7, 46, 12, 47, 48, 2, 49, 50, 51, 52, 53, 54, 48, 55, 56, 50, 51, 57, 58, 59, 60, + 61, 62, 15, 51, 63, 64, 65, 48, 66, 67, 50, 51, 63, 68, 69, 48, 70, 71, 72, 73, 74, 75, 76, 60, + 0, 77, 78, 51, 79, 80, 81, 48, 0, 82, 78, 51, 83, 80, 84, 48, 85, 86, 78, 7, 87, 88, 89, 48, + 90, 91, 92, 7, 93, 94, 95, 60, 96, 2, 7, 7, 97, 98, 1, 0, 0, 99, 7, 100, 101, 102, 103, 0, 0, + 65, 104, 1, 105, 106, 7, 107, 20, 108, 109, 12, 110, 111, 0, 0, 0, 7, 7, 26, 112, 1, 113, 114, + 115, 116, 117, 7, 7, 118, 7, 7, 119, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 120, 121, 7, 7, 120, 7, 7, 122, 123, 8, 7, 7, 7, 123, 7, 7, 7, 124, 125, 126, 7, 0, 7, 7, 7, 
+ 7, 7, 127, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 128, 7, 2, 4, 7, 7, 7, 7, 129, 19, 7, 130, 7, 131, 7, 132, 133, + 134, 7, 7, 7, 135, 12, 136, 1, 0, 137, 1, 7, 7, 7, 7, 7, 19, 7, 7, 138, 7, 7, 7, 7, 139, 7, + 140, 141, 141, 60, 7, 142, 143, 7, 7, 144, 7, 145, 25, 0, 0, 7, 146, 7, 7, 7, 147, 12, 148, 1, + 1, 149, 21, 150, 0, 0, 0, 151, 7, 7, 135, 152, 1, 153, 154, 155, 7, 156, 36, 7, 7, 34, 154, 7, + 7, 135, 157, 158, 36, 7, 142, 19, 7, 7, 159, 0, 160, 161, 162, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 12, 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 127, 7, 7, 127, 163, 7, + 142, 7, 7, 7, 164, 165, 166, 107, 165, 0, 0, 0, 167, 168, 169, 0, 170, 0, 107, 0, 0, 0, 110, + 171, 168, 172, 173, 174, 175, 176, 0, 7, 7, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 177, 178, 7, 7, 118, 7, 7, 7, 179, 167, 7, 180, 181, 181, 181, 181, 12, 12, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 182, 0, 183, 184, 2, 7, 7, 7, 7, 185, 2, 7, 7, 7, 7, 119, 186, 7, 7, 2, 7, 7, 7, 7, 140, 0, 7, + 7, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 107, 0, 0, 0, 0, 7, 7, 142, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 107, 7, 187, 0, 7, 7, 188, 189, 7, 190, 7, 7, 7, 7, 7, 126, 0, 191, 192, + 7, 7, 7, 7, 7, 193, 7, 7, 7, 4, 194, 0, 192, 195, 7, 196, 0, 7, 7, 7, 197, 198, 7, 7, 135, 199, + 1, 12, 200, 36, 7, 201, 7, 202, 154, 7, 107, 45, 7, 7, 203, 204, 1, 205, 206, 7, 7, 207, 208, + 209, 1, 7, 210, 7, 7, 7, 211, 212, 213, 26, 214, 215, 216, 181, 7, 7, 119, 145, 7, 7, 7, 7, 7, + 7, 7, 217, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 197, 7, 218, 7, 7, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 142, 7, 7, 7, 7, 7, 7, 145, 0, 0, 180, 219, 51, 220, + 221, 7, 7, 7, 7, 7, 7, 10, 0, 222, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 142, 0, 7, 7, 7, 7, 192, 7, 7, 223, 0, 0, 144, 12, 0, 12, 224, 225, 0, 0, 226, 7, 7, 7, 7, + 7, 7, 7, 107, 0, 1, 2, 3, 2, 4, 227, 7, 7, 7, 7, 140, 228, 229, 0, 0, +]; + +pub const INDEX2: &'static [u8] = &[ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, + 0, 0, 0, 0, 0, 0, 2, 1, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, + 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, + 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, 2, + 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 
2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, + 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, + 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, + 0, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 1, 0, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, + 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 0, 2, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, + 2, 0, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, + 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 2, 2, 0, 0, 2, 0, 0, + 2, 1, 1, 1, 0, 2, 2, 2, 2, 2, 
2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 1, 2, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, + 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, + 2, 2, 2, 2, 2, 0, 2, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, + 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, + 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 
0, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, + 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, + 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, + 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, + 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 0, 2, + 2, 2, 2, 
2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, + 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, + 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, + 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, + 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, + 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 
2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, + 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, + 0, 0, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, + 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 1, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, +]; + +const SHIFT: usize = 4; + +pub fn char_info(c: char) -> &'static CharInfo { + let code = c as usize; + let index = INDEX1[code >> SHIFT] as usize; + let index = INDEX2[(index << SHIFT) + (code & ((1 << SHIFT) - 1))] as usize; + + &CHAR_INFO_TABLE[index] +} + +pub const IS_ID_START_TABLE: &'static [bool] = &[ + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, 
false, + true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, + false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, + false, false, +]; + +pub const IS_ID_CONTINUE_TABLE: &'static [bool] = &[ + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, true, true, true, true, true, + true, true, true, true, true, false, false, false, false, false, false, false, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, false, false, true, false, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, false, false, false, false, false, +]; + +pub fn is_id_start_non_bmp(c: char) -> bool { + if c >= '\u{10000}' && c <= '\u{1000B}' { + return true; + } + if c >= '\u{1000D}' && c <= '\u{10026}' { + return true; + } + if c >= '\u{10028}' && c <= '\u{1003A}' { + return true; + } + if c >= '\u{1003C}' && c <= '\u{1003D}' { + return true; + } + if c >= '\u{1003F}' && c <= '\u{1004D}' { + return true; + } + if c >= '\u{10050}' && c <= '\u{1005D}' { + return true; + } + if c >= '\u{10080}' && c <= '\u{100FA}' { + return true; + } + if c >= '\u{10140}' && c <= '\u{10174}' { + return true; + } + if c >= '\u{10280}' && c <= '\u{1029C}' { + return true; + } + if c >= '\u{102A0}' && c <= '\u{102D0}' { + return true; + } 
+ if c >= '\u{10300}' && c <= '\u{1031F}' { + return true; + } + if c >= '\u{1032D}' && c <= '\u{1034A}' { + return true; + } + if c >= '\u{10350}' && c <= '\u{10375}' { + return true; + } + if c >= '\u{10380}' && c <= '\u{1039D}' { + return true; + } + if c >= '\u{103A0}' && c <= '\u{103C3}' { + return true; + } + if c >= '\u{103C8}' && c <= '\u{103CF}' { + return true; + } + if c >= '\u{103D1}' && c <= '\u{103D5}' { + return true; + } + if c >= '\u{10400}' && c <= '\u{1049D}' { + return true; + } + if c >= '\u{104B0}' && c <= '\u{104D3}' { + return true; + } + if c >= '\u{104D8}' && c <= '\u{104FB}' { + return true; + } + if c >= '\u{10500}' && c <= '\u{10527}' { + return true; + } + if c >= '\u{10530}' && c <= '\u{10563}' { + return true; + } + if c >= '\u{10570}' && c <= '\u{1057A}' { + return true; + } + if c >= '\u{1057C}' && c <= '\u{1058A}' { + return true; + } + if c >= '\u{1058C}' && c <= '\u{10592}' { + return true; + } + if c >= '\u{10594}' && c <= '\u{10595}' { + return true; + } + if c >= '\u{10597}' && c <= '\u{105A1}' { + return true; + } + if c >= '\u{105A3}' && c <= '\u{105B1}' { + return true; + } + if c >= '\u{105B3}' && c <= '\u{105B9}' { + return true; + } + if c >= '\u{105BB}' && c <= '\u{105BC}' { + return true; + } + if c >= '\u{10600}' && c <= '\u{10736}' { + return true; + } + if c >= '\u{10740}' && c <= '\u{10755}' { + return true; + } + if c >= '\u{10760}' && c <= '\u{10767}' { + return true; + } + if c >= '\u{10780}' && c <= '\u{10785}' { + return true; + } + if c >= '\u{10787}' && c <= '\u{107B0}' { + return true; + } + if c >= '\u{107B2}' && c <= '\u{107BA}' { + return true; + } + if c >= '\u{10800}' && c <= '\u{10805}' { + return true; + } + if c >= '\u{10808}' && c <= '\u{10808}' { + return true; + } + if c >= '\u{1080A}' && c <= '\u{10835}' { + return true; + } + if c >= '\u{10837}' && c <= '\u{10838}' { + return true; + } + if c >= '\u{1083C}' && c <= '\u{1083C}' { + return true; + } + if c >= '\u{1083F}' && c <= '\u{10855}' { + 
return true; + } + if c >= '\u{10860}' && c <= '\u{10876}' { + return true; + } + if c >= '\u{10880}' && c <= '\u{1089E}' { + return true; + } + if c >= '\u{108E0}' && c <= '\u{108F2}' { + return true; + } + if c >= '\u{108F4}' && c <= '\u{108F5}' { + return true; + } + if c >= '\u{10900}' && c <= '\u{10915}' { + return true; + } + if c >= '\u{10920}' && c <= '\u{10939}' { + return true; + } + if c >= '\u{10980}' && c <= '\u{109B7}' { + return true; + } + if c >= '\u{109BE}' && c <= '\u{109BF}' { + return true; + } + if c >= '\u{10A00}' && c <= '\u{10A00}' { + return true; + } + if c >= '\u{10A10}' && c <= '\u{10A13}' { + return true; + } + if c >= '\u{10A15}' && c <= '\u{10A17}' { + return true; + } + if c >= '\u{10A19}' && c <= '\u{10A35}' { + return true; + } + if c >= '\u{10A60}' && c <= '\u{10A7C}' { + return true; + } + if c >= '\u{10A80}' && c <= '\u{10A9C}' { + return true; + } + if c >= '\u{10AC0}' && c <= '\u{10AC7}' { + return true; + } + if c >= '\u{10AC9}' && c <= '\u{10AE4}' { + return true; + } + if c >= '\u{10B00}' && c <= '\u{10B35}' { + return true; + } + if c >= '\u{10B40}' && c <= '\u{10B55}' { + return true; + } + if c >= '\u{10B60}' && c <= '\u{10B72}' { + return true; + } + if c >= '\u{10B80}' && c <= '\u{10B91}' { + return true; + } + if c >= '\u{10C00}' && c <= '\u{10C48}' { + return true; + } + if c >= '\u{10C80}' && c <= '\u{10CB2}' { + return true; + } + if c >= '\u{10CC0}' && c <= '\u{10CF2}' { + return true; + } + if c >= '\u{10D00}' && c <= '\u{10D23}' { + return true; + } + if c >= '\u{10E80}' && c <= '\u{10EA9}' { + return true; + } + if c >= '\u{10EB0}' && c <= '\u{10EB1}' { + return true; + } + if c >= '\u{10F00}' && c <= '\u{10F1C}' { + return true; + } + if c >= '\u{10F27}' && c <= '\u{10F27}' { + return true; + } + if c >= '\u{10F30}' && c <= '\u{10F45}' { + return true; + } + if c >= '\u{10F70}' && c <= '\u{10F81}' { + return true; + } + if c >= '\u{10FB0}' && c <= '\u{10FC4}' { + return true; + } + if c >= '\u{10FE0}' && c <= 
'\u{10FF6}' { + return true; + } + if c >= '\u{11003}' && c <= '\u{11037}' { + return true; + } + if c >= '\u{11071}' && c <= '\u{11072}' { + return true; + } + if c >= '\u{11075}' && c <= '\u{11075}' { + return true; + } + if c >= '\u{11083}' && c <= '\u{110AF}' { + return true; + } + if c >= '\u{110D0}' && c <= '\u{110E8}' { + return true; + } + if c >= '\u{11103}' && c <= '\u{11126}' { + return true; + } + if c >= '\u{11144}' && c <= '\u{11144}' { + return true; + } + if c >= '\u{11147}' && c <= '\u{11147}' { + return true; + } + if c >= '\u{11150}' && c <= '\u{11172}' { + return true; + } + if c >= '\u{11176}' && c <= '\u{11176}' { + return true; + } + if c >= '\u{11183}' && c <= '\u{111B2}' { + return true; + } + if c >= '\u{111C1}' && c <= '\u{111C4}' { + return true; + } + if c >= '\u{111DA}' && c <= '\u{111DA}' { + return true; + } + if c >= '\u{111DC}' && c <= '\u{111DC}' { + return true; + } + if c >= '\u{11200}' && c <= '\u{11211}' { + return true; + } + if c >= '\u{11213}' && c <= '\u{1122B}' { + return true; + } + if c >= '\u{11280}' && c <= '\u{11286}' { + return true; + } + if c >= '\u{11288}' && c <= '\u{11288}' { + return true; + } + if c >= '\u{1128A}' && c <= '\u{1128D}' { + return true; + } + if c >= '\u{1128F}' && c <= '\u{1129D}' { + return true; + } + if c >= '\u{1129F}' && c <= '\u{112A8}' { + return true; + } + if c >= '\u{112B0}' && c <= '\u{112DE}' { + return true; + } + if c >= '\u{11305}' && c <= '\u{1130C}' { + return true; + } + if c >= '\u{1130F}' && c <= '\u{11310}' { + return true; + } + if c >= '\u{11313}' && c <= '\u{11328}' { + return true; + } + if c >= '\u{1132A}' && c <= '\u{11330}' { + return true; + } + if c >= '\u{11332}' && c <= '\u{11333}' { + return true; + } + if c >= '\u{11335}' && c <= '\u{11339}' { + return true; + } + if c >= '\u{1133D}' && c <= '\u{1133D}' { + return true; + } + if c >= '\u{11350}' && c <= '\u{11350}' { + return true; + } + if c >= '\u{1135D}' && c <= '\u{11361}' { + return true; + } + if c >= 
'\u{11400}' && c <= '\u{11434}' { + return true; + } + if c >= '\u{11447}' && c <= '\u{1144A}' { + return true; + } + if c >= '\u{1145F}' && c <= '\u{11461}' { + return true; + } + if c >= '\u{11480}' && c <= '\u{114AF}' { + return true; + } + if c >= '\u{114C4}' && c <= '\u{114C5}' { + return true; + } + if c >= '\u{114C7}' && c <= '\u{114C7}' { + return true; + } + if c >= '\u{11580}' && c <= '\u{115AE}' { + return true; + } + if c >= '\u{115D8}' && c <= '\u{115DB}' { + return true; + } + if c >= '\u{11600}' && c <= '\u{1162F}' { + return true; + } + if c >= '\u{11644}' && c <= '\u{11644}' { + return true; + } + if c >= '\u{11680}' && c <= '\u{116AA}' { + return true; + } + if c >= '\u{116B8}' && c <= '\u{116B8}' { + return true; + } + if c >= '\u{11700}' && c <= '\u{1171A}' { + return true; + } + if c >= '\u{11740}' && c <= '\u{11746}' { + return true; + } + if c >= '\u{11800}' && c <= '\u{1182B}' { + return true; + } + if c >= '\u{118A0}' && c <= '\u{118DF}' { + return true; + } + if c >= '\u{118FF}' && c <= '\u{11906}' { + return true; + } + if c >= '\u{11909}' && c <= '\u{11909}' { + return true; + } + if c >= '\u{1190C}' && c <= '\u{11913}' { + return true; + } + if c >= '\u{11915}' && c <= '\u{11916}' { + return true; + } + if c >= '\u{11918}' && c <= '\u{1192F}' { + return true; + } + if c >= '\u{1193F}' && c <= '\u{1193F}' { + return true; + } + if c >= '\u{11941}' && c <= '\u{11941}' { + return true; + } + if c >= '\u{119A0}' && c <= '\u{119A7}' { + return true; + } + if c >= '\u{119AA}' && c <= '\u{119D0}' { + return true; + } + if c >= '\u{119E1}' && c <= '\u{119E1}' { + return true; + } + if c >= '\u{119E3}' && c <= '\u{119E3}' { + return true; + } + if c >= '\u{11A00}' && c <= '\u{11A00}' { + return true; + } + if c >= '\u{11A0B}' && c <= '\u{11A32}' { + return true; + } + if c >= '\u{11A3A}' && c <= '\u{11A3A}' { + return true; + } + if c >= '\u{11A50}' && c <= '\u{11A50}' { + return true; + } + if c >= '\u{11A5C}' && c <= '\u{11A89}' { + return 
true; + } + if c >= '\u{11A9D}' && c <= '\u{11A9D}' { + return true; + } + if c >= '\u{11AB0}' && c <= '\u{11AF8}' { + return true; + } + if c >= '\u{11C00}' && c <= '\u{11C08}' { + return true; + } + if c >= '\u{11C0A}' && c <= '\u{11C2E}' { + return true; + } + if c >= '\u{11C40}' && c <= '\u{11C40}' { + return true; + } + if c >= '\u{11C72}' && c <= '\u{11C8F}' { + return true; + } + if c >= '\u{11D00}' && c <= '\u{11D06}' { + return true; + } + if c >= '\u{11D08}' && c <= '\u{11D09}' { + return true; + } + if c >= '\u{11D0B}' && c <= '\u{11D30}' { + return true; + } + if c >= '\u{11D46}' && c <= '\u{11D46}' { + return true; + } + if c >= '\u{11D60}' && c <= '\u{11D65}' { + return true; + } + if c >= '\u{11D67}' && c <= '\u{11D68}' { + return true; + } + if c >= '\u{11D6A}' && c <= '\u{11D89}' { + return true; + } + if c >= '\u{11D98}' && c <= '\u{11D98}' { + return true; + } + if c >= '\u{11EE0}' && c <= '\u{11EF2}' { + return true; + } + if c >= '\u{11FB0}' && c <= '\u{11FB0}' { + return true; + } + if c >= '\u{12000}' && c <= '\u{12399}' { + return true; + } + if c >= '\u{12400}' && c <= '\u{1246E}' { + return true; + } + if c >= '\u{12480}' && c <= '\u{12543}' { + return true; + } + if c >= '\u{12F90}' && c <= '\u{12FF0}' { + return true; + } + if c >= '\u{13000}' && c <= '\u{1342E}' { + return true; + } + if c >= '\u{14400}' && c <= '\u{14646}' { + return true; + } + if c >= '\u{16800}' && c <= '\u{16A38}' { + return true; + } + if c >= '\u{16A40}' && c <= '\u{16A5E}' { + return true; + } + if c >= '\u{16A70}' && c <= '\u{16ABE}' { + return true; + } + if c >= '\u{16AD0}' && c <= '\u{16AED}' { + return true; + } + if c >= '\u{16B00}' && c <= '\u{16B2F}' { + return true; + } + if c >= '\u{16B40}' && c <= '\u{16B43}' { + return true; + } + if c >= '\u{16B63}' && c <= '\u{16B77}' { + return true; + } + if c >= '\u{16B7D}' && c <= '\u{16B8F}' { + return true; + } + if c >= '\u{16E40}' && c <= '\u{16E7F}' { + return true; + } + if c >= '\u{16F00}' && c <= 
'\u{16F4A}' { + return true; + } + if c >= '\u{16F50}' && c <= '\u{16F50}' { + return true; + } + if c >= '\u{16F93}' && c <= '\u{16F9F}' { + return true; + } + if c >= '\u{16FE0}' && c <= '\u{16FE1}' { + return true; + } + if c >= '\u{16FE3}' && c <= '\u{16FE3}' { + return true; + } + if c >= '\u{17000}' && c <= '\u{187F7}' { + return true; + } + if c >= '\u{18800}' && c <= '\u{18CD5}' { + return true; + } + if c >= '\u{18D00}' && c <= '\u{18D08}' { + return true; + } + if c >= '\u{1AFF0}' && c <= '\u{1AFF3}' { + return true; + } + if c >= '\u{1AFF5}' && c <= '\u{1AFFB}' { + return true; + } + if c >= '\u{1AFFD}' && c <= '\u{1AFFE}' { + return true; + } + if c >= '\u{1B000}' && c <= '\u{1B122}' { + return true; + } + if c >= '\u{1B150}' && c <= '\u{1B152}' { + return true; + } + if c >= '\u{1B164}' && c <= '\u{1B167}' { + return true; + } + if c >= '\u{1B170}' && c <= '\u{1B2FB}' { + return true; + } + if c >= '\u{1BC00}' && c <= '\u{1BC6A}' { + return true; + } + if c >= '\u{1BC70}' && c <= '\u{1BC7C}' { + return true; + } + if c >= '\u{1BC80}' && c <= '\u{1BC88}' { + return true; + } + if c >= '\u{1BC90}' && c <= '\u{1BC99}' { + return true; + } + if c >= '\u{1D400}' && c <= '\u{1D454}' { + return true; + } + if c >= '\u{1D456}' && c <= '\u{1D49C}' { + return true; + } + if c >= '\u{1D49E}' && c <= '\u{1D49F}' { + return true; + } + if c >= '\u{1D4A2}' && c <= '\u{1D4A2}' { + return true; + } + if c >= '\u{1D4A5}' && c <= '\u{1D4A6}' { + return true; + } + if c >= '\u{1D4A9}' && c <= '\u{1D4AC}' { + return true; + } + if c >= '\u{1D4AE}' && c <= '\u{1D4B9}' { + return true; + } + if c >= '\u{1D4BB}' && c <= '\u{1D4BB}' { + return true; + } + if c >= '\u{1D4BD}' && c <= '\u{1D4C3}' { + return true; + } + if c >= '\u{1D4C5}' && c <= '\u{1D505}' { + return true; + } + if c >= '\u{1D507}' && c <= '\u{1D50A}' { + return true; + } + if c >= '\u{1D50D}' && c <= '\u{1D514}' { + return true; + } + if c >= '\u{1D516}' && c <= '\u{1D51C}' { + return true; + } + if c >= 
'\u{1D51E}' && c <= '\u{1D539}' { + return true; + } + if c >= '\u{1D53B}' && c <= '\u{1D53E}' { + return true; + } + if c >= '\u{1D540}' && c <= '\u{1D544}' { + return true; + } + if c >= '\u{1D546}' && c <= '\u{1D546}' { + return true; + } + if c >= '\u{1D54A}' && c <= '\u{1D550}' { + return true; + } + if c >= '\u{1D552}' && c <= '\u{1D6A5}' { + return true; + } + if c >= '\u{1D6A8}' && c <= '\u{1D6C0}' { + return true; + } + if c >= '\u{1D6C2}' && c <= '\u{1D6DA}' { + return true; + } + if c >= '\u{1D6DC}' && c <= '\u{1D6FA}' { + return true; + } + if c >= '\u{1D6FC}' && c <= '\u{1D714}' { + return true; + } + if c >= '\u{1D716}' && c <= '\u{1D734}' { + return true; + } + if c >= '\u{1D736}' && c <= '\u{1D74E}' { + return true; + } + if c >= '\u{1D750}' && c <= '\u{1D76E}' { + return true; + } + if c >= '\u{1D770}' && c <= '\u{1D788}' { + return true; + } + if c >= '\u{1D78A}' && c <= '\u{1D7A8}' { + return true; + } + if c >= '\u{1D7AA}' && c <= '\u{1D7C2}' { + return true; + } + if c >= '\u{1D7C4}' && c <= '\u{1D7CB}' { + return true; + } + if c >= '\u{1DF00}' && c <= '\u{1DF1E}' { + return true; + } + if c >= '\u{1E100}' && c <= '\u{1E12C}' { + return true; + } + if c >= '\u{1E137}' && c <= '\u{1E13D}' { + return true; + } + if c >= '\u{1E14E}' && c <= '\u{1E14E}' { + return true; + } + if c >= '\u{1E290}' && c <= '\u{1E2AD}' { + return true; + } + if c >= '\u{1E2C0}' && c <= '\u{1E2EB}' { + return true; + } + if c >= '\u{1E7E0}' && c <= '\u{1E7E6}' { + return true; + } + if c >= '\u{1E7E8}' && c <= '\u{1E7EB}' { + return true; + } + if c >= '\u{1E7ED}' && c <= '\u{1E7EE}' { + return true; + } + if c >= '\u{1E7F0}' && c <= '\u{1E7FE}' { + return true; + } + if c >= '\u{1E800}' && c <= '\u{1E8C4}' { + return true; + } + if c >= '\u{1E900}' && c <= '\u{1E943}' { + return true; + } + if c >= '\u{1E94B}' && c <= '\u{1E94B}' { + return true; + } + if c >= '\u{1EE00}' && c <= '\u{1EE03}' { + return true; + } + if c >= '\u{1EE05}' && c <= '\u{1EE1F}' { + return 
true; + } + if c >= '\u{1EE21}' && c <= '\u{1EE22}' { + return true; + } + if c >= '\u{1EE24}' && c <= '\u{1EE24}' { + return true; + } + if c >= '\u{1EE27}' && c <= '\u{1EE27}' { + return true; + } + if c >= '\u{1EE29}' && c <= '\u{1EE32}' { + return true; + } + if c >= '\u{1EE34}' && c <= '\u{1EE37}' { + return true; + } + if c >= '\u{1EE39}' && c <= '\u{1EE39}' { + return true; + } + if c >= '\u{1EE3B}' && c <= '\u{1EE3B}' { + return true; + } + if c >= '\u{1EE42}' && c <= '\u{1EE42}' { + return true; + } + if c >= '\u{1EE47}' && c <= '\u{1EE47}' { + return true; + } + if c >= '\u{1EE49}' && c <= '\u{1EE49}' { + return true; + } + if c >= '\u{1EE4B}' && c <= '\u{1EE4B}' { + return true; + } + if c >= '\u{1EE4D}' && c <= '\u{1EE4F}' { + return true; + } + if c >= '\u{1EE51}' && c <= '\u{1EE52}' { + return true; + } + if c >= '\u{1EE54}' && c <= '\u{1EE54}' { + return true; + } + if c >= '\u{1EE57}' && c <= '\u{1EE57}' { + return true; + } + if c >= '\u{1EE59}' && c <= '\u{1EE59}' { + return true; + } + if c >= '\u{1EE5B}' && c <= '\u{1EE5B}' { + return true; + } + if c >= '\u{1EE5D}' && c <= '\u{1EE5D}' { + return true; + } + if c >= '\u{1EE5F}' && c <= '\u{1EE5F}' { + return true; + } + if c >= '\u{1EE61}' && c <= '\u{1EE62}' { + return true; + } + if c >= '\u{1EE64}' && c <= '\u{1EE64}' { + return true; + } + if c >= '\u{1EE67}' && c <= '\u{1EE6A}' { + return true; + } + if c >= '\u{1EE6C}' && c <= '\u{1EE72}' { + return true; + } + if c >= '\u{1EE74}' && c <= '\u{1EE77}' { + return true; + } + if c >= '\u{1EE79}' && c <= '\u{1EE7C}' { + return true; + } + if c >= '\u{1EE7E}' && c <= '\u{1EE7E}' { + return true; + } + if c >= '\u{1EE80}' && c <= '\u{1EE89}' { + return true; + } + if c >= '\u{1EE8B}' && c <= '\u{1EE9B}' { + return true; + } + if c >= '\u{1EEA1}' && c <= '\u{1EEA3}' { + return true; + } + if c >= '\u{1EEA5}' && c <= '\u{1EEA9}' { + return true; + } + if c >= '\u{1EEAB}' && c <= '\u{1EEBB}' { + return true; + } + if c >= '\u{20000}' && c <= 
'\u{2A6DF}' { + return true; + } + if c >= '\u{2A700}' && c <= '\u{2B738}' { + return true; + } + if c >= '\u{2B740}' && c <= '\u{2B81D}' { + return true; + } + if c >= '\u{2B820}' && c <= '\u{2CEA1}' { + return true; + } + if c >= '\u{2CEB0}' && c <= '\u{2EBE0}' { + return true; + } + if c >= '\u{2F800}' && c <= '\u{2FA1D}' { + return true; + } + if c >= '\u{30000}' && c <= '\u{3134A}' { + return true; + } + false +} + +pub fn is_id_continue_non_bmp(c: char) -> bool { + if c >= '\u{10000}' && c <= '\u{1000B}' { + return true; + } + if c >= '\u{1000D}' && c <= '\u{10026}' { + return true; + } + if c >= '\u{10028}' && c <= '\u{1003A}' { + return true; + } + if c >= '\u{1003C}' && c <= '\u{1003D}' { + return true; + } + if c >= '\u{1003F}' && c <= '\u{1004D}' { + return true; + } + if c >= '\u{10050}' && c <= '\u{1005D}' { + return true; + } + if c >= '\u{10080}' && c <= '\u{100FA}' { + return true; + } + if c >= '\u{10140}' && c <= '\u{10174}' { + return true; + } + if c >= '\u{101FD}' && c <= '\u{101FD}' { + return true; + } + if c >= '\u{10280}' && c <= '\u{1029C}' { + return true; + } + if c >= '\u{102A0}' && c <= '\u{102D0}' { + return true; + } + if c >= '\u{102E0}' && c <= '\u{102E0}' { + return true; + } + if c >= '\u{10300}' && c <= '\u{1031F}' { + return true; + } + if c >= '\u{1032D}' && c <= '\u{1034A}' { + return true; + } + if c >= '\u{10350}' && c <= '\u{1037A}' { + return true; + } + if c >= '\u{10380}' && c <= '\u{1039D}' { + return true; + } + if c >= '\u{103A0}' && c <= '\u{103C3}' { + return true; + } + if c >= '\u{103C8}' && c <= '\u{103CF}' { + return true; + } + if c >= '\u{103D1}' && c <= '\u{103D5}' { + return true; + } + if c >= '\u{10400}' && c <= '\u{1049D}' { + return true; + } + if c >= '\u{104A0}' && c <= '\u{104A9}' { + return true; + } + if c >= '\u{104B0}' && c <= '\u{104D3}' { + return true; + } + if c >= '\u{104D8}' && c <= '\u{104FB}' { + return true; + } + if c >= '\u{10500}' && c <= '\u{10527}' { + return true; + } + if c >= 
'\u{10530}' && c <= '\u{10563}' { + return true; + } + if c >= '\u{10570}' && c <= '\u{1057A}' { + return true; + } + if c >= '\u{1057C}' && c <= '\u{1058A}' { + return true; + } + if c >= '\u{1058C}' && c <= '\u{10592}' { + return true; + } + if c >= '\u{10594}' && c <= '\u{10595}' { + return true; + } + if c >= '\u{10597}' && c <= '\u{105A1}' { + return true; + } + if c >= '\u{105A3}' && c <= '\u{105B1}' { + return true; + } + if c >= '\u{105B3}' && c <= '\u{105B9}' { + return true; + } + if c >= '\u{105BB}' && c <= '\u{105BC}' { + return true; + } + if c >= '\u{10600}' && c <= '\u{10736}' { + return true; + } + if c >= '\u{10740}' && c <= '\u{10755}' { + return true; + } + if c >= '\u{10760}' && c <= '\u{10767}' { + return true; + } + if c >= '\u{10780}' && c <= '\u{10785}' { + return true; + } + if c >= '\u{10787}' && c <= '\u{107B0}' { + return true; + } + if c >= '\u{107B2}' && c <= '\u{107BA}' { + return true; + } + if c >= '\u{10800}' && c <= '\u{10805}' { + return true; + } + if c >= '\u{10808}' && c <= '\u{10808}' { + return true; + } + if c >= '\u{1080A}' && c <= '\u{10835}' { + return true; + } + if c >= '\u{10837}' && c <= '\u{10838}' { + return true; + } + if c >= '\u{1083C}' && c <= '\u{1083C}' { + return true; + } + if c >= '\u{1083F}' && c <= '\u{10855}' { + return true; + } + if c >= '\u{10860}' && c <= '\u{10876}' { + return true; + } + if c >= '\u{10880}' && c <= '\u{1089E}' { + return true; + } + if c >= '\u{108E0}' && c <= '\u{108F2}' { + return true; + } + if c >= '\u{108F4}' && c <= '\u{108F5}' { + return true; + } + if c >= '\u{10900}' && c <= '\u{10915}' { + return true; + } + if c >= '\u{10920}' && c <= '\u{10939}' { + return true; + } + if c >= '\u{10980}' && c <= '\u{109B7}' { + return true; + } + if c >= '\u{109BE}' && c <= '\u{109BF}' { + return true; + } + if c >= '\u{10A00}' && c <= '\u{10A03}' { + return true; + } + if c >= '\u{10A05}' && c <= '\u{10A06}' { + return true; + } + if c >= '\u{10A0C}' && c <= '\u{10A13}' { + return 
true; + } + if c >= '\u{10A15}' && c <= '\u{10A17}' { + return true; + } + if c >= '\u{10A19}' && c <= '\u{10A35}' { + return true; + } + if c >= '\u{10A38}' && c <= '\u{10A3A}' { + return true; + } + if c >= '\u{10A3F}' && c <= '\u{10A3F}' { + return true; + } + if c >= '\u{10A60}' && c <= '\u{10A7C}' { + return true; + } + if c >= '\u{10A80}' && c <= '\u{10A9C}' { + return true; + } + if c >= '\u{10AC0}' && c <= '\u{10AC7}' { + return true; + } + if c >= '\u{10AC9}' && c <= '\u{10AE6}' { + return true; + } + if c >= '\u{10B00}' && c <= '\u{10B35}' { + return true; + } + if c >= '\u{10B40}' && c <= '\u{10B55}' { + return true; + } + if c >= '\u{10B60}' && c <= '\u{10B72}' { + return true; + } + if c >= '\u{10B80}' && c <= '\u{10B91}' { + return true; + } + if c >= '\u{10C00}' && c <= '\u{10C48}' { + return true; + } + if c >= '\u{10C80}' && c <= '\u{10CB2}' { + return true; + } + if c >= '\u{10CC0}' && c <= '\u{10CF2}' { + return true; + } + if c >= '\u{10D00}' && c <= '\u{10D27}' { + return true; + } + if c >= '\u{10D30}' && c <= '\u{10D39}' { + return true; + } + if c >= '\u{10E80}' && c <= '\u{10EA9}' { + return true; + } + if c >= '\u{10EAB}' && c <= '\u{10EAC}' { + return true; + } + if c >= '\u{10EB0}' && c <= '\u{10EB1}' { + return true; + } + if c >= '\u{10F00}' && c <= '\u{10F1C}' { + return true; + } + if c >= '\u{10F27}' && c <= '\u{10F27}' { + return true; + } + if c >= '\u{10F30}' && c <= '\u{10F50}' { + return true; + } + if c >= '\u{10F70}' && c <= '\u{10F85}' { + return true; + } + if c >= '\u{10FB0}' && c <= '\u{10FC4}' { + return true; + } + if c >= '\u{10FE0}' && c <= '\u{10FF6}' { + return true; + } + if c >= '\u{11000}' && c <= '\u{11046}' { + return true; + } + if c >= '\u{11066}' && c <= '\u{11075}' { + return true; + } + if c >= '\u{1107F}' && c <= '\u{110BA}' { + return true; + } + if c >= '\u{110C2}' && c <= '\u{110C2}' { + return true; + } + if c >= '\u{110D0}' && c <= '\u{110E8}' { + return true; + } + if c >= '\u{110F0}' && c <= 
'\u{110F9}' { + return true; + } + if c >= '\u{11100}' && c <= '\u{11134}' { + return true; + } + if c >= '\u{11136}' && c <= '\u{1113F}' { + return true; + } + if c >= '\u{11144}' && c <= '\u{11147}' { + return true; + } + if c >= '\u{11150}' && c <= '\u{11173}' { + return true; + } + if c >= '\u{11176}' && c <= '\u{11176}' { + return true; + } + if c >= '\u{11180}' && c <= '\u{111C4}' { + return true; + } + if c >= '\u{111C9}' && c <= '\u{111CC}' { + return true; + } + if c >= '\u{111CE}' && c <= '\u{111DA}' { + return true; + } + if c >= '\u{111DC}' && c <= '\u{111DC}' { + return true; + } + if c >= '\u{11200}' && c <= '\u{11211}' { + return true; + } + if c >= '\u{11213}' && c <= '\u{11237}' { + return true; + } + if c >= '\u{1123E}' && c <= '\u{1123E}' { + return true; + } + if c >= '\u{11280}' && c <= '\u{11286}' { + return true; + } + if c >= '\u{11288}' && c <= '\u{11288}' { + return true; + } + if c >= '\u{1128A}' && c <= '\u{1128D}' { + return true; + } + if c >= '\u{1128F}' && c <= '\u{1129D}' { + return true; + } + if c >= '\u{1129F}' && c <= '\u{112A8}' { + return true; + } + if c >= '\u{112B0}' && c <= '\u{112EA}' { + return true; + } + if c >= '\u{112F0}' && c <= '\u{112F9}' { + return true; + } + if c >= '\u{11300}' && c <= '\u{11303}' { + return true; + } + if c >= '\u{11305}' && c <= '\u{1130C}' { + return true; + } + if c >= '\u{1130F}' && c <= '\u{11310}' { + return true; + } + if c >= '\u{11313}' && c <= '\u{11328}' { + return true; + } + if c >= '\u{1132A}' && c <= '\u{11330}' { + return true; + } + if c >= '\u{11332}' && c <= '\u{11333}' { + return true; + } + if c >= '\u{11335}' && c <= '\u{11339}' { + return true; + } + if c >= '\u{1133B}' && c <= '\u{11344}' { + return true; + } + if c >= '\u{11347}' && c <= '\u{11348}' { + return true; + } + if c >= '\u{1134B}' && c <= '\u{1134D}' { + return true; + } + if c >= '\u{11350}' && c <= '\u{11350}' { + return true; + } + if c >= '\u{11357}' && c <= '\u{11357}' { + return true; + } + if c >= 
'\u{1135D}' && c <= '\u{11363}' { + return true; + } + if c >= '\u{11366}' && c <= '\u{1136C}' { + return true; + } + if c >= '\u{11370}' && c <= '\u{11374}' { + return true; + } + if c >= '\u{11400}' && c <= '\u{1144A}' { + return true; + } + if c >= '\u{11450}' && c <= '\u{11459}' { + return true; + } + if c >= '\u{1145E}' && c <= '\u{11461}' { + return true; + } + if c >= '\u{11480}' && c <= '\u{114C5}' { + return true; + } + if c >= '\u{114C7}' && c <= '\u{114C7}' { + return true; + } + if c >= '\u{114D0}' && c <= '\u{114D9}' { + return true; + } + if c >= '\u{11580}' && c <= '\u{115B5}' { + return true; + } + if c >= '\u{115B8}' && c <= '\u{115C0}' { + return true; + } + if c >= '\u{115D8}' && c <= '\u{115DD}' { + return true; + } + if c >= '\u{11600}' && c <= '\u{11640}' { + return true; + } + if c >= '\u{11644}' && c <= '\u{11644}' { + return true; + } + if c >= '\u{11650}' && c <= '\u{11659}' { + return true; + } + if c >= '\u{11680}' && c <= '\u{116B8}' { + return true; + } + if c >= '\u{116C0}' && c <= '\u{116C9}' { + return true; + } + if c >= '\u{11700}' && c <= '\u{1171A}' { + return true; + } + if c >= '\u{1171D}' && c <= '\u{1172B}' { + return true; + } + if c >= '\u{11730}' && c <= '\u{11739}' { + return true; + } + if c >= '\u{11740}' && c <= '\u{11746}' { + return true; + } + if c >= '\u{11800}' && c <= '\u{1183A}' { + return true; + } + if c >= '\u{118A0}' && c <= '\u{118E9}' { + return true; + } + if c >= '\u{118FF}' && c <= '\u{11906}' { + return true; + } + if c >= '\u{11909}' && c <= '\u{11909}' { + return true; + } + if c >= '\u{1190C}' && c <= '\u{11913}' { + return true; + } + if c >= '\u{11915}' && c <= '\u{11916}' { + return true; + } + if c >= '\u{11918}' && c <= '\u{11935}' { + return true; + } + if c >= '\u{11937}' && c <= '\u{11938}' { + return true; + } + if c >= '\u{1193B}' && c <= '\u{11943}' { + return true; + } + if c >= '\u{11950}' && c <= '\u{11959}' { + return true; + } + if c >= '\u{119A0}' && c <= '\u{119A7}' { + return 
true; + } + if c >= '\u{119AA}' && c <= '\u{119D7}' { + return true; + } + if c >= '\u{119DA}' && c <= '\u{119E1}' { + return true; + } + if c >= '\u{119E3}' && c <= '\u{119E4}' { + return true; + } + if c >= '\u{11A00}' && c <= '\u{11A3E}' { + return true; + } + if c >= '\u{11A47}' && c <= '\u{11A47}' { + return true; + } + if c >= '\u{11A50}' && c <= '\u{11A99}' { + return true; + } + if c >= '\u{11A9D}' && c <= '\u{11A9D}' { + return true; + } + if c >= '\u{11AB0}' && c <= '\u{11AF8}' { + return true; + } + if c >= '\u{11C00}' && c <= '\u{11C08}' { + return true; + } + if c >= '\u{11C0A}' && c <= '\u{11C36}' { + return true; + } + if c >= '\u{11C38}' && c <= '\u{11C40}' { + return true; + } + if c >= '\u{11C50}' && c <= '\u{11C59}' { + return true; + } + if c >= '\u{11C72}' && c <= '\u{11C8F}' { + return true; + } + if c >= '\u{11C92}' && c <= '\u{11CA7}' { + return true; + } + if c >= '\u{11CA9}' && c <= '\u{11CB6}' { + return true; + } + if c >= '\u{11D00}' && c <= '\u{11D06}' { + return true; + } + if c >= '\u{11D08}' && c <= '\u{11D09}' { + return true; + } + if c >= '\u{11D0B}' && c <= '\u{11D36}' { + return true; + } + if c >= '\u{11D3A}' && c <= '\u{11D3A}' { + return true; + } + if c >= '\u{11D3C}' && c <= '\u{11D3D}' { + return true; + } + if c >= '\u{11D3F}' && c <= '\u{11D47}' { + return true; + } + if c >= '\u{11D50}' && c <= '\u{11D59}' { + return true; + } + if c >= '\u{11D60}' && c <= '\u{11D65}' { + return true; + } + if c >= '\u{11D67}' && c <= '\u{11D68}' { + return true; + } + if c >= '\u{11D6A}' && c <= '\u{11D8E}' { + return true; + } + if c >= '\u{11D90}' && c <= '\u{11D91}' { + return true; + } + if c >= '\u{11D93}' && c <= '\u{11D98}' { + return true; + } + if c >= '\u{11DA0}' && c <= '\u{11DA9}' { + return true; + } + if c >= '\u{11EE0}' && c <= '\u{11EF6}' { + return true; + } + if c >= '\u{11FB0}' && c <= '\u{11FB0}' { + return true; + } + if c >= '\u{12000}' && c <= '\u{12399}' { + return true; + } + if c >= '\u{12400}' && c <= 
'\u{1246E}' { + return true; + } + if c >= '\u{12480}' && c <= '\u{12543}' { + return true; + } + if c >= '\u{12F90}' && c <= '\u{12FF0}' { + return true; + } + if c >= '\u{13000}' && c <= '\u{1342E}' { + return true; + } + if c >= '\u{14400}' && c <= '\u{14646}' { + return true; + } + if c >= '\u{16800}' && c <= '\u{16A38}' { + return true; + } + if c >= '\u{16A40}' && c <= '\u{16A5E}' { + return true; + } + if c >= '\u{16A60}' && c <= '\u{16A69}' { + return true; + } + if c >= '\u{16A70}' && c <= '\u{16ABE}' { + return true; + } + if c >= '\u{16AC0}' && c <= '\u{16AC9}' { + return true; + } + if c >= '\u{16AD0}' && c <= '\u{16AED}' { + return true; + } + if c >= '\u{16AF0}' && c <= '\u{16AF4}' { + return true; + } + if c >= '\u{16B00}' && c <= '\u{16B36}' { + return true; + } + if c >= '\u{16B40}' && c <= '\u{16B43}' { + return true; + } + if c >= '\u{16B50}' && c <= '\u{16B59}' { + return true; + } + if c >= '\u{16B63}' && c <= '\u{16B77}' { + return true; + } + if c >= '\u{16B7D}' && c <= '\u{16B8F}' { + return true; + } + if c >= '\u{16E40}' && c <= '\u{16E7F}' { + return true; + } + if c >= '\u{16F00}' && c <= '\u{16F4A}' { + return true; + } + if c >= '\u{16F4F}' && c <= '\u{16F87}' { + return true; + } + if c >= '\u{16F8F}' && c <= '\u{16F9F}' { + return true; + } + if c >= '\u{16FE0}' && c <= '\u{16FE1}' { + return true; + } + if c >= '\u{16FE3}' && c <= '\u{16FE4}' { + return true; + } + if c >= '\u{16FF0}' && c <= '\u{16FF1}' { + return true; + } + if c >= '\u{17000}' && c <= '\u{187F7}' { + return true; + } + if c >= '\u{18800}' && c <= '\u{18CD5}' { + return true; + } + if c >= '\u{18D00}' && c <= '\u{18D08}' { + return true; + } + if c >= '\u{1AFF0}' && c <= '\u{1AFF3}' { + return true; + } + if c >= '\u{1AFF5}' && c <= '\u{1AFFB}' { + return true; + } + if c >= '\u{1AFFD}' && c <= '\u{1AFFE}' { + return true; + } + if c >= '\u{1B000}' && c <= '\u{1B122}' { + return true; + } + if c >= '\u{1B150}' && c <= '\u{1B152}' { + return true; + } + if c >= 
'\u{1B164}' && c <= '\u{1B167}' { + return true; + } + if c >= '\u{1B170}' && c <= '\u{1B2FB}' { + return true; + } + if c >= '\u{1BC00}' && c <= '\u{1BC6A}' { + return true; + } + if c >= '\u{1BC70}' && c <= '\u{1BC7C}' { + return true; + } + if c >= '\u{1BC80}' && c <= '\u{1BC88}' { + return true; + } + if c >= '\u{1BC90}' && c <= '\u{1BC99}' { + return true; + } + if c >= '\u{1BC9D}' && c <= '\u{1BC9E}' { + return true; + } + if c >= '\u{1CF00}' && c <= '\u{1CF2D}' { + return true; + } + if c >= '\u{1CF30}' && c <= '\u{1CF46}' { + return true; + } + if c >= '\u{1D165}' && c <= '\u{1D169}' { + return true; + } + if c >= '\u{1D16D}' && c <= '\u{1D172}' { + return true; + } + if c >= '\u{1D17B}' && c <= '\u{1D182}' { + return true; + } + if c >= '\u{1D185}' && c <= '\u{1D18B}' { + return true; + } + if c >= '\u{1D1AA}' && c <= '\u{1D1AD}' { + return true; + } + if c >= '\u{1D242}' && c <= '\u{1D244}' { + return true; + } + if c >= '\u{1D400}' && c <= '\u{1D454}' { + return true; + } + if c >= '\u{1D456}' && c <= '\u{1D49C}' { + return true; + } + if c >= '\u{1D49E}' && c <= '\u{1D49F}' { + return true; + } + if c >= '\u{1D4A2}' && c <= '\u{1D4A2}' { + return true; + } + if c >= '\u{1D4A5}' && c <= '\u{1D4A6}' { + return true; + } + if c >= '\u{1D4A9}' && c <= '\u{1D4AC}' { + return true; + } + if c >= '\u{1D4AE}' && c <= '\u{1D4B9}' { + return true; + } + if c >= '\u{1D4BB}' && c <= '\u{1D4BB}' { + return true; + } + if c >= '\u{1D4BD}' && c <= '\u{1D4C3}' { + return true; + } + if c >= '\u{1D4C5}' && c <= '\u{1D505}' { + return true; + } + if c >= '\u{1D507}' && c <= '\u{1D50A}' { + return true; + } + if c >= '\u{1D50D}' && c <= '\u{1D514}' { + return true; + } + if c >= '\u{1D516}' && c <= '\u{1D51C}' { + return true; + } + if c >= '\u{1D51E}' && c <= '\u{1D539}' { + return true; + } + if c >= '\u{1D53B}' && c <= '\u{1D53E}' { + return true; + } + if c >= '\u{1D540}' && c <= '\u{1D544}' { + return true; + } + if c >= '\u{1D546}' && c <= '\u{1D546}' { + return 
true; + } + if c >= '\u{1D54A}' && c <= '\u{1D550}' { + return true; + } + if c >= '\u{1D552}' && c <= '\u{1D6A5}' { + return true; + } + if c >= '\u{1D6A8}' && c <= '\u{1D6C0}' { + return true; + } + if c >= '\u{1D6C2}' && c <= '\u{1D6DA}' { + return true; + } + if c >= '\u{1D6DC}' && c <= '\u{1D6FA}' { + return true; + } + if c >= '\u{1D6FC}' && c <= '\u{1D714}' { + return true; + } + if c >= '\u{1D716}' && c <= '\u{1D734}' { + return true; + } + if c >= '\u{1D736}' && c <= '\u{1D74E}' { + return true; + } + if c >= '\u{1D750}' && c <= '\u{1D76E}' { + return true; + } + if c >= '\u{1D770}' && c <= '\u{1D788}' { + return true; + } + if c >= '\u{1D78A}' && c <= '\u{1D7A8}' { + return true; + } + if c >= '\u{1D7AA}' && c <= '\u{1D7C2}' { + return true; + } + if c >= '\u{1D7C4}' && c <= '\u{1D7CB}' { + return true; + } + if c >= '\u{1D7CE}' && c <= '\u{1D7FF}' { + return true; + } + if c >= '\u{1DA00}' && c <= '\u{1DA36}' { + return true; + } + if c >= '\u{1DA3B}' && c <= '\u{1DA6C}' { + return true; + } + if c >= '\u{1DA75}' && c <= '\u{1DA75}' { + return true; + } + if c >= '\u{1DA84}' && c <= '\u{1DA84}' { + return true; + } + if c >= '\u{1DA9B}' && c <= '\u{1DA9F}' { + return true; + } + if c >= '\u{1DAA1}' && c <= '\u{1DAAF}' { + return true; + } + if c >= '\u{1DF00}' && c <= '\u{1DF1E}' { + return true; + } + if c >= '\u{1E000}' && c <= '\u{1E006}' { + return true; + } + if c >= '\u{1E008}' && c <= '\u{1E018}' { + return true; + } + if c >= '\u{1E01B}' && c <= '\u{1E021}' { + return true; + } + if c >= '\u{1E023}' && c <= '\u{1E024}' { + return true; + } + if c >= '\u{1E026}' && c <= '\u{1E02A}' { + return true; + } + if c >= '\u{1E100}' && c <= '\u{1E12C}' { + return true; + } + if c >= '\u{1E130}' && c <= '\u{1E13D}' { + return true; + } + if c >= '\u{1E140}' && c <= '\u{1E149}' { + return true; + } + if c >= '\u{1E14E}' && c <= '\u{1E14E}' { + return true; + } + if c >= '\u{1E290}' && c <= '\u{1E2AE}' { + return true; + } + if c >= '\u{1E2C0}' && c <= 
'\u{1E2F9}' { + return true; + } + if c >= '\u{1E7E0}' && c <= '\u{1E7E6}' { + return true; + } + if c >= '\u{1E7E8}' && c <= '\u{1E7EB}' { + return true; + } + if c >= '\u{1E7ED}' && c <= '\u{1E7EE}' { + return true; + } + if c >= '\u{1E7F0}' && c <= '\u{1E7FE}' { + return true; + } + if c >= '\u{1E800}' && c <= '\u{1E8C4}' { + return true; + } + if c >= '\u{1E8D0}' && c <= '\u{1E8D6}' { + return true; + } + if c >= '\u{1E900}' && c <= '\u{1E94B}' { + return true; + } + if c >= '\u{1E950}' && c <= '\u{1E959}' { + return true; + } + if c >= '\u{1EE00}' && c <= '\u{1EE03}' { + return true; + } + if c >= '\u{1EE05}' && c <= '\u{1EE1F}' { + return true; + } + if c >= '\u{1EE21}' && c <= '\u{1EE22}' { + return true; + } + if c >= '\u{1EE24}' && c <= '\u{1EE24}' { + return true; + } + if c >= '\u{1EE27}' && c <= '\u{1EE27}' { + return true; + } + if c >= '\u{1EE29}' && c <= '\u{1EE32}' { + return true; + } + if c >= '\u{1EE34}' && c <= '\u{1EE37}' { + return true; + } + if c >= '\u{1EE39}' && c <= '\u{1EE39}' { + return true; + } + if c >= '\u{1EE3B}' && c <= '\u{1EE3B}' { + return true; + } + if c >= '\u{1EE42}' && c <= '\u{1EE42}' { + return true; + } + if c >= '\u{1EE47}' && c <= '\u{1EE47}' { + return true; + } + if c >= '\u{1EE49}' && c <= '\u{1EE49}' { + return true; + } + if c >= '\u{1EE4B}' && c <= '\u{1EE4B}' { + return true; + } + if c >= '\u{1EE4D}' && c <= '\u{1EE4F}' { + return true; + } + if c >= '\u{1EE51}' && c <= '\u{1EE52}' { + return true; + } + if c >= '\u{1EE54}' && c <= '\u{1EE54}' { + return true; + } + if c >= '\u{1EE57}' && c <= '\u{1EE57}' { + return true; + } + if c >= '\u{1EE59}' && c <= '\u{1EE59}' { + return true; + } + if c >= '\u{1EE5B}' && c <= '\u{1EE5B}' { + return true; + } + if c >= '\u{1EE5D}' && c <= '\u{1EE5D}' { + return true; + } + if c >= '\u{1EE5F}' && c <= '\u{1EE5F}' { + return true; + } + if c >= '\u{1EE61}' && c <= '\u{1EE62}' { + return true; + } + if c >= '\u{1EE64}' && c <= '\u{1EE64}' { + return true; + } + if c >= 
'\u{1EE67}' && c <= '\u{1EE6A}' { + return true; + } + if c >= '\u{1EE6C}' && c <= '\u{1EE72}' { + return true; + } + if c >= '\u{1EE74}' && c <= '\u{1EE77}' { + return true; + } + if c >= '\u{1EE79}' && c <= '\u{1EE7C}' { + return true; + } + if c >= '\u{1EE7E}' && c <= '\u{1EE7E}' { + return true; + } + if c >= '\u{1EE80}' && c <= '\u{1EE89}' { + return true; + } + if c >= '\u{1EE8B}' && c <= '\u{1EE9B}' { + return true; + } + if c >= '\u{1EEA1}' && c <= '\u{1EEA3}' { + return true; + } + if c >= '\u{1EEA5}' && c <= '\u{1EEA9}' { + return true; + } + if c >= '\u{1EEAB}' && c <= '\u{1EEBB}' { + return true; + } + if c >= '\u{1FBF0}' && c <= '\u{1FBF9}' { + return true; + } + if c >= '\u{20000}' && c <= '\u{2A6DF}' { + return true; + } + if c >= '\u{2A700}' && c <= '\u{2B738}' { + return true; + } + if c >= '\u{2B740}' && c <= '\u{2B81D}' { + return true; + } + if c >= '\u{2B820}' && c <= '\u{2CEA1}' { + return true; + } + if c >= '\u{2CEB0}' && c <= '\u{2EBE0}' { + return true; + } + if c >= '\u{2F800}' && c <= '\u{2FA1D}' { + return true; + } + if c >= '\u{30000}' && c <= '\u{3134A}' { + return true; + } + if c >= '\u{E0100}' && c <= '\u{E01EF}' { + return true; + } + false +} |