/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * Token-affiliated data structures except for TokenKind (defined in its own * header). */ #ifndef frontend_Token_h #define frontend_Token_h #include "mozilla/Assertions.h" // MOZ_ASSERT #include // uint32_t #include "frontend/ParserAtom.h" // js::frontend::{ParserAtom,ParserName} #include "frontend/TokenKind.h" // js::frontend::TokenKind #include "js/RegExpFlags.h" // JS::RegExpFlags namespace js { namespace frontend { struct TokenPos { uint32_t begin = 0; // Offset of the token's first code unit. uint32_t end = 0; // Offset of 1 past the token's last code unit. TokenPos() = default; TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {} // Return a TokenPos that covers left, right, and anything in between. static TokenPos box(const TokenPos& left, const TokenPos& right) { MOZ_ASSERT(left.begin <= left.end); MOZ_ASSERT(left.end <= right.begin); MOZ_ASSERT(right.begin <= right.end); return TokenPos(left.begin, right.end); } bool operator==(const TokenPos& bpos) const { return begin == bpos.begin && end == bpos.end; } bool operator!=(const TokenPos& bpos) const { return begin != bpos.begin || end != bpos.end; } bool operator<(const TokenPos& bpos) const { return begin < bpos.begin; } bool operator<=(const TokenPos& bpos) const { return begin <= bpos.begin; } bool operator>(const TokenPos& bpos) const { return !(*this <= bpos); } bool operator>=(const TokenPos& bpos) const { return !(*this < bpos); } bool encloses(const TokenPos& pos) const { return begin <= pos.begin && pos.end <= end; } }; enum DecimalPoint { NoDecimal = false, HasDecimal = true }; // The only escapes found in IdentifierName are of the Unicode flavor. enum class IdentifierEscapes { None, SawUnicodeEscape }; enum class NameVisibility { Public, Private }; class TokenStreamShared; struct Token { private: // The lexical grammar of JavaScript has a quirk around the '/' character. // As the spec puts it: // // > There are several situations where the identification of lexical input // > elements is sensitive to the syntactic grammar context that is consuming // > the input elements. This requires multiple goal symbols for the lexical // > grammar. [...] The InputElementRegExp goal symbol is used in all // > syntactic grammar contexts where a RegularExpressionLiteral is permitted // > [...] In all other contexts, InputElementDiv is used as the lexical // > goal symbol. // // https://tc39.github.io/ecma262/#sec-lexical-and-regexp-grammars // // What "sensitive to the syntactic grammar context" means is, the parser has // to tell the TokenStream whether to interpret '/' as division or // RegExp. Because only one or the other (or neither) will be legal at that // point in the program, and only the parser knows which one. // // But there's a problem: the parser often gets a token, puts it back, then // consumes it later; or (equivalently) peeks at a token, leaves it, peeks // again later, then finally consumes it. Of course we don't actually re-scan // the token every time; we cache it in the TokenStream. This leads to the // following rule: // // The parser must not pass SlashIsRegExp when getting/peeking at a token // previously scanned with SlashIsDiv; or vice versa. // // That way, code that asks for a SlashIsRegExp mode will never get a cached // Div token. But this rule is easy to screw up, because tokens are so often // peeked at on Parser.cpp line A and consumed on line B, where |A-B| is // thousands of lines. We therefore enforce it with the frontend's most // annoying assertion (in verifyConsistentModifier), and provide // Modifier::SlashIsInvalid to help avoid tripping it. // // This enum belongs in TokenStream, but C++, so we define it here and // typedef it there. enum Modifier { // Parse `/` and `/=` as the division operators. (That is, use // InputElementDiv as the goal symbol.) SlashIsDiv, // Parse `/` as the beginning of a RegExp literal. (That is, use // InputElementRegExp.) SlashIsRegExp, // Neither a Div token nor a RegExp token is syntactically valid here. When // the parser calls `getToken(SlashIsInvalid)`, it must be prepared to see // either one (and throw a SyntaxError either way). // // It's OK to use SlashIsInvalid to get a token that was originally scanned // with SlashIsDiv or SlashIsRegExp. The reverse--peeking with // SlashIsInvalid, then getting with another mode--is not OK. If either Div // or RegExp is syntactically valid here, use the appropriate modifier. SlashIsInvalid, }; friend class TokenStreamShared; public: // WARNING: TokenStreamPosition assumes that the only GC things a Token // includes are atoms. DON'T ADD NON-ATOM GC THING POINTERS HERE // UNLESS YOU ADD ADDITIONAL ROOTING TO THAT CLASS. /** The type of this token. */ TokenKind type; /** The token's position in the overall script. */ TokenPos pos; union { private: friend struct Token; /** Non-numeric atom. */ const ParserName* name; /** Potentially-numeric atom. */ const ParserAtom* atom; struct { /** Numeric literal's value. */ double value; /** Does the numeric literal contain a '.'? */ DecimalPoint decimalPoint; } number; /** Regular expression flags; use charBuffer to access source chars. */ JS::RegExpFlags reflags; } u; #ifdef DEBUG /** The modifier used to get this token. */ Modifier modifier; #endif // Mutators void setName(const ParserName* name) { MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName); u.name = name; } void setAtom(const ParserAtom* atom) { MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead || type == TokenKind::NoSubsTemplate); u.atom = atom; } void setRegExpFlags(JS::RegExpFlags flags) { MOZ_ASSERT(type == TokenKind::RegExp); u.reflags = flags; } void setNumber(double n, DecimalPoint decimalPoint) { MOZ_ASSERT(type == TokenKind::Number); u.number.value = n; u.number.decimalPoint = decimalPoint; } // Type-safe accessors const ParserName* name() const { MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName); return u.name->asName(); // poor-man's type verification } const ParserAtom* atom() const { MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead || type == TokenKind::NoSubsTemplate); return u.atom; } JS::RegExpFlags regExpFlags() const { MOZ_ASSERT(type == TokenKind::RegExp); return u.reflags; } double number() const { MOZ_ASSERT(type == TokenKind::Number); return u.number.value; } DecimalPoint decimalPoint() const { MOZ_ASSERT(type == TokenKind::Number); return u.number.decimalPoint; } }; } // namespace frontend } // namespace js #endif // frontend_Token_h