summary refs log tree commit diff stats
path: root/js/src/frontend/Token.h
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/frontend/Token.h')
-rw-r--r-- js/src/frontend/Token.h 226
1 files changed, 226 insertions, 0 deletions
diff --git a/js/src/frontend/Token.h b/js/src/frontend/Token.h
new file mode 100644
index 0000000000..5d8db585d8
--- /dev/null
+++ b/js/src/frontend/Token.h
@@ -0,0 +1,226 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Token-affiliated data structures except for TokenKind (defined in its own
+ * header).
+ */
+
+#ifndef frontend_Token_h
+#define frontend_Token_h
+
+#include "mozilla/Assertions.h" // MOZ_ASSERT
+
+#include <stdint.h> // uint32_t
+
+#include "frontend/ParserAtom.h" // js::frontend::{ParserAtom,ParserName}
+#include "frontend/TokenKind.h" // js::frontend::TokenKind
+#include "js/RegExpFlags.h" // JS::RegExpFlags
+
+namespace js {
+
+namespace frontend {
+
+struct TokenPos {  // Half-open range [begin, end) of code-unit offsets.
+ uint32_t begin = 0; // Offset of the token's first code unit.
+ uint32_t end = 0; // Offset of 1 past the token's last code unit.
+
+ TokenPos() = default;  // Both offsets take their in-class default of 0.
+ TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
+
+ // Return a TokenPos that covers left, right, and anything in between.
+ static TokenPos box(const TokenPos& left, const TokenPos& right) {
+ MOZ_ASSERT(left.begin <= left.end);  // left must be well-formed.
+ MOZ_ASSERT(left.end <= right.begin);  // left must end before right starts.
+ MOZ_ASSERT(right.begin <= right.end);  // right must be well-formed.
+ return TokenPos(left.begin, right.end);
+ }
+
+ bool operator==(const TokenPos& bpos) const {  // Equal iff both offsets match.
+ return begin == bpos.begin && end == bpos.end;
+ }
+
+ bool operator!=(const TokenPos& bpos) const {  // True iff either offset differs.
+ return begin != bpos.begin || end != bpos.end;
+ }
+
+ bool operator<(const TokenPos& bpos) const { return begin < bpos.begin; }  // Orders by |begin| only; |end| is ignored.
+
+ bool operator<=(const TokenPos& bpos) const { return begin <= bpos.begin; }  // |begin| only, so a <= b && b <= a does not imply a == b.
+
+ bool operator>(const TokenPos& bpos) const { return !(*this <= bpos); }  // Negation of operator<=, hence also |begin|-only.
+
+ bool operator>=(const TokenPos& bpos) const { return !(*this < bpos); }  // Negation of operator<, hence also |begin|-only.
+
+ bool encloses(const TokenPos& pos) const {  // True iff pos lies within this range; shared endpoints count.
+ return begin <= pos.begin && pos.end <= end;
+ }
+};
+
+enum DecimalPoint { NoDecimal = false, HasDecimal = true };  // Records whether a numeric literal's text contains a '.'; enumerators equal the matching bool values.
+
+// The only escapes found in IdentifierName are of the Unicode flavor, so one "saw an escape" enumerator suffices.
+enum class IdentifierEscapes { None, SawUnicodeEscape };
+
+enum class NameVisibility { Public, Private };  // NOTE(review): presumably separates Name from PrivateName tokens; not used in this header -- confirm at use sites.
+
+class TokenStreamShared;
+
+struct Token {  // One scanned token: its kind, its source position, and a kind-dependent payload.
+ private:
+ // The lexical grammar of JavaScript has a quirk around the '/' character.
+ // As the spec puts it:
+ //
+ // > There are several situations where the identification of lexical input
+ // > elements is sensitive to the syntactic grammar context that is consuming
+ // > the input elements. This requires multiple goal symbols for the lexical
+ // > grammar. [...] The InputElementRegExp goal symbol is used in all
+ // > syntactic grammar contexts where a RegularExpressionLiteral is permitted
+ // > [...] In all other contexts, InputElementDiv is used as the lexical
+ // > goal symbol.
+ //
+ // https://tc39.github.io/ecma262/#sec-lexical-and-regexp-grammars
+ //
+ // What "sensitive to the syntactic grammar context" means is, the parser has
+ // to tell the TokenStream whether to interpret '/' as division or
+ // RegExp. Because only one or the other (or neither) will be legal at that
+ // point in the program, and only the parser knows which one.
+ //
+ // But there's a problem: the parser often gets a token, puts it back, then
+ // consumes it later; or (equivalently) peeks at a token, leaves it, peeks
+ // again later, then finally consumes it. Of course we don't actually re-scan
+ // the token every time; we cache it in the TokenStream. This leads to the
+ // following rule:
+ //
+ // The parser must not pass SlashIsRegExp when getting/peeking at a token
+ // previously scanned with SlashIsDiv; or vice versa.
+ //
+ // That way, code that asks for a SlashIsRegExp mode will never get a cached
+ // Div token. But this rule is easy to screw up, because tokens are so often
+ // peeked at on Parser.cpp line A and consumed on line B, where |A-B| is
+ // thousands of lines. We therefore enforce it with the frontend's most
+ // annoying assertion (in verifyConsistentModifier), and provide
+ // Modifier::SlashIsInvalid to help avoid tripping it.
+ //
+ // This enum belongs in TokenStream, but C++, so we define it here and
+ // typedef it there.
+ enum Modifier {
+ // Parse `/` and `/=` as the division operators. (That is, use
+ // InputElementDiv as the goal symbol.)
+ SlashIsDiv,
+
+ // Parse `/` as the beginning of a RegExp literal. (That is, use
+ // InputElementRegExp.)
+ SlashIsRegExp,
+
+ // Neither a Div token nor a RegExp token is syntactically valid here. When
+ // the parser calls `getToken(SlashIsInvalid)`, it must be prepared to see
+ // either one (and throw a SyntaxError either way).
+ //
+ // It's OK to use SlashIsInvalid to get a token that was originally scanned
+ // with SlashIsDiv or SlashIsRegExp. The reverse--peeking with
+ // SlashIsInvalid, then getting with another mode--is not OK. If either Div
+ // or RegExp is syntactically valid here, use the appropriate modifier.
+ SlashIsInvalid,
+ };
+ friend class TokenStreamShared;  // Lets TokenStreamShared name the private Modifier enum above.
+
+ public:
+ // WARNING: TokenStreamPosition assumes that the only GC things a Token
+ // includes are atoms. DON'T ADD NON-ATOM GC THING POINTERS HERE
+ // UNLESS YOU ADD ADDITIONAL ROOTING TO THAT CLASS.
+
+ /** The type of this token. */
+ TokenKind type;
+
+ /** The token's position in the overall script. */
+ TokenPos pos;
+
+ union {  // Payload; the setters and accessors below assert which |type| goes with which member.
+ private:
+ friend struct Token;  // Members are private to the union; only Token itself may touch them.
+
+ /** Non-numeric atom. */
+ const ParserName* name;
+
+ /** Potentially-numeric atom. */
+ const ParserAtom* atom;
+
+ struct {
+ /** Numeric literal's value. */
+ double value;
+
+ /** Does the numeric literal contain a '.'? */
+ DecimalPoint decimalPoint;
+ } number;
+
+ /** Regular expression flags; use charBuffer to access source chars. */
+ JS::RegExpFlags reflags;
+ } u;
+
+#ifdef DEBUG
+ /** The modifier used to get this token. */
+ Modifier modifier;
+#endif
+
+ // Mutators: each asserts that |type| already matches the payload being stored.
+
+ void setName(const ParserName* name) {  // For Name and PrivateName tokens.
+ MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName);
+ u.name = name;
+ }
+
+ void setAtom(const ParserAtom* atom) {  // For String, TemplateHead and NoSubsTemplate tokens.
+ MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead ||
+ type == TokenKind::NoSubsTemplate);
+ u.atom = atom;
+ }
+
+ void setRegExpFlags(JS::RegExpFlags flags) {  // For RegExp tokens.
+ MOZ_ASSERT(type == TokenKind::RegExp);
+ u.reflags = flags;
+ }
+
+ void setNumber(double n, DecimalPoint decimalPoint) {  // For Number tokens; stores both the value and the '.' flag.
+ MOZ_ASSERT(type == TokenKind::Number);
+ u.number.value = n;
+ u.number.decimalPoint = decimalPoint;
+ }
+
+ // Type-safe accessors: each asserts |type| before reading the matching union member.
+
+ const ParserName* name() const {  // Valid for Name and PrivateName tokens.
+ MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName);
+ return u.name->asName(); // poor-man's type verification
+ }
+
+ const ParserAtom* atom() const {  // Valid for String, TemplateHead and NoSubsTemplate tokens.
+ MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead ||
+ type == TokenKind::NoSubsTemplate);
+ return u.atom;
+ }
+
+ JS::RegExpFlags regExpFlags() const {  // Valid for RegExp tokens.
+ MOZ_ASSERT(type == TokenKind::RegExp);
+ return u.reflags;
+ }
+
+ double number() const {  // Valid for Number tokens; returns the literal's value.
+ MOZ_ASSERT(type == TokenKind::Number);
+ return u.number.value;
+ }
+
+ DecimalPoint decimalPoint() const {  // Valid for Number tokens; returns the stored '.' flag.
+ MOZ_ASSERT(type == TokenKind::Number);
+ return u.number.decimalPoint;
+ }
+};
+
+} // namespace frontend
+
+} // namespace js
+
+#endif // frontend_Token_h