summaryrefslogtreecommitdiffstats
path: root/third_party/rust/jsparagus-generated-parser/src/token.rs
blob: 8c87e390933c74e4d043ed8fa8d04cef9961c595 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
use crate::parser_tables_generated::TerminalId;
use ast::source_atom_set::SourceAtomSetIndex;
use ast::source_slice_list::SourceSliceIndex;
use ast::SourceLocation;

/// Payload attached to a [`Token`].
///
/// Which variant is present depends on the kind of token; tokens built with
/// `Token::basic_token` carry `TokenValue::None`.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenValue {
    /// The token carries no extra data.
    None,
    /// A numeric payload, stored as an `f64`.
    Number(f64),
    /// An index of type `SourceAtomSetIndex`.
    // NOTE(review): presumably refers to an entry in the source atom set
    // owned by the parser/lexer -- confirm against `ast::source_atom_set`.
    Atom(SourceAtomSetIndex),
    /// An index of type `SourceSliceIndex`.
    // NOTE(review): presumably refers to an entry in the source slice list
    // -- confirm against `ast::source_slice_list`.
    Slice(SourceSliceIndex),
}

impl TokenValue {
    /// Returns the `f64` held by a `Number` value.
    ///
    /// # Panics
    ///
    /// Panics with `"expected number"` if `self` is any other variant.
    pub fn as_number(&self) -> f64 {
        match self {
            Self::Number(n) => *n,
            _ => panic!("expected number"),
        }
    }

    /// Returns the index held by an `Atom` value.
    ///
    /// # Panics
    ///
    /// Panics with `"expected atom"` if `self` is any other variant.
    pub fn as_atom(&self) -> SourceAtomSetIndex {
        match self {
            Self::Atom(index) => *index,
            _ => panic!("expected atom"),
        }
    }

    /// Returns the index held by a `Slice` value.
    ///
    /// # Panics
    ///
    /// Panics with `"expected slice"` if `self` is any other variant.
    pub fn as_slice(&self) -> SourceSliceIndex {
        match self {
            Self::Slice(index) => *index,
            // The message names the variant this accessor actually expects,
            // so a failure here is not mistaken for an `as_atom` failure.
            _ => panic!("expected slice"),
        }
    }
}

/// An ECMAScript input token. The lexer discards input matching *WhiteSpace*,
/// *LineTerminator*, and *Comment*. The remaining input elements are called
/// tokens, and they are fed to the parser.
///
/// Tokens match the goal terminals of the ECMAScript lexical grammar; see
/// <https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar>.
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
    /// Token type.
    pub terminal_id: TerminalId,

    /// Location of this token within the source buffer.
    // NOTE(review): the original comment described this as a byte offset,
    // but the field type is `SourceLocation`; presumably it records a
    // start/end range in bytes -- confirm against `ast::SourceLocation`.
    pub loc: SourceLocation,

    /// True if this token is the first token on a source line. This is true at
    /// the start of a script or module and after each LineTerminatorSequence.
    /// It is unaffected by single-line `/* delimited */` comments.
    ///
    /// *LineContinuation*s (a backslash followed by a newline) also don't
    /// affect this, since they can only happen inside strings, not between
    /// tokens.
    ///
    /// For a `TerminalId::End` token, this is false, regardless of whether
    /// there was a newline at the end of the file.
    pub is_on_new_line: bool,

    /// Data about the token. The exact meaning of this field depends on the
    /// `terminal_id`.
    ///
    /// For names and keyword tokens, this is just the token as it appears in
    /// the source. Same goes for *BooleanLiteral*, *NumericLiteral*, and
    /// *RegularExpressionLiteral* tokens.
    ///
    /// For a string literal, the string characters, after decoding
    /// *EscapeSequence*s and removing *LineContinuation*s (the SV of the
    /// literal, in standardese).
    ///
    /// For all other tokens (including template literal parts), the content is
    /// unspecified for now. TODO.
    // NOTE(review): the field is a `TokenValue`, so the text described above
    // is presumably reached through a `TokenValue::Atom` or
    // `TokenValue::Slice` index rather than stored inline -- confirm against
    // the lexer.
    pub value: TokenValue,
}

impl Token {
    pub fn basic_token(terminal_id: TerminalId, loc: SourceLocation) -> Self {
        Self {
            terminal_id,
            loc,
            is_on_new_line: false,
            value: TokenValue::None,
        }
    }
}