1 files changed, 1248 insertions, 0 deletions
diff --git a/third_party/rust/jsparagus/jsparagus/grammar.py b/third_party/rust/jsparagus/jsparagus/grammar.py
new file mode 100644
index 0000000000..bcaf5a02ae
--- /dev/null
+++ b/third_party/rust/jsparagus/jsparagus/grammar.py
@@ -0,0 +1,1248 @@
+""" Data structures for representing grammars. """
+
+from __future__ import annotations
+# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls
+
+import copy
+import typing
+import dataclasses
+from dataclasses import dataclass
+from .ordered import OrderedSet, OrderedFrozenSet
+from . import types
+
+
+# *** What is a grammar? ******************************************************
+#
+# A grammar is a dictionary mapping nonterminal names to lists of right-hand
+# sides. Each right-hand side (also called a "production") is a list whose
+# elements can include terminals, nonterminals, Optional elements, LookaheadRules,
+# and NoLineTerminatorHere.
+#
+# The most common elements are terminals and nonterminals, so a grammar usually
+# looks something like this:
+def example_grammar() -> Grammar:
+    rules: typing.Dict[typing.Union[str, InitNt, Nt], LenientNtDef] = {
+        'expr': [
+            ['term'],
+            ['expr', '+', 'term'],
+            ['expr', '-', 'term'],
+        ],
+        'term': [
+            ['unary'],
+            ['term', '*', 'unary'],
+            ['term', '/', 'unary'],
+        ],
+        'unary': [
+            ['prim'],
+            ['-', 'unary'],
+        ],
+        'prim': [
+            ['NUM'],
+            ['VAR'],
+            ['(', 'expr', ')'],
+        ],
+    }
+
+    # The goal nonterminals are the nonterminals we're actually interested in
+    # parsing. Here we want to parse expressions; all the other nonterminals
+    # are interesting only as the building blocks of expressions.
+    #
+    # Variable terminals are terminal symbols that can have several different
+    # values, like a VAR token that could be any identifier, or a NUM token
+    # that could be any number.
+    return Grammar(rules, goal_nts=['expr'], variable_terminals=['NUM', 'VAR'])
+
+
+Condition = typing.Tuple[str, bool]
+
+
+# A production consists of a left side, an optional condition, a right side,
+# and a reducer. A `Production` object includes everything except the left
+# side. Incorporating reducers lets us transform a grammar while preserving
+# behavior.
+#
+# The production `expr ::= term` is represented by
+# `Production(["term"], 0)`.
+#
+# The production `expr ::= expr + term => add($0, $2)` is represented by
+# `Production(["expr", "+", "term"], CallMethod("add", (0, 2))`.
+#
+@dataclass
+class Production:
+    __slots__ = ['body', 'reducer', 'condition']
+    body: typing.List[Element]
+    reducer: ReduceExprOrAccept
+    condition: typing.Optional[Condition]
+
+    def __init__(self,
+                 body: typing.List[Element],
+                 reducer: ReduceExprOrAccept,
+                 *,
+                 condition: typing.Optional[Condition] = None):
+        self.body = body
+        self.reducer = reducer
+        self.condition = condition
+
+    def __repr__(self) -> str:
+        if self.condition is None:
+            return "Production({!r}, reducer={!r})".format(self.body, self.reducer)
+        else:
+            return ("Production({!r}, reducer={!r}, condition={!r})"
+                    .format(self.body, self.reducer, self.condition))
+
+    def copy_with(self, **kwargs: typing.Any) -> Production:
+        return dataclasses.replace(self, **kwargs)
+
+
+# *** Reduce expressions ******************************************************
+#
+# Reduce expressions ("reducers" for short) are a little language used to
+# specify what happens when a production is matched. A reduce expression is
+# one of these:
+#
+# *   An integer evaluates to one of the syntactic components of the
+#     production. For example, if the production we're reducing is
+#     `sum ::= sum + term`, the integer `0` evaluates the `sum` to the left of
+#     the plus sign, and `2` means the `term` on the right. (The integer
+#     `1` would refer to the `+` token itself, but that's not super useful.)
+#
+#     These integers are not quite indexes into `production.body`, because
+#     LookaheadRule, ErrorSymbol, and NoLineTerminatorHere elements don't
+#     count: in the production `stmt ::= [lookahead != "let"] expr ";"`, `0` is
+#     the expr, and `1` is the semicolon token.  See `is_concrete_element(e)`.
+#
+# *   CallMethod objects pass values to a builder method and return the result.
+#     The `args` are nested reduce expressions.
+#
+# *   None is an expression used as a placeholder when an optional symbol is
+#     omitted.
+#
+# *   Some(expr) is used when an optional symbol is found and parsed.
+#     In Python, this just expands to the same thing as `expr`, but in Rust
+#     this expands to a use of `Option::Some()`.
+#
+# In addition, the special reducer 'accept' means stop parsing. This is
+# used only in productions for init nonterminals, created automatically by
+# Grammar.__init__(). It's not a reduce expression, so it can't be nested.
+
+
+@dataclass(frozen=True)
+class CallMethod:
+    """Express a method call, and give it a given set of arguments. A trait is
+    added as the parser should implement this trait to call this method."""
+
+    method: str
+    args: typing.Tuple[ReduceExpr, ...]
+    trait: types.Type = types.Type("AstBuilder")
+    fallible: bool = False
+
+
+@dataclass(frozen=True)
+class Some:
+    inner: ReduceExpr
+
+
+def expr_to_str(expr: ReduceExprOrAccept) -> str:
+    if isinstance(expr, int):
+        return "${}".format(expr)
+    elif isinstance(expr, CallMethod):
+        return "{}::{}({}){}".format(
+            expr.trait, expr.method,
+            ', '.join(expr_to_str(arg) for arg in expr.args),
+            expr.fallible and '?' or '')
+    elif expr is None:
+        return "None"
+    elif isinstance(expr, Some):
+        return "Some({})".format(expr_to_str(expr.inner))
+    elif expr == "accept":
+        return "<accept>"
+    else:
+        raise ValueError("unrecognized expression: {!r}".format(expr))
+
+
+# Type parameter for Grammar.intern().
+Internable = typing.TypeVar("Internable")
+
+SyntheticTerminalsDict = typing.Dict[str, OrderedFrozenSet[str]]
+
+
+class Grammar:
+    """A collection of productions.
+
+    *   self.variable_terminals - OrderedFrozenSet[str] - Terminals that carry
+        data, like (in JS) numeric literals and RegExps.
+
+    *   self.terminals - OrderedFrozenSet[str] - All terminals used in the
+        language, including those in self.variable_terminals and
+        self.synthetic_terminals.
+
+    *   self.synthetic_terminals - {str: OrderedFrozenSet[str]} - Maps terminals
+        to sets of terminals. An entry `name: set` in this dictionary means
+        that `name` is shorthand for "one of the terminals in `set`".
+
+    *   self.nonterminals - {LenientNt: NtDef} - Keys are either (str|InitNt), early
+        in the pipeline, or Nt objects later on. Values are the NtDef objects
+        that contain the actual Productions.
+
+    *   self.methods - {str: MethodType} - Type information for methods.
+
+    *   self.init_nts - [InitNt or Nt] - The list of all elements of
+        self.nonterminals.keys() that are init nonterminals.
+
+    *   self.exec_modes - DefaultDict{str, OrderedSet[Type]} or None - ?
+
+    *   self.type_to_mods - {Type: [str]} or None - ?
+
+    *   self._cache - {Any: Any} - Cache of immutable objects used by
+        Grammar.intern().
+    """
+
+    variable_terminals: OrderedFrozenSet[str]
+    terminals: OrderedFrozenSet[typing.Union[str, End]]
+    synthetic_terminals: SyntheticTerminalsDict
+    nonterminals: typing.Dict[LenientNt, NtDef]
+    methods: typing.Dict[str, types.MethodType]
+    init_nts: typing.List[typing.Union[Nt, InitNt]]
+    exec_modes: typing.Optional[typing.DefaultDict[str, OrderedSet[types.Type]]]
+    type_to_modes: typing.Optional[typing.Mapping[types.Type, typing.List[str]]]
+    _cache: typing.Dict[typing.Any, typing.Any]
+
+    def __init__(
+            self,
+            nonterminals: typing.Mapping[LenientNt, LenientNtDef],
+            *,
+            goal_nts: typing.Optional[typing.Iterable[LenientNt]] = None,
+            variable_terminals: typing.Iterable[str] = (),
+            synthetic_terminals: SyntheticTerminalsDict = None,
+            method_types: typing.Optional[typing.Dict[str, types.MethodType]] = None,
+            exec_modes: typing.Optional[typing.DefaultDict[str, OrderedSet[types.Type]]] = None,
+            type_to_modes: typing.Optional[typing.Mapping[types.Type, typing.List[str]]] = None):
+
+        # This constructor supports passing in a sort of jumbled blob of
+        # strings, lists, and actual objects, and normalizes it all to a more
+        # typeful structure. Being able to interpret simple
+        # "list-of-lists-of-strings" input is super useful for tests.
+        #
+        # We don't check here that the grammar is LR, that it's cycle-free, or
+        # any other nice properties.
+
+        # Copy/infer the arguments.
+        my_nonterminals: typing.Dict[LenientNt, LenientNtDef] = dict(nonterminals.items())
+        if goal_nts is None:
+            # Default to the first nonterminal in the dictionary.
+            my_goal_nts = []
+            for name in my_nonterminals:
+                my_goal_nts.append(name)
+                break
+        else:
+            my_goal_nts = list(goal_nts)
+        self.variable_terminals = OrderedFrozenSet(variable_terminals)
+        if synthetic_terminals is None:
+            synthetic_terminals = {}
+        else:
+            synthetic_terminals = {
+                name: OrderedFrozenSet(set)
+                for name, set in synthetic_terminals.items()
+            }
+            for synthetic_key, values in synthetic_terminals.items():
+                if synthetic_key in my_nonterminals:
+                    raise ValueError(
+                        "invalid grammar: {!r} is both a terminal and a nonterminal"
+                        .format(synthetic_key))
+                for t in values:
+                    if t in my_nonterminals:
+                        raise ValueError(
+                            "invalid grammar: {!r} is both ".format(t)
+                            + "a representation of a synthetic terminal and a nonterminal")
+                    if t in synthetic_terminals:
+                        # Nested synthetic terminals. Throw, for now. (This
+                        # should pretty much just work, except expand_terminal
+                        # doesn't support it; and we don't check for cycles
+                        # where a synthetic terminal includes itself directly
+                        # or indirectly.)
+                        raise ValueError(
+                            "unsupported: synthetic terminals can't include other "
+                            "synthetic terminals; {!r} includes {!r}"
+                            .format(synthetic_key, t))
+        # self.synthetic_terminals = synthetic_terminals
+        self.synthetic_terminals = {}
+
+        keys_are_nt = isinstance(next(iter(my_nonterminals)), Nt)
+        key_type: typing.Union[typing.Type, typing.Tuple[typing.Type, ...]]
+        key_type = Nt if keys_are_nt else (str, InitNt)
+
+        self._cache = {}
+
+        # Gather some information just by looking at keys (without examining
+        # every production).
+        #
+        # str_to_nt maps the name of each non-parameterized
+        # nonterminal to `Nt(name)`, a cache.
+        str_to_nt: typing.Dict[typing.Union[str, InitNt], Nt] = {}
+        # nt_params lists the names of each nonterminal's parameters (empty
+        # tuple for non-parameterized nts).
+        nt_params: typing.Dict[typing.Union[str, InitNt], typing.Tuple[str, ...]] = {}
+        for key in my_nonterminals:
+            if not isinstance(key, key_type):
+                raise ValueError(
+                    "invalid grammar: conflicting key types in nonterminals dict - "
+                    "expected either all str or all Nt, got {!r}"
+                    .format(key.__class__.__name__))
+            nt_name: typing.Union[str, InitNt]
+            param_names: typing.Tuple[str, ...]
+            if keys_are_nt:
+                assert isinstance(key, Nt)
+                nt_name = key.name
+                param_names = tuple(name for name, value in key.args)
+            else:
+                assert isinstance(key, (str, InitNt))
+                nt_name = key
+                param_names = ()
+                my_nt = my_nonterminals[key]
+                if isinstance(my_nt, NtDef):
+                    param_names = tuple(my_nt.params)
+            if nt_name not in nt_params:
+                nt_params[nt_name] = param_names
+            else:
+                if nt_params[nt_name] != param_names:
+                    raise ValueError(
+                        "conflicting parameter name lists for nt {!r}: "
+                        "both {!r} and {!r}"
+                        .format(nt_name, nt_params[nt_name], param_names))
+            if param_names == () and nt_name not in str_to_nt:
+                str_to_nt[nt_name] = self.intern(Nt(nt_name))
+
+        # Validate, desugar, and copy the grammar. As a side effect, calling
+        # validate_element on every element of the grammar populates
+        # all_terminals.
+        all_terminals: OrderedSet[typing.Union[str, End]] = OrderedSet(self.variable_terminals)
+        all_terminals.add(End())
+
+        def note_terminal(t: str) -> None:
+            """Add t (and all representations of it, if synthetic) to all_terminals."""
+            if t not in all_terminals:
+                all_terminals.add(t)
+                if t in self.synthetic_terminals:
+                    for k in self.synthetic_terminals[t]:
+                        note_terminal(k)
+
+        # Note: i and j are normally list indexes, but they are sometimes the
+        # special string '?'. It's OK because they are used only in error
+        # messages.
+        def validate_element(
+                nt: LenientNt,
+                i: typing.Union[int, str],
+                j: typing.Union[int, str],
+                e: Element,
+                context_params: typing.Tuple[str, ...]
+        ) -> Element:
+            if isinstance(e, str):
+                if e in nt_params:
+                    if nt_params[e] != ():
+                        raise ValueError(
+                            "invalid grammar: missing parameters for {!r} "
+                            "in production `grammar[{!r}][{}][{}].inner`: {!r}"
+                            .format(e, nt, i, j, nt_params[e]))
+                    return str_to_nt[e]
+                else:
+                    note_terminal(e)
+                    return e
+            elif isinstance(e, Optional):
+                if not isinstance(e.inner, (str, Nt)):
+                    raise TypeError(
+                        "invalid grammar: unrecognized element "
+                        "in production `grammar[{!r}][{}][{}].inner`: {!r}"
+                        .format(nt, i, j, e.inner))
+                inner = validate_element(nt, i, j, e.inner, context_params)
+                return self.intern(Optional(inner))
+            elif isinstance(e, Literal):
+                if not isinstance(e.text, str):
+                    raise TypeError(
+                        "invalid grammar: unrecognized element "
+                        "in production `grammar[{!r}][{}][{}].text`: {!r}"
+                        .format(nt, i, j, e.text))
+                return self.intern(e)
+            elif isinstance(e, UnicodeCategory):
+                if not isinstance(e.cat_prefix, str):
+                    raise TypeError(
+                        "invalid grammar: unrecognized element "
+                        "in production `grammar[{!r}][{}][{}].cat_prefix`: {!r}"
+                        .format(nt, i, j, e.cat_prefix))
+                return self.intern(e)
+            elif isinstance(e, Exclude):
+                if not isinstance(e.inner, (str, Nt)):
+                    raise TypeError(
+                        "invalid grammar: unrecognized element "
+                        "in production `grammar[{!r}][{}][{}].inner`: {!r}"
+                        .format(nt, i, j, e.inner))
+                exclusion_list = []
+                for value in e.exclusion_list:
+                    if not isinstance(value, (str, Nt)):
+                        raise TypeError(
+                            "invalid grammar: unrecognized element "
+                            "in production `grammar[{!r}][{}][{}].exclusion_list`: {!r}"
+                            .format(nt, i, j, value))
+                    value = validate_element(nt, i, j, value, context_params)
+                    exclusion_list.append(value)
+                inner = validate_element(nt, i, j, e.inner, context_params)
+                return self.intern(Exclude(inner, tuple(exclusion_list)))
+            elif isinstance(e, Nt):
+                # Either the application or the original parameterized
+                # production must be present in the dictionary.
+                if e not in my_nonterminals and e.name not in my_nonterminals:
+                    raise ValueError(
+                        "invalid grammar: unrecognized nonterminal "
+                        "in production `grammar[{!r}][{}][{}]`: {!r}"
+                        .format(nt, i, j, e.name))
+                args = tuple(pair[0] for pair in e.args)
+                if e.name in nt_params and args != nt_params[e.name]:
+                    raise ValueError(
+                        "invalid grammar: wrong arguments passed to {!r} "
+                        "in production `grammar[{!r}][{}][{}]`: "
+                        "passed {!r}, expected {!r}"
+                        .format(e.name, nt, i, j,
+                                args, nt_params[e.name]))
+                for param_name, arg_expr in e.args:
+                    if isinstance(arg_expr, Var):
+                        if arg_expr.name not in context_params:
+                            raise ValueError(
+                                "invalid grammar: undefined variable {!r} "
+                                "in production `grammar[{!r}][{}][{}]`"
+                                .format(arg_expr.name, nt, i, j))
+                return self.intern(e)
+            elif isinstance(e, (LookaheadRule, End, ErrorSymbol)):
+                return self.intern(e)
+            elif e is NoLineTerminatorHere:
+                return e
+            elif isinstance(e, CallMethod):
+                return self.intern(e)
+            else:
+                raise TypeError(
+                    "invalid grammar: unrecognized element in production "
+                    "`grammar[{!r}][{}][{}]`: {!r}"
+                    .format(nt, i, j, e))
+            assert False, "unreachable"
+
+        def check_reduce_expr(
+                nt: LenientNt,
+                i: int,
+                rhs: Production,
+                expr: ReduceExprOrAccept) -> None:
+            if isinstance(expr, int):
+                concrete_len = sum(1 for e in rhs.body
+                                   if is_concrete_element(e))
+                if not (0 <= expr < concrete_len):
+                    raise ValueError(
+                        "invalid grammar: element number {} out of range for "
+                        "production {!r} in grammar[{!r}][{}].reducer ({!r})"
+                        .format(expr, nt, rhs.body, i, rhs.reducer))
+            elif isinstance(expr, CallMethod):
+                if not isinstance(expr.method, str):
+                    raise TypeError(
+                        "invalid grammar: method names must be strings, "
+                        "not {!r}, in grammar[{!r}[{}].reducer"
+                        .format(expr.method, nt, i))
+                if not expr.method.isidentifier():
+                    name, space, pn = expr.method.partition(' ')
+                    if space == ' ' and name.isidentifier() and pn.isdigit():
+                        pass
+                    else:
+                        raise ValueError(
+                            "invalid grammar: invalid method name {!r} "
+                            "(not an identifier), in grammar[{!r}[{}].reducer"
+                            .format(expr.method, nt, i))
+                for arg_expr in expr.args:
+                    check_reduce_expr(nt, i, rhs, arg_expr)
+            elif expr is None:
+                pass
+            elif isinstance(expr, Some):
+                check_reduce_expr(nt, i, rhs, expr.inner)
+            else:
+                raise TypeError(
+                    "invalid grammar: unrecognized reduce expression {!r} "
+                    "in grammar[{!r}][{}].reducer"
+                    .format(expr, nt, i))
+
+        def copy_rhs(
+                nt: LenientNt,
+                i: int,
+                sole_production: bool,
+                rhs: LenientProduction,
+                context_params: typing.Tuple[str, ...]) -> Production:
+            if isinstance(rhs, list):
+                # Bare list, no reducer. Desugar to a Production, inferring a
+                # reasonable default reducer.
+                nargs = sum(1 for e in rhs if is_concrete_element(e))
+                reducer: ReduceExpr
+                if len(rhs) == 1 and nargs == 1:
+                    reducer = 0  # don't call a method, just propagate the value
+                else:
+                    # Call a method named after the production. If the
+                    # nonterminal has exactly one production, there's no need
+                    # to include the production index `i` to the method name.
+                    if sole_production:
+                        method = str(nt)
+                    else:
+                        method = '{}_{}'.format(nt, i)
+                    reducer = CallMethod(method, tuple(range(nargs)))
+                rhs = Production(rhs, reducer)
+
+            if not isinstance(rhs, Production):
+                raise TypeError(
+                    "invalid grammar: grammar[{!r}][{}] should be "
+                    "a Production or list of grammar symbols, not {!r}"
+                    .format(nt, i, rhs))
+
+            if rhs.condition is not None:
+                param, value = rhs.condition
+                if param not in context_params:
+                    raise TypeError(
+                        "invalid grammar: undefined parameter {!r} "
+                        "in conditional for grammar[{!r}][{}]"
+                        .format(param, nt, i))
+            if rhs.reducer != 'accept':
+                check_reduce_expr(nt, i, rhs, rhs.reducer)
+            assert isinstance(rhs.body, list)
+            return rhs.copy_with(body=[
+                validate_element(nt, i, j, e, context_params)
+                for j, e in enumerate(rhs.body)
+            ])
+
+        def copy_nt_def(
+                nt: LenientNt,
+                nt_def: typing.Union[NtDef, typing.List[LenientProduction]],
+        ) -> NtDef:
+            rhs_list: typing.Sequence[LenientProduction]
+            if isinstance(nt_def, NtDef):
+                for i, param in enumerate(nt_def.params):
+                    if not isinstance(param, str):
+                        raise TypeError(
+                            "invalid grammar: parameter {} of {} should be "
+                            "a string, not {!r}"
+                            .format(i + 1, nt, param))
+                params = nt_def.params
+                rhs_list = nt_def.rhs_list
+                ty = nt_def.type
+            else:
+                params = ()
+                rhs_list = nt_def
+                ty = None
+
+            if not isinstance(rhs_list, list):
+                raise TypeError(
+                    "invalid grammar: grammar[{!r}] should be either a "
+                    "list of right-hand sides or NtDef, not {!r}"
+                    .format(nt, type(rhs_list).__name__))
+
+            sole_production = len(rhs_list) == 1
+            productions = [copy_rhs(nt, i, sole_production, rhs, params)
+                           for i, rhs in enumerate(rhs_list)]
+            return NtDef(params, productions, ty)
+
+        def check_nt_key(nt: LenientNt) -> None:
+            if isinstance(nt, str):
+                if not nt.isidentifier():
+                    raise ValueError(
+                        "invalid grammar: nonterminal names must be identifiers, not {!r}"
+                        .format(nt))
+                if nt in self.variable_terminals or nt in self.synthetic_terminals:
+                    raise TypeError(
+                        "invalid grammar: {!r} is both a nonterminal and a variable terminal"
+                        .format(nt))
+            elif isinstance(nt, Nt):
+                assert keys_are_nt  # checked earlier
+                if not (isinstance(nt.name, (str, InitNt))
+                        and isinstance(nt.args, tuple)):
+                    raise TypeError(
+                        "invalid grammar: expected str or Nt(name=str, "
+                        "args=tuple) keys in nonterminals dict, got {!r}"
+                        .format(nt))
+                check_nt_key(nt.name)
+                for pair in nt.args:
+                    if (not isinstance(pair, tuple)
+                            or len(pair) != 2
+                            or not isinstance(pair[0], str)
+                            or not isinstance(pair[1], bool)):
+                        raise TypeError(
+                            "invalid grammar: expected tuple((str, bool)) args, got {!r}"
+                            .format(nt))
+            elif isinstance(nt, InitNt):
+                # Users don't include init nonterminals when initially creating
+                # a Grammar. They are automatically added below. But if this
+                # Grammar is being created by hacking on a previous Grammar, it
+                # will already have them.
+                if not isinstance(nt.goal, Nt):
+                    raise TypeError(
+                        "invalid grammar: InitNt.goal should be a nonterminal, "
+                        "got {!r}"
+                        .format(nt))
+                # nt.goal is a "use", not a "def". Check it like a use.
+                # Bogus question marks appear in error messages :-|
+                validate_element(nt, '?', '?', nt.goal, ())
+                if nt.goal not in my_goal_nts:
+                    raise TypeError(
+                        "invalid grammar: nonterminal referenced by InitNt "
+                        "is not in the list of goals: {!r}"
+                        .format(nt))
+            else:
+                raise TypeError(
+                    "invalid grammar: expected string keys in nonterminals dict, got {!r}"
+                    .format(nt))
+
+        def validate_nt(
+                nt: LenientNt,
+                nt_def: LenientNtDef
+        ) -> typing.Tuple[LenientNt, NtDef]:
+            check_nt_key(nt)
+            if isinstance(nt, InitNt):
+                # Check the form of init productions. Initially these look like
+                # [[goal]], but after the pipeline goes to work, they can be
+                # [[Optional(goal)]] or [[], [goal]].
+                if not isinstance(nt_def, NtDef):
+                    raise TypeError(
+                        "invalid grammar: key {!r} must map to "
+                        "value of type NtDef, not {!r}"
+                        .format(nt, nt_def))
+                rhs_list = nt_def.rhs_list
+                g = nt.goal
+                if (rhs_list != [Production([g], 0),
+                                 Production([Nt(nt, ()), End()], 'accept')]
+                    and rhs_list != [Production([Optional(g)], 0),
+                                     Production([Nt(nt, ()), End()], 'accept')]
+                    and rhs_list != [Production([End()], 'accept'),
+                                     Production([g, End()], 'accept'),
+                                     Production([Nt(nt, ()), End()], 'accept')]):
+                    raise ValueError(
+                        "invalid grammar: grammar[{!r}] is not one of "
+                        "the expected forms: got {!r}"
+                        .format(nt, rhs_list))
+
+            return nt, copy_nt_def(nt, nt_def)
+
+        self.nonterminals = {}
+        for nt1, nt_def1 in my_nonterminals.items():
+            nt, nt_def = validate_nt(nt1, nt_def1)
+            self.nonterminals[nt] = nt_def
+        for syn_term_name, t_set in synthetic_terminals.items():
+            nt_def = NtDef((syn_term_name,), [Production([e], 0) for e in t_set], None)
+            nt, nt_def = validate_nt(syn_term_name, nt_def)
+            self.nonterminals[nt] = nt_def
+            # Remove synthetic terminals from the list of terminals.
+            all_terminals.remove(syn_term_name)
+
+        self.terminals = OrderedFrozenSet(all_terminals)
+
+        # Check types of reduce expressions and infer method types. But if the
+        # caller passed in precalculated type info, skip it -- otherwise we
+        # would redo type checking many times as we make minor changes to the
+        # Grammar along the pipeline.
+        if method_types is None:
+            types.infer_types(self)
+        else:
+            for nt, nt_def in self.nonterminals.items():
+                assert isinstance(nt_def, NtDef)
+                assert isinstance(nt_def.type, types.Type)
+            self.methods = method_types
+
+        # Synthesize "init" nonterminals.
+        self.init_nts = []
+        for goal in my_goal_nts:
+            # Convert str goals to Nt objects and validate.
+            if isinstance(goal, str):
+                ok = goal in str_to_nt
+                if ok:
+                    goal = str_to_nt[goal]
+            elif isinstance(goal, Nt):
+                if keys_are_nt:
+                    ok = goal in my_nonterminals
+                else:
+                    ok = goal.name in my_nonterminals
+            if not ok:
+                raise ValueError(
+                    "goal nonterminal {!r} is undefined".format(goal))
+            assert isinstance(goal, Nt)
+
+            # Weird, but the key of an init nonterminal really is
+            # `Nt(InitNt(Nt(goal_name, goal_args)), ())`. It takes no arguments,
+            # but it refers to a goal that might take arguments.
+            init_nt = InitNt(goal)
+            init_key: LenientNt = init_nt
+            goal_nt = Nt(init_nt, ())
+            if keys_are_nt:
+                init_key = goal_nt
+            if init_key not in self.nonterminals:
+                self.nonterminals[init_key] = NtDef(
+                    (),
+                    [Production([goal], 0),
+                     Production([goal_nt, End()], 'accept')],
+                    types.NoReturnType)
+            self.init_nts.append(goal_nt)
+
+        # Add the various execution backends which would rely on the same parse table.
+        self.exec_modes = exec_modes
+        self.type_to_modes = type_to_modes
+
+    # The argument is a list of extension.GrammarExtensions. The annotation is
+    # vague because this module does not import the extension module. It would
+    # be a cyclic dependency.
+    def patch(self, extensions: typing.List) -> None:
+        assert self.type_to_modes is not None
+        assert self.exec_modes is not None
+        if extensions == []:
+            return
+        # Copy of nonterminals which would be mutated by the patches.
+        nonterminals = copy.copy(self.nonterminals)
+        for ext in extensions:
+            # Add the given trait to the execution mode, depending on which
+            # type it got implemented for.
+            for mode in self.type_to_modes[ext.target.for_type]:
+                self.exec_modes[mode].add(ext.target.trait)
+            # Apply grammar transformations.
+            ext.apply_patch(self, nonterminals)
+        # Replace with the modified version of nonterminals
+        self.nonterminals = nonterminals
+
+    def intern(self, obj: Internable) -> Internable:
+        """Return a shared copy of the immutable object `obj`.
+
+        This saves memory and consistent use allows code to use `is` for
+        equality testing.
+        """
+        try:
+            return self._cache[obj]
+        except KeyError:
+            self._cache[obj] = obj
+            return obj
+
+    # Terminals are tokens that must appear verbatim in the input wherever they
+    # appear in the grammar, like the operators '+' '-' *' '/' and brackets '(' ')'
+    # in the example grammar.
+    def is_terminal(self, element: object) -> bool:
+        return type(element) is str
+
+    def expand_set_of_terminals(
+            self,
+            terminals: typing.Iterable[typing.Union[str, None, ErrorSymbol]]
+    ) -> OrderedSet[typing.Union[str, None, ErrorSymbol]]:
+        """Copy a set of terminals, replacing any synthetic terminals with their representations.
+
+        Returns a new OrderedSet.
+
+        terminals - an iterable of terminals and/or other unique elements like
+        None or ErrorSymbol.
+        """
+        result: OrderedSet[typing.Union[str, None, ErrorSymbol]] = OrderedSet()
+        for t in terminals:
+            if isinstance(t, str) and t in self.synthetic_terminals:
+                result |= self.expand_set_of_terminals(self.synthetic_terminals[t])
+            else:
+                result.add(t)
+        return result
+
+    def goals(self) -> typing.List[Nt]:
+        """Return a list of this grammar's goal nonterminals."""
+        return [init_nt.name.goal for init_nt in self.init_nts]  # type: ignore
+
+    def with_nonterminals(
+            self,
+            nonterminals: typing.Mapping[LenientNt, LenientNtDef]
+    ) -> Grammar:
+        """Return a copy of self with the same attributes except different nonterminals."""
+        if self.methods is not None:
+            for nt_def in nonterminals.values():
+                assert isinstance(nt_def, NtDef)
+                assert nt_def.type is not None
+        return Grammar(
+            nonterminals,
+            goal_nts=self.goals(),
+            variable_terminals=self.variable_terminals,
+            synthetic_terminals=self.synthetic_terminals,
+            method_types=self.methods,
+            exec_modes=self.exec_modes,
+            type_to_modes=self.type_to_modes)
+
+    # === A few methods for dumping pieces of grammar.
+
+    def element_to_str(self, e: Element) -> str:
+        if isinstance(e, Nt):
+            return e.pretty()
+        elif self.is_terminal(e):
+            assert isinstance(e, str)
+            if e in self.variable_terminals or e in self.synthetic_terminals:
+                return e
+            return '"' + repr(e)[1:-1] + '"'
+        elif isinstance(e, Optional):
+            return self.element_to_str(e.inner) + "?"
+        elif isinstance(e, LookaheadRule):
+            if len(e.set) == 1:
+                op = "==" if e.positive else "!="
+                s = repr(list(e.set)[0])
+            else:
+                op = "in" if e.positive else "not in"
+                s = '{' + repr(list(e.set))[1:-1] + '}'
+            return "[lookahead {} {}]".format(op, s)
+        elif isinstance(e, End):
+            return "<END>"
+        elif e is NoLineTerminatorHere:
+            return "[no LineTerminator here]"
+        elif isinstance(e, CallMethod):
+            return "{{ {} }}".format(expr_to_str(e))
+        else:
+            return str(e)
+
+    def symbols_to_str(self, rhs: typing.Iterable[Element]) -> str:
+        return " ".join(self.element_to_str(e) for e in rhs)
+
+    def rhs_to_str(self, rhs: LenientProduction) -> str:
+        if isinstance(rhs, Production):
+            if rhs.condition is None:
+                prefix = ''
+            else:
+                param, value = rhs.condition
+                if value is True:
+                    condition = "+" + param
+                elif value is False:
+                    condition = "~" + param
+                else:
+                    condition = "{} == {!r}".format(param, value)
+                prefix = "#[if {}] ".format(condition)
+            return prefix + self.rhs_to_str(rhs.body)
+        elif len(rhs) == 0:
+            return "[empty]"
+        else:
+            return self.symbols_to_str(rhs)
+
+    def nt_to_str(self, nt: LenientNt) -> str:
+        if isinstance(nt, Nt):
+            return self.element_to_str(nt)
+        else:
+            return str(nt)
+
+    def production_to_str(
+            self,
+            nt: LenientNt,
+            rhs: LenientProduction,
+            *reducer: ReduceExpr
+    ) -> str:
+        # As we have two ways of representing productions at the moment, just
+        # take multiple arguments :(
+        return "{} ::= {}{}".format(
+            self.nt_to_str(nt),
+            self.rhs_to_str(rhs),
+            "".join(" => " + expr_to_str(expr) for expr in reducer))
+
+    # The type of `item` is `lr0.LRItem`. No annotation because this module
+    # does not import `lr0`. It would be a cyclic dependency.
+    def lr_item_to_str(self, prods: typing.List, item: typing.Any) -> str:
+        prod = prods[item.prod_index]
+        if item.lookahead is None:
+            la = []
+        else:
+            la = [self.element_to_str(item.lookahead)]
+        return "{} ::= {} >> {{{}}}".format(
+            self.element_to_str(prod.nt),
+            " ".join([self.element_to_str(e) for e in prod.rhs[:item.offset]]
+                     + ["\N{MIDDLE DOT}"]
+                     + la
+                     + [self.element_to_str(e) for e in prod.rhs[item.offset:]]),
+            ", ".join(
+                "$" if t is None else self.element_to_str(t)
+                for t in item.followed_by)
+        )
+
+    def item_set_to_str(
+            self,
+            prods: typing.List,
+            item_set: OrderedFrozenSet
+    ) -> str:
+        return "{{{}}}".format(
+            ",  ".join(self.lr_item_to_str(prods, item) for item in item_set)
+        )
+
+    def expand_terminal(self, t: str) -> OrderedFrozenSet[str]:
+        return self.synthetic_terminals.get(t) or OrderedFrozenSet([t])
+
+    def compatible_elements(self, e1: Element, e2: Element) -> bool:
+        # "type: ignore" because mypy doesn't know that `self.is_terminal(e1)`
+        # means `e1` is a terminal, and thus `self.expand_terminal(e1)` is OK.
+        return (e1 == e2
+                or (self.is_terminal(e1)
+                    and self.is_terminal(e2)
+                    and len(self.expand_terminal(e1)  # type: ignore
+                            & self.expand_terminal(e2)) > 0))  # type: ignore
+
+    def compatible_sequences(
+            self,
+            seq1: typing.Sequence[Element],
+            seq2: typing.Sequence[Element]) -> bool:
+        """True if the two sequences could be the same terminals."""
+        return (len(seq1) == len(seq1)
+                and all(self.compatible_elements(e1, e2) for e1, e2 in zip(seq1, seq2)))
+
+    def dump(self) -> None:
+        for nt, nt_def in self.nonterminals.items():
+            left_side = self.nt_to_str(nt)
+            if nt_def.params:
+                left_side += "[" + ", ".join(nt_def.params) + "]"
+            print(left_side + " ::=")
+            for rhs in nt_def.rhs_list:
+                print("   ", self.rhs_to_str(rhs))
+            print()
+
+    def dump_type_info(self) -> None:
+        for nt, nt_def in self.nonterminals.items():
+            print(nt, nt_def.type)
+        for name, mty in self.methods.items():
+            print("fn {}({}) -> {}"
+                  .format(name,
+                          ", ".join(str(ty) for ty in mty.argument_types),
+                          str(mty.return_type)))
+
+    def is_shifted_element(self, e: Element) -> bool:
+        if isinstance(e, Nt):
+            return True
+        elif self.is_terminal(e):
+            return True
+        elif isinstance(e, Optional):
+            return True
+        elif isinstance(e, LookaheadRule):
+            return False
+        elif isinstance(e, End):
+            return True
+        elif e is NoLineTerminatorHere:
+            return True
+        return False
+
+
+@dataclass(frozen=True)
+class InitNt:
+    """InitNt(goal) is the name of the init nonterminal for the given goal.
+
+    One init nonterminal is created internally for each goal symbol in the grammar.
+
+    The idea is to have a nonterminal that the user has no control over, that is
+    never used in any production, but *only* as an entry point for the grammar,
+    that always has a single production "init_nt ::= goal_nt". This predictable
+    structure makes it easier to get into and out of parsing at run time.
+
+    When an init nonterminal is matched, we take the "accept" action rather than
+    a "reduce" action.
+    """
+    goal: Nt
+
+
+# *** Elements ****************************************************************
+#
+# Elements are the things that can appear in the .body list of a Production:
+#
+# *   Strings represent terminals (see `Grammar.is_terminal`)
+#
+# *   `Nt` objects refer to nonterminals.
+#
+# *   `Optional` objects represent optional elements.
+#
+# *   `LookaheadRule` objects are like lookahead assertions in regular
+#     expressions.
+#
+# *   The `NoLineTerminatorHere` singleton object can appear between two other
+#     symbols to rule out line breaks between them.
+#
+# *   `ErrorSymbol` objects never match anything produced by the lexer. Instead
+#     they match an ErrorToken that's artificially injected into the token
+#     stream at runtime, by the parser itself, just before a token that does
+#     not match anything else.
+
+
+def is_concrete_element(e: Element) -> bool:
+    """True if parsing the element `e` produces a value.
+
+    A production's concrete elements can be used in reduce expressions.
+    """
+    return not isinstance(e, (LookaheadRule, ErrorSymbol, NoLineTerminatorHereClass))
+
+
+# Nonterminals in the ECMAScript grammar can be parameterized; NtParameter is
+# the type of the parameters.
+#
+# For example, `BindingIdentifier[?Yield, ?Await]` is represented as
+# `Nt('BindingIdentifier', (('Yield', Var('Yield')), ('Await', Var('Await'))))`.
+#
+# A nonterminal-parameter-expression is represented by either a Var object or
+# the actual value, a boolean. (In theory, parameters don't *have* to be
+# boolean; all the code would probably work for anything hashable. In practice,
+# all parameters in the ECMAScript grammar are boolean.)
+NtParameter = typing.Hashable
+
+
+class Nt:
+    """Nt(name, ((param0, arg0), ...)) - An invocation of a nonterminal.
+
+    Nonterminals are like lambdas. Each nonterminal in a grammar is defined by an
+    NtDef which has 0 or more parameters.
+
+    Parameter names `param0...` are strings. The actual arguments `arg0...` are
+    NtParameters (see above).
+    """
+
+    __slots__ = ['name', 'args']
+
+    name: typing.Union[str, InitNt]
+    args: typing.Tuple[typing.Tuple[str, NtParameter], ...]
+
+    def __init__(self,
+                 name: typing.Union[str, InitNt],
+                 args: typing.Tuple[typing.Tuple[str, NtParameter], ...] = ()):
+        self.name = name
+        self.args = args
+
+    def __hash__(self) -> int:
+        return hash(('nt', self.name, self.args))
+
+    def __eq__(self, other: object) -> bool:
+        return (isinstance(other, Nt)
+                and (self.name, self.args) == (other.name, other.args))
+
+    def __repr__(self) -> str:
+        if self.args:
+            return 'Nt({!r}, {!r})'.format(self.name, self.args)
+        else:
+            return 'Nt({!r})'.format(self.name)
+
+    def pretty(self) -> str:
+        """Unique version of this Nt to use in the Python runtime.
+
+        Also used in debug/verbose output.
+        """
+        def arg_to_str(name: str, value: NtParameter) -> str:
+            if value is True:
+                return '+' + name
+            elif value is False:
+                return '~' + name
+            elif isinstance(value, Var):
+                if value.name == name:
+                    return '?' + value.name
+                return name + "=" + value.name
+            else:
+                return name + "=" + repr(value)
+
+        if isinstance(self.name, InitNt):
+            return "Start_" + self.name.goal.pretty()
+        if len(self.args) == 0:
+            return self.name
+        return "{}[{}]".format(self.name,
+                               ", ".join(arg_to_str(name, value)
+                                         for name, value in self.args))
+
+
+@dataclass(frozen=True)
+class Optional:
+    """Optional(nt) matches either nothing or the given nt.
+
+    Optional elements are expanded out before states are calculated, so the
+    core of the algorithm never sees them.
+    """
+    inner: Element
+
+
+@dataclass(frozen=True)
+class Literal:
+    """Literal(str) matches a sequence of characters.
+
+    Literal elements are sequences of characters which are expected to appear
+    verbatim in the input.
+    """
+    text: str
+
+
+@dataclass(frozen=True)
+class UnicodeCategory:
+    """UnicodeCategory(str) matches any character with a category matching
+    the cat_prefix.
+
+    UnicodeCategory elements are a set of literal elements which correspond to a
+    given unicode cat_prefix.
+    """
+    cat_prefix: str
+
+
+@dataclass(frozen=True)
+class LookaheadRule:
+    """LookaheadRule(set, pos) imposes a lookahead restriction on whatever follows.
+
+    It never consumes any tokens itself. Instead, the right-hand side
+    [LookaheadRule(frozenset(['a', 'b']), False), 'Thing']
+    matches a Thing that does not start with the token `a` or `b`.
+    """
+    set: typing.FrozenSet[str]
+    positive: bool
+
+
+# A lookahead restriction really just specifies a set of allowed terminals.
+#
+# -   No lookahead restriction at all is equivalent to a rule specifying all terminals.
+#
+# -   A positive lookahead restriction explicitly lists all allowed tokens.
+#
+# -   A negative lookahead restriction instead specifies the set of all tokens
+#     except a few.
+#
+def lookahead_contains(rule: typing.Optional[LookaheadRule], t: str) -> bool:
+    """True if the given lookahead restriction `rule` allows the terminal `t`."""
+    return (rule is None
+            or (t in rule.set if rule.positive
+                else t not in rule.set))
+
+
+def lookahead_intersect(
+        a: typing.Optional[LookaheadRule],
+        b: typing.Optional[LookaheadRule]
+) -> typing.Optional[LookaheadRule]:
+    """Returns a single rule enforcing both `a` and `b`, allowing only terminals that pass both."""
+    if a is None:
+        return b
+    elif b is None:
+        return a
+    elif a.positive:
+        if b.positive:
+            return LookaheadRule(a.set & b.set, True)
+        else:
+            return LookaheadRule(a.set - b.set, True)
+    else:
+        if b.positive:
+            return LookaheadRule(b.set - a.set, True)
+        else:
+            return LookaheadRule(a.set | b.set, False)
+
+
+class NoLineTerminatorHereClass:
+    def __str__(self) -> str:
+        return 'NoLineTerminatorHere'
+
+
+NoLineTerminatorHere = NoLineTerminatorHereClass()
+
+
+# Optional elements. These are expanded out before states are calculated,
+# so the core of the algorithm never sees them.
+@dataclass(frozen=True)
+class Exclude:
+    """Exclude(nt1, nt2) matches if nt1 matches and nt2 does not."""
+    inner: Element
+    exclusion_list: typing.Tuple[Element, ...]
+
+
+# End. This is used to represent the terminal which is infinitely produced by
+# the lexer when input end is reached.
+@dataclass(frozen=True)
+class End:
+    """End() represents the end of the input content."""
+
+
+# Special grammar symbol that can be consumed to handle a syntax error.
+#
+# The error code is passed to an error-handling routine at run time which
+# decides if the error is recoverable or not.
+@dataclass(frozen=True)
+class ErrorSymbol:
+    """Special grammar symbol that can be consumed to handle a syntax error."""
+    error_code: int
+
+
+Element = typing.Union[
+    str,
+    Optional,
+    Literal,
+    UnicodeCategory,
+    Exclude,
+    Nt,
+    LookaheadRule,
+    End,
+    ErrorSymbol,
+    NoLineTerminatorHereClass,
+    CallMethod]
+
+
+@dataclass
+class NtDef:
+    """Definition of a nonterminal.
+
+    Instances have three attributes:
+
+    .params - Tuple of strings, the names of the parameters.
+
+    .rhs_list - List of Production objects. Arguments to Nt elements in the
+    productions can be Var(s) where `s in params`, indicating that parameter
+    should be passed through unchanged.
+
+    .type - The type of runtime value produced by parsing an instance of this
+    nonterminal, or None.
+
+    An NtDef is a sort of lambda.
+
+    Some langauges have constructs that are allowed or disallowed in particular
+    situations. For example, in many languages `return` statements are allowed
+    only inside functions or methods. The ECMAScript standard (5.1.5 "Grammar
+    Notation") offers this example of the notation it uses to specify this sort
+    of thing:
+
+        StatementList [Return] :
+            [+Return] ReturnStatement
+            ExpressionStatement
+
+    This is an abbreviation for:
+
+        StatementList :
+            ExpressionStatement
+
+        StatementList_Return :
+            ReturnStatement
+            ExpressionStatement
+
+    We offer NtDef.params as a way of representing this in our system.
+
+        "StatementList": NtDef(("Return",), [
+            Production(["ReturnStatement"], condition=("Return", True)),
+            ["ExpressionStatement"],
+        ], None),
+
+    This is an abbreviation for:
+
+        "StatementList_0": [
+            ["ExpressionStatement"],
+        ],
+        "StatementList_1": [
+            ["ReturnStatement"],
+            ["ExpressionStatement"],
+        ],
+
+    """
+
+    __slots__ = ['params', 'rhs_list', 'type']
+
+    params: typing.Tuple[str, ...]
+    rhs_list: typing.List[Production]
+    type: typing.Optional[types.Type]
+
+    def with_rhs_list(self, new_rhs_list: typing.List[Production]) -> NtDef:
+        return dataclasses.replace(self, rhs_list=new_rhs_list)
+
+
+@dataclass(frozen=True)
+class Var:
+    """Var(name) represents the run-time value of the parameter with the given name."""
+
+    name: str
+
+
+ReduceExpr = typing.Union[int, CallMethod, None, Some]
+ReduceExprOrAccept = typing.Union[ReduceExpr, str]
+
+# The Grammar constructor is very lax about the types you pass to it. It can
+# accept a `Dict[str, List[List[str]]]`, for example; it copies the data into
+# NtDef and Production objects.
+#
+# The `Lenient` types below are the relaxed types that Grammar() actually
+# accepts.
+LenientNt = typing.Union[Nt, str, InitNt]
+LenientProduction = typing.Union[Production, typing.List[Element]]
+LenientNtDef = typing.Union[NtDef, typing.List[LenientProduction]]