Diffstat (limited to 'third_party/rust/jsparagus/tests')
-rw-r--r--   third_party/rust/jsparagus/tests/__init__.py            0
-rwxr-xr-x   third_party/rust/jsparagus/tests/test.py             1204
-rw-r--r--   third_party/rust/jsparagus/tests/test_js.py           207
-rw-r--r--   third_party/rust/jsparagus/tests/test_parse_pgen.py    33
4 files changed, 1444 insertions, 0 deletions
diff --git a/third_party/rust/jsparagus/tests/__init__.py b/third_party/rust/jsparagus/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/rust/jsparagus/tests/__init__.py diff --git a/third_party/rust/jsparagus/tests/test.py b/third_party/rust/jsparagus/tests/test.py new file mode 100755 index 0000000000..2d064098b5 --- /dev/null +++ b/third_party/rust/jsparagus/tests/test.py @@ -0,0 +1,1204 @@ +#!/usr/bin/env python3 + +import io +import re +import unittest +import typing + +import jsparagus +from jsparagus import gen, lexer, rewrites +from jsparagus.grammar import (Grammar, Production, CallMethod, Nt, + Optional, LookaheadRule, NtDef, Var) +from js_parser.parse_esgrammar import parse_esgrammar + + +LispTokenizer = lexer.LexicalGrammar("( )", SYMBOL=r'[!%&*+:<=>?@A-Z^_a-z~]+') + + +def prod(body, method_name): + return Production(body, CallMethod(method_name, list(range(len(body))))) + + +class GenTestCase(unittest.TestCase): + def compile(self, tokenize, grammar, **kwargs): + """Compile a grammar. Use this when you expect compilation to + succeed.""" + self.tokenize = tokenize + self.parser_class = gen.compile(grammar, **kwargs) + + def parse(self, text, goal=None): + if goal is None: + parser = self.parser_class() + else: + parser = self.parser_class(goal=goal) + lexer = self.tokenize(parser) + lexer.write(text) + return lexer.close() + + def compile_multi(self, tokenize, grammar): + self.tokenize = tokenize + obj = gen.compile_multi(grammar) + for attr in dir(obj): + if attr.startswith("parse_"): + setattr(self, attr, getattr(obj, attr)) + + def assertParse(self, s, expected=None, *, goal=None): + result = self.parse(s, goal=goal) + if expected is not None: + self.assertEqual(expected, result) + + def assertNoParse(self, s, *, goal=None, message="banana"): + if goal is None: + kwargs = {} + else: + kwargs = {"goal": goal} + self.assertRaisesRegex( + SyntaxError, + re.escape(message), + lambda: self.parse(s, **kwargs)) + + def testSimple(self): + grammar = parse_esgrammar( + """ + expr : + SYMBOL => $0 + `(` tail + + tail : + `)` => $0 + expr tail + """, + terminal_names=["SYMBOL"] + ) + self.compile(LispTokenizer, grammar) + + self.assertParse( + "(lambda (x) (* x x))", + ('expr_1', + '(', + ('tail_1', + 'lambda', + ('tail_1', + ('expr_1', '(', ('tail_1', 'x', ')')), + ('tail_1', + ('expr_1', + '(', + ('tail_1', + '*', + ('tail_1', + 'x', + ('tail_1', 'x', ')')))), + ')'))))) + + def testEnd(self): + self.compile( + lexer.LexicalGrammar("ONE TWO"), + Grammar({ + 'goal': [ + ['ONE', 'TWO'] + ] + }) + ) + self.assertNoParse("ONE TWO TWO", + message="expected 'end of input', got 'TWO'") + + def testList(self): + list_grammar = Grammar({ + 'prelist': [ + ['word', 'list'] + ], + 'list': [ + ['word'], + ['list', 'word'], + ], + 'word': [ + ['SYMBOL'] + ], + }) + self.compile(LispTokenizer, list_grammar) + self.assertParse( + "the quick brown fox jumped over the lazy dog", + ('prelist', + 'the', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + 'quick', + 'brown'), + 'fox'), + 'jumped'), + 'over'), + 'the'), + 'lazy'), + 'dog'))) + + def testArithmetic(self): + tokenize = lexer.LexicalGrammar( + "+ - * / ( )", + NUM=r'[0-9]\w*', + VAR=r'[A-Za-z]\w*') + arith_grammar = Grammar({ + 'expr': [ + ['term'], + ['expr', '+', 'term'], + ['expr', '-', 'term'], + ], + 'term': [ + ['prim'], + ['term', '*', 'prim'], + ['term', '/', 'prim'], + ], + 'prim': [ + ['NUM'], + ['VAR'], + ['(', 'expr', ')'], + ], + }) + 
self.compile(tokenize, arith_grammar) + + self.assertParse( + '2 * 3 + 4 * (5 + 7)', + ('expr_1', + ('term_1', '2', '*', '3'), + '+', + ('term_1', + '4', + '*', + ('prim_2', + '(', + ('expr_1', '5', '+', '7'), + ')')))) + + self.assertNoParse( + "(", + message="unexpected end of input") + self.assertNoParse( + ")", + message="expected one of ['(', 'NUM', 'VAR'], got ')'") + + def testAmbiguous(self): + # This grammar should fail verification. + # It's ambiguous: is ABC s(A)y(BC) or s(AB)y(C)? + grammar = Grammar({ + 'goal': [ + ['s', 'y'], + ], + 's': [ + ['A'], + ['s', 'B'], + ], + 'y': [ + ['C'], + ['B', 'C'], + ], + }) + + out = io.StringIO() + self.assertRaisesRegex(ValueError, r"conflict", + lambda: gen.generate_parser(out, grammar)) + + def testAmbiguousEmpty(self): + """Reject grammars that are ambiguous due to empty productions. + + (Empty productions are ones that match the empty string.)""" + + def check(rules): + grammar = Grammar(rules, goal_nts=['goal']) + out = io.StringIO() + self.assertRaisesRegex( + ValueError, + r"ambiguous grammar|conflict", + lambda: gen.generate_parser(out, grammar)) + + check({'goal': [[], []]}) + check({'goal': [[Optional('X')], []]}) + check({'goal': [[Optional('X')], [Optional('Y')]]}) + check({'goal': [[Optional('X'), Optional('Y')], [Optional('Z')]]}) + + # Issue #3: This also has an abiguity; empty string matches either + # `goal ::= [empty]` or `goal ::= phrase, phrase ::= [empty]`. + check({ + 'goal': [[Optional('phrase')]], + 'phrase': [[Optional('X')]], + }) + + # Input "X" is ambiguous, could be ('goal', ('a', None), ('a', 'X')) + # or the other 'a' could be the one that's missing. + check({ + 'goal': [['a', 'a']], + 'a': [[Optional('X')]], + }) + + def testLeftFactor(self): + """Most basic left-factoring test.""" + tokenize = lexer.LexicalGrammar("A B") + grammar = Grammar({ + 'goal': [ + ['A'], + ['A', 'B'], + ], + }) + + self.compile(tokenize, grammar) + self.assertParse("A", 'A') + self.assertParse("A B", ('goal_1', 'A', 'B')) + + def testLeftFactorMulti(self): + """Test left-factoring with common prefix of length >1.""" + tokenize = lexer.LexicalGrammar("A B C D E") + grammar = Grammar({ + 'goal': [ + ['A', 'B', 'C', 'D'], + ['A', 'B', 'C', 'E'], + ], + }) + self.compile(tokenize, grammar) + self.assertParse( + "A B C D", + ('goal_0', 'A', 'B', 'C', 'D')) + self.assertParse( + "A B C E", + ('goal_1', 'A', 'B', 'C', 'E')) + + def testLeftFactorMultiLevel(self): + """Test left-factoring again on a nonterminal introduced by + left-factoring.""" + tokenize = lexer.LexicalGrammar("FOR IN TO BY ( ) = ;", + VAR=r'[A-Za-z]+') + + # The first left-factoring pass on `stmt` will left-factor `FOR ( VAR`. + # A second pass is needed to left-factor `= expr TO expr`. 
+ grammar = Grammar({ + 'stmt': [ + ['expr', ';'], + ['FOR', '(', 'VAR', 'IN', 'expr', ')', 'stmt'], + ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr', ')', 'stmt'], + ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr', + 'BY', 'expr', ')', 'stmt'], + ['IF', '(', 'expr', ')', 'stmt'], + ], + 'expr': [ + ['VAR'], + ], + }) + self.compile(tokenize, grammar) + self.assertParse( + "FOR (x IN y) z;", + ('stmt_1', 'FOR', '(', 'x', 'IN', 'y', ')', + ('stmt_0', 'z', ';'))) + self.assertParse( + "FOR (x = y TO z) x;", + ('stmt_2', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', ')', + ('stmt_0', 'x', ';'))) + self.assertParse( + "FOR (x = y TO z BY w) x;", + ('stmt_3', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', 'BY', 'w', ')', + ('stmt_0', 'x', ';'))) + + def testFirstFirstConflict(self): + """This grammar is unambiguous, but is not LL(1) due to a first/first conflict. + + Cribbed from: https://stackoverflow.com/a/17047370/94977 + """ + + tokenize = lexer.LexicalGrammar("A B C") + grammar = Grammar({ + 's': [ + ['x', 'B'], + ['y', 'C'], + ], + 'x': [ + prod(['A'], "x"), + ], + 'y': [ + prod(['A'], "y"), + ], + }) + self.compile(tokenize, grammar) + + self.assertParse("A B", ('s_0', ('x', 'A'), 'B')) + self.assertParse("A C", ('s_1', ('y', 'A'), 'C')) + + def testLeftHandSideExpression(self): + """Example of a grammar that's in SLR(1) but hard to smoosh into an LL(1) form. + + This is taken from the ECMAScript grammar. + + ...Of course, it's not really possible to enforce the desired syntactic + restrictions in LR(k) either; the ES grammar matches `(x + y) = z` and + an additional attribute grammar (IsValidSimpleAssignmentTarget) is + necessary to rule it out. + """ + self.compile( + lexer.LexicalGrammar("= +", VAR=r'[a-z]+\b'), + Grammar({ + 'AssignmentExpression': [ + ['AdditiveExpression'], + ['LeftHandSideExpression', '=', 'AssignmentExpression'], + ], + 'AdditiveExpression': [ + ['LeftHandSideExpression'], + ['AdditiveExpression', '+', 'LeftHandSideExpression'], + ], + 'LeftHandSideExpression': [ + ['VAR'], + ] + }) + ) + self.assertParse("z = x + y") + self.assertNoParse( + "x + y = z", + message="expected one of ['+', 'end of input'], got '='") + + def testDeepRecursion(self): + grammar = Grammar({ + 'expr': [ + ['SYMBOL'], + ['(', ')'], + ['(', 'exprs', ')'], + ], + 'exprs': [ + ['expr'], + ['exprs', 'expr'], + ], + }) + self.compile(LispTokenizer, grammar) + + N = 3000 + s = "x" + t = ('expr_0', 'x') + for i in range(N): + s = "(" + s + ")" + t = ('expr_2', '(', t, ')') + + result = self.parse(s) + + # Python can't check that result == t; it causes a RecursionError. + # Testing that repr(result) == repr(t), same deal. 
So: + for i in range(N): + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 4) + self.assertEqual(result[0], 'expr_2') + self.assertEqual(result[1], '(') + self.assertEqual(result[3], ')') + result = result[2] + + def testExpandOptional(self): + grammar = Grammar({'goal': [[]]}) + empties = {} + # Unit test for rewrites.expand_optional_symbols_in_rhs + self.assertEqual( + list(rewrites.expand_optional_symbols_in_rhs(['ONE', 'TWO', '3'], + grammar, empties)), + [(['ONE', 'TWO', '3'], {})]) + self.assertEqual( + list(rewrites.expand_optional_symbols_in_rhs( + ['a', 'b', Optional('c')], grammar, empties)), + [(['a', 'b'], {2: None}), + (['a', 'b', 'c'], {})]) + self.assertEqual( + list(rewrites.expand_optional_symbols_in_rhs( + [Optional('a'), Optional('b')], grammar, empties)), + [([], {0: None, 1: None}), + (['a'], {1: None}), + (['b'], {0: None}), + (['a', 'b'], {})]) + + def testEmptyGrammar(self): + tokenize = lexer.LexicalGrammar("X") + self.compile(tokenize, Grammar({'goal': [[]]})) + self.assertParse("", ('goal',)) + self.assertNoParse( + "X", + message="expected 'end of input', got 'X' (line 1)") + + def testOptionalEmpty(self): + tokenize = lexer.LexicalGrammar("X Y") + grammar = Grammar({ + 'a': [ + [Optional('b'), Optional('c')], + ], + 'b': [ + prod(['X'], 'b'), + ], + 'c': [ + prod(['Y'], 'c'), + ] + }) + self.compile(tokenize, grammar) + self.assertParse("", ('a', None, None)) + self.assertParse("X", ('a', ('b', 'X'), None)) + self.assertParse("Y", ('a', None, ('c', 'Y'))) + self.assertParse("X Y", ('a', ('b', 'X'), ('c', 'Y'))) + + def testOptional(self): + tokenize = lexer.LexicalGrammar('[ ] , X') + grammar = Grammar({ + 'array': [ + ['[', Optional('elision'), ']'], + ['[', 'elements', ']'], + ['[', 'elements', ',', Optional('elision'), ']'] + ], + 'elements': [ + [Optional('elision'), 'X'], + ['elements', ',', Optional('elision'), 'X'] + ], + 'elision': [ + [','], + ['elision', ','] + ] + }) + self.compile(tokenize, grammar) + self.assertParse("[]", + ('array_0', '[', None, ']')) + self.assertParse("[,]", + ('array_0', '[', ',', ']')) + self.assertParse( + "[,,X,,X,]", + ('array_2', + '[', + ('elements_1', + ('elements_0', + ('elision_1', + ',', + ','), + 'X'), + ',', + ',', + 'X'), + ',', + None, + ']')) + + def testPositiveLookahead(self): + self.compile( + lexer.LexicalGrammar('A B + ( )'), + Grammar({ + 'goal': [ + [LookaheadRule(frozenset({'A', 'B'}), True), 'expr'], + ], + 'expr': [ + ['term'], + ['expr', '+', 'term'], + ], + 'term': [ + ['A'], + ['B'], + ['(', 'expr', ')'], + ] + }) + ) + self.assertNoParse( + "(A)", + message="expected one of ['A', 'B'], got '('") + self.assertParse("A + B") + + def testNegativeLookahead(self): + tokenize = lexer.LexicalGrammar('a b') + rules = { + 'goal': [ + [LookaheadRule(frozenset({'a'}), False), 'abs'], + ], + 'abs': [ + ['a'], + ['b'], + ['abs', 'a'], + ['abs', 'b'], + ], + } + + self.compile(tokenize, Grammar(rules)) + self.assertNoParse("a b", message="expected 'b', got 'a'") + self.assertParse( + 'b a', + ('goal', ('abs_2', 'b', 'a'))) + + # In simple cases like this, the lookahead restriction can even + # disambiguate a grammar that would otherwise be ambiguous. 
+ rules['goal'].append(prod(['a'], 'goal_a')) + self.compile(tokenize, Grammar(rules)) + self.assertParse('a', ('goal_a', 'a')) + + def disabledNegativeLookaheadDisambiguation(self): + tokenize = lexer.LexicalGrammar( + '( ) { } ; function =', + IDENT=r'[A-Za-z_][A-Za-z_0-9]*') + grammar = Grammar({ + 'stmts': [ + ['stmt'], + ['stmts', 'stmt'], + ], + 'stmt': [ + [LookaheadRule(set=frozenset({'function'}), positive=False), + 'expr', ';'], + ['fndecl'], + ], + 'fndecl': [ + ['function', 'IDENT', '(', ')', '{', Optional('stmt'), '}'], + ], + 'expr': [ + ['term'], + ['IDENT', '=', 'expr'], + ], + 'term': [ + ['(', 'expr', ')'], + ['fndecl'], + ['term', '(', 'expr', ')'], + ], + }) + self.compile(tokenize, grammar) + + # Test that without the lookahead restriction, we reject this grammar + # (it's ambiguous): + del grammar['stmt'][0][0] + self.assertRaisesRegex(ValueError, + 'banana', + lambda: gen.compile(grammar)) + + self.assertParse( + 'function f() { x = function y() {}; }', + ('stmt', 1, + ('fndecl', + 'function', 'f', '(', ')', '{', + ('stmt', 0, + ('expr', 1, + 'x', + '=', + ('expr', 0, + ('term', 1, + ('fndecl', + 'function', 'y', '(', ')', + '{', None, '}')))), + ';')))) + + self.assertParse( + '(function g(){});', + ('stmts', 0, + ('stmt', 0, + ('term', 1, + ('fndecl', + 'function', 'g', '(', ')', '{', None, '}')), + ';'))) + + def testTrailingLookahead(self): + """Lookahead at the end of a production is banned.""" + tokenize = lexer.LexicalGrammar('IF ( X ) ELSE OTHER ;') + grammar = gen.Grammar({ + 'goal': [['stmt']], + 'stmt': [ + ['OTHER', ';'], + ['IF', '(', 'X', ')', 'stmt', + LookaheadRule(frozenset({'ELSE'}), False)], + ['IF', '(', 'X', ')', 'stmt', 'ELSE', 'stmt'], + ], + }) + + def stmt_0(): + return ('stmt_0', 'OTHER', ';') + + def stmt_1(t): + return ('stmt_1', 'IF', '(', 'X', ')', t) + + def stmt_2(t, e): + return ('stmt_2', 'IF', '(', 'X', ')', t, 'ELSE', e) + + self.compile(tokenize, grammar) + self.assertParse('IF(X) OTHER;', stmt_1(stmt_0())) + self.assertParse('IF(X) OTHER; ELSE OTHER;', + stmt_2(stmt_0(), stmt_0())) + self.assertParse('IF(X) IF(X) OTHER; ELSE OTHER; ELSE OTHER;', + stmt_2(stmt_2(stmt_0(), stmt_0()), stmt_0())) + self.assertParse('IF(X) OTHER; ELSE IF(X) OTHER; ELSE OTHER;', + stmt_2(stmt_0(), stmt_2(stmt_0(), stmt_0()))) + self.assertParse('IF(X) IF(X) OTHER; ELSE OTHER;', + stmt_1(stmt_2(stmt_0(), stmt_0()))) + + def testLookaheadBeforeOptional(self): + self.compile( + lexer.LexicalGrammar( + '= : _', + PUBLIC=r'public\b', + IDENT=r'[a-z]+\b', + NUM=r'[0-9]\b'), + Grammar({ + 'decl': [ + [ + LookaheadRule(frozenset({'IDENT'}), True), + Optional('attrs'), + 'pat', '=', 'NUM' + ], + ], + 'attrs': [ + ['attr'], + ['attrs', 'attr'], + ], + 'attr': [ + ['PUBLIC', ':'], + ['IDENT', ':'], + ], + 'pat': [ + ['IDENT'], + ['_'], + ], + }) + ) + self.assertEqual( + self.parse("x = 0"), + ("decl", None, "x", "=", "0")) + self.assertParse("thread: x = 0") + self.assertNoParse( + "public: x = 0", + message="expected 'IDENT', got 'public'") + self.assertNoParse("_ = 0", message="expected 'IDENT', got '_'") + self.assertParse("funny: public: x = 0") + self.assertParse("funny: _ = 0") + + def testForLookahead(self): + grammar = Grammar({ + 'Stmt': [ + [';'], + ['ForStmt'], + ], + 'ForStmt': [ + ["for", "(", LookaheadRule(frozenset({"let"}), False), + "Expr", ";", ";", ")", "Stmt"], + ], + 'Expr': [ + ["0"], + ["let"], + ], + }) + self.compile(lexer.LexicalGrammar("for ( let ; ) 0"), grammar) + self.assertParse("for (0;;) ;") + self.assertNoParse("for (let;;) ;", 
message="expected '0', got 'let'") + + def testLookaheadDisambiguation(self): + """A lookahead restriction should be able to rule out certain nonterminals entirely.""" + + grammar = Grammar({ + 'Script': [ + ['Statement'], + ['Statement', 'Statement'], + ], + 'Statement': [ + [LookaheadRule(frozenset({'function'}), False), 'Expression', ';'], + ['Function'], + ], + 'Function': [ + ['function', 'x', '(', ')', '{', '}'], + ], + 'Expression': [ + ['Primary'], + ['++', 'Primary'], + ['Primary', '++'], + ], + 'Primary': [ + ['Function'], + ['x'], + ], + }) + + self.compile(lexer.LexicalGrammar("function x ( ) { } ++ ;"), grammar) + self.assertParse("function x() {}") + self.assertParse("++function x() {};") + self.assertNoParse("++function x() {}", message="unexpected end") + # TODO: The parser generator fails to handle this case because it does + # not forward the restriction from producting a Function to the + # Primitive rule. Therefore, `Function [lookahead: ;]` is incorrectly + # reduced to a `Primitive [lookahead: ;]` + # self.assertNoParse("function x() {}++;", message="got ';'") + self.assertParse("function x() {} ++x;") + + # XXX to test: combination of lookaheads, ++, +-, -+, -- + # XXX todo: find an example where lookahead canonicalization matters + + def testHugeExample(self): + grammar = Grammar( + { + 'grammar': [['nt_def_or_blank_line'], + ['grammar', 'nt_def_or_blank_line']], + 'arg': [['sigil', 'NT']], + 'args': [['arg'], ['args', ',', 'arg']], + 'definite_sigil': [['~'], ['+']], + 'exclusion': [['terminal'], + ['nonterminal'], + ['CHR', 'through', 'CHR']], + 'exclusion_list': [['exclusion'], + ['exclusion_list', 'or', 'exclusion']], + 'ifdef': [['[', 'definite_sigil', 'NT', ']']], + 'line_terminator': [['NT'], ['NTALT']], + 'lookahead_assertion': [ + ['==', 'terminal'], + ['!=', 'terminal'], + ['<!', 'NT'], + ['<!', '{', 'lookahead_exclusions', '}']], + 'lookahead_exclusion': [['lookahead_exclusion_element'], + ['lookahead_exclusion', + 'lookahead_exclusion_element']], + 'lookahead_exclusion_element': [['terminal'], + ['no_line_terminator_here']], + 'lookahead_exclusions': [['lookahead_exclusion'], + ['lookahead_exclusions', ',', + 'lookahead_exclusion']], + 'no_line_terminator_here': [ + ['[', 'no', 'line_terminator', 'here', ']']], + 'nonterminal': [['NT'], ['NTCALL', '[', 'args', ']']], + 'nt_def': [['nt_lhs', 'EQ', 'NL', 'rhs_lines', 'NL'], + ['nt_lhs', 'EQ', 'one', 'of', 'NL', + 't_list_lines', 'NL']], + 'nt_def_or_blank_line': [['NL'], ['nt_def']], + 'nt_lhs': [['NT'], ['NTCALL', '[', 'params', ']']], + 'param': [['NT']], + 'params': [['param'], ['params', ',', 'param']], + 'rhs': [['symbols'], ['[', 'empty', ']']], + 'rhs_line': [[Optional(inner='ifdef'), 'rhs', + Optional(inner='PRODID'), 'NL'], + ['PROSE', 'NL']], + 'rhs_lines': [['rhs_line'], ['rhs_lines', 'rhs_line']], + 'sigil': [['definite_sigil'], ['?']], + 'symbol': [['terminal'], + ['nonterminal'], + ['nonterminal', '?'], + ['nonterminal', 'but', 'not', 'exclusion'], + ['nonterminal', 'but', 'not', 'one', 'of', + 'exclusion_list'], + ['[', 'lookahead', 'lookahead_assertion', ']'], + ['no_line_terminator_here'], + ['WPROSE']], + 'symbols': [['symbol'], ['symbols', 'symbol']], + 't_list_line': [['terminal_seq', 'NL']], + 't_list_lines': [['t_list_line'], + ['t_list_lines', 't_list_line']], + 'terminal': [['T'], ['CHR']], + 'terminal_seq': [['terminal'], ['terminal_seq', 'terminal']] + }, + variable_terminals='EQ T CHR NTCALL NT NTALT ' + 'PRODID PROSE WPROSE'.split() + ) + + # Note: This lexical grammar is not 
suitable for use with incremental + # parsing. + emu_grammar_lexer = lexer.LexicalGrammar( + # the operators and keywords: + "[ ] { } , ~ + ? <! == != " + "but empty here lookahead no not of one or through", + NL="\n", + # any number of colons together + EQ=r':+', + # terminals of the ES grammar, quoted with backticks + T=r'`[^` \n]+`|```', + # also terminals, denoting control characters + CHR=r'<[A-Z]+>|U\+[0-9A-f]{4}', + # nonterminals that will be followed by boolean parameters + NTCALL=r'(?:uri|[A-Z])\w*(?=\[)', + # nonterminals (also, boolean parameters) + NT=r'(?:uri|[A-Z])\w*', + # nonterminals wrapped in vertical bars for no apparent reason + NTALT=r'\|[A-Z]\w+\|', + # the spec also gives a few productions names + PRODID=r'#[A-Za-z]\w*', + # prose to the end of the line + PROSE=r'>.*', + # prose wrapped in square brackets + WPROSE=r'\[>[^]]*\]', + ) + + self.compile(emu_grammar_lexer, grammar) + + source = """\ + IdentifierReference[Yield, Await] : + Identifier + [~Yield] `yield` + [~Await] `await` + + """ + + self.assertParse(source) + + def testParameterizedProductions(self): + passthru = ('Yield', Var('Yield')), + name = Nt("name", passthru) + stmt = Nt("stmt", passthru) + stmts = Nt("stmts", passthru) + grammar = Grammar({ + 'script': [ + ['def'], + ['script', 'def'], + ], + 'def': [ + [ + 'function', 'IDENT', '(', ')', '{', + Nt('stmts', (('Yield', False),)), '}' + ], + [ + 'function', '*', 'IDENT', '(', ')', '{', + Nt('stmts', (('Yield', True),)), '}' + ], + ], + 'stmts': NtDef(('Yield',), [ + [stmt], + [stmts, stmt], + ], None), + 'stmt': NtDef(('Yield',), [ + [name, "(", ")", ";"], + [name, "=", name, ";"], + Production(["yield", name, ";"], + reducer=CallMethod("yield_stmt", [1]), + condition=('Yield', True)), + ], None), + 'name': NtDef(('Yield',), [ + ["IDENT"], + # Specifically ask for a method here, because otherwise we + # wouldn't get one and then type checking would fail. 
+ Production(["yield"], + CallMethod("yield_as_name", []), + condition=('Yield', False)), + ], None), + }, variable_terminals=["IDENT"]) + self.compile(lexer.LexicalGrammar("( ) { } ; * = function yield", + IDENT=r'[A-Za-z]\w*'), + grammar) + self.assertParse("function* farm() { cow = pig; yield cow; }") + self.assertNoParse( + "function city() { yield toOncomingTraffic; }", + message="expected one of ['(', '='], got 'toOncomingTraffic'") + self.assertNoParse( + "function* farm() { yield = corn; yield yield; }", + message="expected 'IDENT', got '='") + + def testMissingParameterError(self): + grammar = { + 'Foo': [ + ['Bar'], + ], + 'Bar': NtDef(('Arg',), [ + ['NUM'], + Production(['STR'], + reducer=0, + condition=('Arg', True)), + ], None), + } + + self.assertRaisesRegex(ValueError, "missing parameters for 'Bar'", + lambda: Grammar(grammar)) + + def testCanonicalLR(self): + """Example 4.39 (grammar 4.20) from the book.""" + + # Modified as marked below + grammar = Grammar({ + "S": [ + ["L", "=", "R"], + ["R"], + ], + "L": [ + ["*", "R"], + ["id"], + ], + "R": [ + ["L"], + # added so we can have a negative test, showing that + # `R = R` is not an S: + ["7"], + ], + }) + self.compile(lexer.LexicalGrammar("id = * 7"), grammar) + self.assertParse("id = *id") + self.assertParse("*id = id") + self.assertParse("id = 7") + self.assertNoParse("7 = id", + message="expected 'end of input', got '='") + + def testLookaheadWithCanonicalLR(self): + """Only a lookahead assertion makes this grammar unambiguous.""" + tokenize = lexer.LexicalGrammar("async => { } ;", Identifier=r'\w+') + grammar = Grammar({ + "script": [ + ["Expression", ";"], + ], + "Expression": [ + ["PrimaryExpression"], + ["async", "Identifier", "=>", "AsyncConciseBody"], + ], + "AsyncConciseBody": [ + [LookaheadRule(set=frozenset(["{"]), positive=False), + "Expression"], + ["{", "}"], + ], + "PrimaryExpression": [ + ["{", "}"], + ], + }) + + self.compile(tokenize, grammar) + self.assertParse("{};") + self.assertParse("async x => {};") + self.assertParse("async x => async y => {};") + + def testMultiGoal(self): + tokenize = lexer.LexicalGrammar("WHILE DEF FN { } ( ) -> ;", ID=r'\w+') + grammar = Grammar({ + "stmt": [ + ["expr", ";"], + ["{", "stmts", "}"], + ["WHILE", "(", "expr", ")", "stmt"], + ["DEF", "ID", "(", "ID", ")", "{", Optional("stmts"), "}"], + ], + "stmts": [ + ["stmt"], + ["stmts", "stmt"], + ], + "expr": [ + ["FN", "ID", "->", "expr"], + ["call_expr"], + ], + "call_expr": [ + ["ID"], + ["call_expr", "(", "expr", ")"], + ["(", "expr", ")"], + ], + }, goal_nts=["stmts", "expr"]) + self.compile(tokenize, grammar) + self.assertParse("WHILE ( x ) { decx ( x ) ; }", goal="stmts") + self.assertNoParse( + "WHILE ( x ) { decx ( x ) ; }", goal="expr", + message="expected one of ['(', 'FN', 'ID'], got 'WHILE'") + self.assertParse("f(x);", goal="stmts") + self.assertNoParse("f(x);", goal="expr", + message="expected 'end of input', got ';'") + self.assertParse("(FN x -> f ( x ))(x)", goal="expr") + self.assertNoParse("(FN x -> f ( x ))(x)", goal="stmts", + message="unexpected end of input") + + def testStaggeredItems(self): + """Items in a state can have different amounts of leading context.""" + # In this example grammar, after "A" "B", we're in a state that + # contains these two items (ignoring lookahead): + # goal ::= "A" "B" · y + # x ::= "B" · stars "X" + # + # Likewise, after `"A" "B" stars`, we have: + # x ::= "B" stars · "X" + # y ::= stars · "Y" + # stars ::= stars · "*" + tokenize = lexer.LexicalGrammar("A B * X Y") + 
grammar = Grammar({ + "goal": [ + ["A", "x"], + ["A", "B", "y"], + ], + "x": [ + ["B", "stars", "X"], + ], + "y": [ + ["stars", "Y"], + ], + "stars": [ + ["*"], + ["stars", "*"], + ], + }) + self.compile(tokenize, grammar) + self.assertParse("A B * * * X") + self.assertParse("A B * * * Y") + + def testCheckCycleFree(self): + tokenize = lexer.LexicalGrammar("!") + grammar = Grammar({ + "problem": [ + ["one", "two"], + ], + "one": [ + ["!"], + ], + "two": [ + [Optional("problem")], + ], + }) + self.compile(tokenize, grammar) + self.assertParse("! ! ! ! !") + + def testReduceActions(self): + tokenize = lexer.LexicalGrammar("+ - * / ( )", + NUM=r'[0-9]\w*', + VAR=r'[A-Za-z]\w*') + grammar = Grammar({ + "expr": [ + ["term"], + prod(["expr", "+", "term"], "add"), + prod(["expr", "-", "term"], "sub"), + ], + "term": [ + ["unary"], + prod(["term", "*", "unary"], "mul"), + prod(["term", "/", "unary"], "div"), + ], + "unary": [ + ["prim"], + prod(["-", "prim"], "neg"), + ], + "prim": [ + prod(["(", "expr", ")"], "parens"), + prod(["NUM"], "num"), + prod(["VAR"], "var"), + ], + }, goal_nts=['expr']) + + self.compile(tokenize, grammar) + self.assertParse("X", ('var', 'X')) + self.assertParse("3 + 4", ('add', ('num', '3'), '+', ('num', '4'))) + self.assertParse( + "2 * 3 + 4 * (5 + 7)", + ( + 'add', + ('mul', ('num', '2'), '*', ('num', '3')), + '+', + ( + 'mul', + ('num', '4'), + '*', + ('parens', '(', + ('add', ('num', '5'), '+', ('num', '7')), ')')))) + self.assertParse( + "1 / (1 + 1 / (1 + 1 / (1 + 1)))", + ( + 'div', ('num', '1'), '/', ( + 'parens', '(', ( + 'add', ('num', '1'), '+', ( + 'div', ('num', '1'), '/', ( + 'parens', '(', ( + 'add', ('num', '1'), '+', ( + 'div', ('num', '1'), '/', ( + 'parens', '(', ( + 'add', ('num', '1'), '+', + ('num', '1')), + ')'))), + ')'))), + ')'))) + + def testConvenienceMethodTypeInference(self): + """A method can be called only in an intermediate reduce expression.""" + + # The reduce expression `f(g($0))`. + reducer = CallMethod("f", [CallMethod("g", [0])]) + + # The grammar `goal ::= NAME => f(g($1))`. + grammar = Grammar( + { + 'goal': [Production(['NAME'], reducer)], + }, + variable_terminals=['NAME']) + + # Since the return value of f() is used as the value of a `goal`, + # we infer that f() returns a goal. + self.assertEqual( + grammar.methods['f'].return_type, + jsparagus.types.Type('goal')) + + # Since the return value of g() isn't used except as an argument, we + # just give it the type `g`. + self.assertEqual( + grammar.methods['g'].return_type, + jsparagus.types.Type('g')) + + # Since g() is passed to f(), we infer this: + self.assertEqual( + grammar.methods['f'].argument_types, + [jsparagus.types.Type('g')]) + + def testEpsilonFreeTransform(self): + tokenize = lexer.LexicalGrammar('{ } X') + grammar = Grammar({ + 'goal': [ + ['{', 'xlist', '}'], + ], + 'xlist': [ + [], + ['xlist', 'X'], + ], + }) + self.compile(tokenize, grammar) + self.assertParse("{}", ('goal', '{', ('xlist_0',), '}')) + + def compile_as_js( + self, + grammar_source: str, + goals: typing.Optional[typing.Iterable[str]] = None, + verbose: bool = False, + ) -> None: + """Like self.compile(), but generate a parser from ESGrammar, + with ASI support, using the JS lexer. 
+ """ + from js_parser.lexer import JSLexer + from js_parser import load_es_grammar + from js_parser import generate_js_parser_tables + + grammar = parse_esgrammar( + grammar_source, + filename="es-simplified.esgrammar", + extensions=[], + goals=goals, + synthetic_terminals=load_es_grammar.ECMASCRIPT_SYNTHETIC_TERMINALS, + terminal_names=load_es_grammar.TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR) + grammar = generate_js_parser_tables.hack_grammar(grammar) + base_parser_class = gen.compile(grammar, verbose=verbose) + + # "type: ignore" because poor mypy can't cope with the runtime codegen + # we're doing here. + class JSParser(base_parser_class): # type: ignore + def __init__(self, goal='Script', builder=None): + super().__init__(goal, builder) + self._goal = goal + # self.debug = True + + def clone(self): + return JSParser(self._goal, self.methods) + + def on_recover(self, error_code, lexer, stv): + """Check that ASI error recovery is really acceptable.""" + if error_code == 'asi': + if not self.closed and stv.term != '}' and not lexer.saw_line_terminator(): + lexer.throw("missing semicolon") + else: + assert error_code == 'do_while_asi' + + self.tokenize = JSLexer + self.parser_class = JSParser + + def testExtraGoal(self): + + grammar_source = """ +StuffToIgnore_ThisWorksAroundAnUnrelatedBug: + Identifier + IdentifierName + +Hat : + `^` + +ArrowFunction : + `^` `=>` + Hat `*` `=>` + +Script : + `?` `?` ArrowFunction + `?` `?` [lookahead <! {`async`} ] Hat `of` + +LazyArrowFunction : + ArrowFunction + """ + + def try_it(goals): + self.compile_as_js(grammar_source, goals=goals) + self.assertParse("? ? ^ =>", goal='Script') + self.assertParse("? ? ^ of", goal='Script') + + try_it(['Script', 'LazyArrowFunction']) + try_it(['Script']) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/rust/jsparagus/tests/test_js.py b/third_party/rust/jsparagus/tests/test_js.py new file mode 100644 index 0000000000..571232f77a --- /dev/null +++ b/third_party/rust/jsparagus/tests/test_js.py @@ -0,0 +1,207 @@ +""" Tests for the JS parser. """ + +import unittest +import jsparagus.lexer +from js_parser.parser import parse_Script, JSParser +from js_parser.lexer import JSLexer + + +class ESTestCase(unittest.TestCase): + def parse(self, s): + if isinstance(s, list): + f = JSLexer(JSParser()) + for chunk in s: + f.write(chunk) + return f.close() + else: + return parse_Script(s) + + def assert_parses(self, s): + self.parse(s) + + def assert_incomplete(self, s): + """Assert that s fails to parse with UnexpectedEndError. + + (This should be the case if `s` is a prefix of a valid Script.) + """ + self.assertRaises(jsparagus.lexer.UnexpectedEndError, + lambda: parse_Script(s)) + + def assert_syntax_error(self, s): + """Assert that s fails to parse.""" + with self.assertRaises(jsparagus.lexer.SyntaxError): + parse_Script(s) + + def assert_can_close_after(self, s): + parser = JSParser() + lexer = JSLexer(parser) + if isinstance(s, list): + for chunk in s: + lexer.write(chunk) + else: + lexer.write(s) + self.assertTrue(lexer.can_close()) + + # === Tests! 
+ + def test_asi_at_end(self): + self.assert_parses("3 + 4") + self.assert_syntax_error("3 4") + self.assert_incomplete("3 +") + self.assert_incomplete("{") + self.assert_incomplete("{;") + + def test_asi_at_block_end(self): + self.assert_parses("{ doCrimes() }") + self.assert_parses("function f() { ok }") + + def test_asi_after_line_terminator(self): + self.assert_parses('''\ + switch (value) { + case 1: break + case 2: console.log('2'); + } + ''') + self.assert_syntax_error( + "switch (value) { case 1: break case 2: console.log('2'); }") + + def test_asi_after_no_line_terminator_here(self): + self.assert_parses('''\ + function f() { + return + x; + } + ''') + + def test_asi_suppressed(self): + # The specification says ASI does not happen in the production + # EmptyStatement : `;`. + self.assert_syntax_error("if (true)") + self.assert_syntax_error("{ for (;;) }") + + # ASI does not happen in for(;;) loops. + self.assert_syntax_error("for ( \n ; ) {}") + self.assert_syntax_error("for ( ; \n ) {}") + self.assert_syntax_error("for ( \n \n ) {}") + self.assert_syntax_error("for (var i = 0 \n i < 9; i++) {}") + self.assert_syntax_error("for (var i = 0; i < 9 \n i++) {}") + self.assert_syntax_error("for (i = 0 \n i < 9; i++) {}") + self.assert_syntax_error("for (i = 0; i < 9 \n i++) {}") + self.assert_syntax_error("for (let i = 0 \n i < 9; i++) {}") + + # ASI is suppressed in the production ClassElement[Yield, Await] : `;` + # to prevent an infinite loop of ASI. lol + self.assert_syntax_error("class Fail { \n +1; }") + + def test_if_else(self): + self.assert_parses("if (x) f();") + self.assert_incomplete("if (x)") + self.assert_parses("if (x) f(); else g();") + self.assert_incomplete("if (x) f(); else") + self.assert_parses("if (x) if (y) g(); else h();") + self.assert_parses("if (x) if (y) g(); else h(); else j();") + + def test_lexer_decimal(self): + self.assert_parses("0.") + self.assert_parses(".5") + self.assert_syntax_error(".") + + def test_arrow(self): + self.assert_parses("x => x") + self.assert_parses("f = x => x;") + self.assert_parses("(x, y) => [y, x]") + self.assert_parses("f = (x, y) => {}") + self.assert_syntax_error("(x, y) => {x: x, y: y}") + + def test_invalid_character(self): + self.assert_syntax_error("\0") + self.assert_syntax_error("—x;") + self.assert_syntax_error("const ONE_THIRD = 1 ÷ 3;") + + def test_regexp(self): + self.assert_parses(r"/\w/") + self.assert_parses("/[A-Z]/") + self.assert_parses("/[//]/") + self.assert_parses("/a*a/") + self.assert_parses("/**//x*/") + self.assert_parses("{} /x/") + self.assert_parses("of / 2") + + def test_incomplete_comments(self): + self.assert_syntax_error("/*") + self.assert_syntax_error("/* hello world") + self.assert_syntax_error("/* hello world *") + self.assert_parses(["/* hello\n", " world */"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie\n ok();"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie", "jiowaeawojefiw"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie", "jiowaeawojefiw\n ok();"]) + + def test_awkward_chunks(self): + self.assert_parses(["let", "ter.head = 1;"]) + self.assert_parses(["let", " x = 1;"]) + + # `list()` here explodes the string into a list of one-character strings. 
+ self.assert_parses(list("function f() { ok(); }")) + + self.assertEqual( + self.parse(["/xyzzy/", "g;"]), + ('script', + ('script_body', + ('statement_list_single', + ('expression_statement', + ('regexp_literal', '/xyzzy/g')))))) + + self.assertEqual( + self.parse(['x/', '=2;']), + ('script', + ('script_body', + ('statement_list_single', + ('expression_statement', + ('compound_assignment_expr', + ('identifier_expr', ('identifier_reference', 'x')), + ('box_assign_op', ('div_assign_op', '/=')), + ('numeric_literal', '2'))))))) + + def test_can_close(self): + self.assert_can_close_after([]) + self.assert_can_close_after("") + self.assert_can_close_after("2 + 2;\n") + self.assert_can_close_after("// seems ok\n") + + def test_can_close_with_asi(self): + self.assert_can_close_after("2 + 2\n") + + def test_conditional_keywords(self): + # property names + self.assert_parses("let obj = {if: 3, function: 4};") + self.assert_parses("assert(obj.if == 3);") + + # method names + self.assert_parses(""" + class C { + if() {} + function() {} + } + """) + + self.assert_parses("var let = [new Date];") # let as identifier + self.assert_parses("let v = let;") # let as keyword, then identifier + # Next line would fail because the multitoken `let [` lookahead isn't implemented yet. + # self.assert_parses("let.length;") # `let .` -> ExpressionStatement + self.assert_syntax_error("let[0].getYear();") # `let [` -> LexicalDeclaration + + self.assert_parses(""" + var of = [1, 2, 3]; + for (of of of) console.log(of); // logs 1, 2, 3 + """) + self.assert_parses("var of, let, private, target;") + self.assert_parses("class X { get y() {} }") + self.assert_parses("async: { break async; }") + self.assert_parses("var get = { get get() {}, set get(v) {}, set: 3 };") + self.assert_parses("for (async of => {};;) {}") + # self.assert_parses("for (async of []) {}") # would fail + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/rust/jsparagus/tests/test_parse_pgen.py b/third_party/rust/jsparagus/tests/test_parse_pgen.py new file mode 100644 index 0000000000..5052f9069c --- /dev/null +++ b/third_party/rust/jsparagus/tests/test_parse_pgen.py @@ -0,0 +1,33 @@ +import unittest + +import jsparagus.gen +from jsparagus import parse_pgen, parse_pgen_generated + + +class ParsePgenTestCase(unittest.TestCase): + def test_self(self): + import os + filename = os.path.join(os.path.dirname(parse_pgen.__file__), "..", + "pgen.pgen") + grammar = parse_pgen.load_grammar(filename) + self.maxDiff = None + pgen_grammar = parse_pgen.pgen_grammar + self.assertEqual(pgen_grammar.nonterminals, grammar.nonterminals) + self.assertEqual(pgen_grammar.variable_terminals, + grammar.variable_terminals) + self.assertEqual(pgen_grammar.goals(), grammar.goals()) + + with open(parse_pgen_generated.__file__) as f: + pre_generated = f.read() + + import io + out = io.StringIO() + jsparagus.gen.generate_parser(out, grammar) + generated_from_file = out.getvalue() + + self.maxDiff = None + self.assertEqual(pre_generated, generated_from_file) + + +if __name__ == '__main__': + unittest.main() |
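For readers skimming the patch: the GenTestCase.compile/parse helpers near the top of test.py wrap one small, fixed pattern. The sketch below restates that pattern outside the unittest harness, using only calls that appear in the diff (lexer.LexicalGrammar, gen.compile, and the write/close streaming interface); the toy arithmetic grammar itself is illustrative and not part of the patch.

    from jsparagus import gen, lexer
    from jsparagus.grammar import Grammar

    # Punctuation terminals go in one space-separated string; named regex
    # terminals are keyword arguments, mirroring the LexicalGrammar calls
    # in test.py.
    tokenize = lexer.LexicalGrammar("+ ( )", NUM=r'[0-9]+')

    # Each nonterminal maps to a list of productions; each production is a
    # list of symbols. With no explicit goal, the first nonterminal serves
    # as the goal, as in testArithmetic. (Toy grammar, illustration only.)
    grammar = Grammar({
        'expr': [
            ['term'],
            ['expr', '+', 'term'],
        ],
        'term': [
            ['NUM'],
            ['(', 'expr', ')'],
        ],
    })

    parser_class = gen.compile(grammar)   # generate an LR parser class
    parser = parser_class()
    stream = tokenize(parser)             # the lexer feeds the parser
    stream.write("1 + (2 + 3)")           # input may arrive in chunks
    print(stream.close())                 # close() returns the parse tree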
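test_js.py additionally exercises the incremental interface: source arrives in chunks whose boundaries may even split a token (see test_awkward_chunks), and can_close() reports whether the text seen so far forms a complete Script, which is what the ASI tests lean on. A condensed sketch of that pattern, assembled from the same calls the tests make; the input chunks and the can_close-then-close sequence are illustrative:

    from js_parser.parser import JSParser
    from js_parser.lexer import JSLexer

    stream = JSLexer(JSParser())
    for chunk in ["let", " x = 1;"]:   # chunk boundaries need not align with tokens
        stream.write(chunk)
    print(stream.can_close())          # True: input so far is a complete Script
    print(stream.close())              # returns the AST as a nested tuple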