author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000
commit    | 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree      | f435a8308119effd964b339f76abb83a57c29483 /third_party/python/ply/example
parent    | Initial commit. (diff)
download  | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz, firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1. (tag: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/ply/example')
40 files changed, 5038 insertions, 0 deletions
diff --git a/third_party/python/ply/example/BASIC/README b/third_party/python/ply/example/BASIC/README new file mode 100644 index 0000000000..be24a3005e --- /dev/null +++ b/third_party/python/ply/example/BASIC/README @@ -0,0 +1,79 @@ +Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by +David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html), +I thought that a fully working BASIC interpreter might be an interesting, +if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea, +but in any case, here it is. + +In this example, you'll find a rough implementation of 1964 Dartmouth BASIC +as described in the manual at: + + http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf + +See also: + + http://en.wikipedia.org/wiki/Dartmouth_BASIC + +This dialect is downright primitive---there are no string variables +and no facilities for interactive input. Moreover, subroutines and functions +are brain-dead even more than they usually are for BASIC. Of course, +the GOTO statement is provided. + +Nevertheless, there are a few interesting aspects of this example: + + - It illustrates a fully working interpreter including lexing, parsing, + and interpretation of instructions. + + - The parser shows how to catch and report various kinds of parsing + errors in a more graceful way. + + - The example both parses files (supplied on command line) and + interactive input entered line by line. + + - It shows how you might represent parsed information. In this case, + each BASIC statement is encoded into a Python tuple containing the + statement type and parameters. These tuples are then stored in + a dictionary indexed by program line numbers. + + - Even though it's just BASIC, the parser contains more than 80 + rules and 150 parsing states. Thus, it's a little more meaty than + the calculator example. + +To use the example, run it as follows: + + % python basic.py hello.bas + HELLO WORLD + % + +or use it interactively: + + % python basic.py + [BASIC] 10 PRINT "HELLO WORLD" + [BASIC] 20 END + [BASIC] RUN + HELLO WORLD + [BASIC] + +The following files are defined: + + basic.py - High level script that controls everything + basiclex.py - BASIC tokenizer + basparse.py - BASIC parser + basinterp.py - BASIC interpreter that runs parsed programs. + +In addition, a number of sample BASIC programs (.bas suffix) are +provided. These were taken out of the Dartmouth manual. + +Disclaimer: I haven't spent a ton of time testing this and it's likely that +I've skimped here and there on a few finer details (e.g., strictly enforcing +variable naming rules). However, the interpreter seems to be able to run +the examples in the BASIC manual. + +Have fun! + +-Dave + + + + + + diff --git a/third_party/python/ply/example/BASIC/basic.py b/third_party/python/ply/example/BASIC/basic.py new file mode 100644 index 0000000000..70ac9e7c74 --- /dev/null +++ b/third_party/python/ply/example/BASIC/basic.py @@ -0,0 +1,65 @@ +# An implementation of Dartmouth BASIC (1964) +# + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +import basiclex +import basparse +import basinterp + +# If a filename has been specified, we try to run it. 
+# If a runtime error occurs, we bail out and enter +# interactive mode below +if len(sys.argv) == 2: + data = open(sys.argv[1]).read() + prog = basparse.parse(data) + if not prog: + raise SystemExit + b = basinterp.BasicInterpreter(prog) + try: + b.run() + raise SystemExit + except RuntimeError: + pass + +else: + b = basinterp.BasicInterpreter({}) + +# Interactive mode. This incrementally adds/deletes statements +# from the program stored in the BasicInterpreter object. In +# addition, special commands 'NEW','LIST',and 'RUN' are added. +# Specifying a line number with no code deletes that line from +# the program. + +while 1: + try: + line = raw_input("[BASIC] ") + except EOFError: + raise SystemExit + if not line: + continue + line += "\n" + prog = basparse.parse(line) + if not prog: + continue + + keys = list(prog) + if keys[0] > 0: + b.add_statements(prog) + else: + stat = prog[keys[0]] + if stat[0] == 'RUN': + try: + b.run() + except RuntimeError: + pass + elif stat[0] == 'LIST': + b.list() + elif stat[0] == 'BLANK': + b.del_line(stat[1]) + elif stat[0] == 'NEW': + b.new() diff --git a/third_party/python/ply/example/BASIC/basiclex.py b/third_party/python/ply/example/BASIC/basiclex.py new file mode 100644 index 0000000000..4151f4c34f --- /dev/null +++ b/third_party/python/ply/example/BASIC/basiclex.py @@ -0,0 +1,61 @@ +# An implementation of Dartmouth BASIC (1964) + +from ply import * + +keywords = ( + 'LET', 'READ', 'DATA', 'PRINT', 'GOTO', 'IF', 'THEN', 'FOR', 'NEXT', 'TO', 'STEP', + 'END', 'STOP', 'DEF', 'GOSUB', 'DIM', 'REM', 'RETURN', 'RUN', 'LIST', 'NEW', +) + +tokens = keywords + ( + 'EQUALS', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POWER', + 'LPAREN', 'RPAREN', 'LT', 'LE', 'GT', 'GE', 'NE', + 'COMMA', 'SEMI', 'INTEGER', 'FLOAT', 'STRING', + 'ID', 'NEWLINE' +) + +t_ignore = ' \t' + + +def t_REM(t): + r'REM .*' + return t + + +def t_ID(t): + r'[A-Z][A-Z0-9]*' + if t.value in keywords: + t.type = t.value + return t + +t_EQUALS = r'=' +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_POWER = r'\^' +t_DIVIDE = r'/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LT = r'<' +t_LE = r'<=' +t_GT = r'>' +t_GE = r'>=' +t_NE = r'<>' +t_COMMA = r'\,' +t_SEMI = r';' +t_INTEGER = r'\d+' +t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' +t_STRING = r'\".*?\"' + + +def t_NEWLINE(t): + r'\n' + t.lexer.lineno += 1 + return t + + +def t_error(t): + print("Illegal character %s" % t.value[0]) + t.lexer.skip(1) + +lex.lex(debug=0) diff --git a/third_party/python/ply/example/BASIC/basiclog.py b/third_party/python/ply/example/BASIC/basiclog.py new file mode 100644 index 0000000000..9dcc7feda6 --- /dev/null +++ b/third_party/python/ply/example/BASIC/basiclog.py @@ -0,0 +1,73 @@ +# An implementation of Dartmouth BASIC (1964) +# + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +import logging +logging.basicConfig( + level=logging.INFO, + filename="parselog.txt", + filemode="w" +) +log = logging.getLogger() + +import basiclex +import basparse +import basinterp + +# If a filename has been specified, we try to run it. +# If a runtime error occurs, we bail out and enter +# interactive mode below +if len(sys.argv) == 2: + data = open(sys.argv[1]).read() + prog = basparse.parse(data, debug=log) + if not prog: + raise SystemExit + b = basinterp.BasicInterpreter(prog) + try: + b.run() + raise SystemExit + except RuntimeError: + pass + +else: + b = basinterp.BasicInterpreter({}) + +# Interactive mode. 
This incrementally adds/deletes statements +# from the program stored in the BasicInterpreter object. In +# addition, special commands 'NEW','LIST',and 'RUN' are added. +# Specifying a line number with no code deletes that line from +# the program. + +while 1: + try: + line = raw_input("[BASIC] ") + except EOFError: + raise SystemExit + if not line: + continue + line += "\n" + prog = basparse.parse(line, debug=log) + if not prog: + continue + + keys = list(prog) + if keys[0] > 0: + b.add_statements(prog) + else: + stat = prog[keys[0]] + if stat[0] == 'RUN': + try: + b.run() + except RuntimeError: + pass + elif stat[0] == 'LIST': + b.list() + elif stat[0] == 'BLANK': + b.del_line(stat[1]) + elif stat[0] == 'NEW': + b.new() diff --git a/third_party/python/ply/example/BASIC/basinterp.py b/third_party/python/ply/example/BASIC/basinterp.py new file mode 100644 index 0000000000..67762c797b --- /dev/null +++ b/third_party/python/ply/example/BASIC/basinterp.py @@ -0,0 +1,496 @@ +# This file provides the runtime support for running a basic program +# Assumes the program has been parsed using basparse.py + +import sys +import math +import random + + +class BasicInterpreter: + + # Initialize the interpreter. prog is a dictionary + # containing (line,statement) mappings + def __init__(self, prog): + self.prog = prog + + self.functions = { # Built-in function table + 'SIN': lambda z: math.sin(self.eval(z)), + 'COS': lambda z: math.cos(self.eval(z)), + 'TAN': lambda z: math.tan(self.eval(z)), + 'ATN': lambda z: math.atan(self.eval(z)), + 'EXP': lambda z: math.exp(self.eval(z)), + 'ABS': lambda z: abs(self.eval(z)), + 'LOG': lambda z: math.log(self.eval(z)), + 'SQR': lambda z: math.sqrt(self.eval(z)), + 'INT': lambda z: int(self.eval(z)), + 'RND': lambda z: random.random() + } + + # Collect all data statements + def collect_data(self): + self.data = [] + for lineno in self.stat: + if self.prog[lineno][0] == 'DATA': + self.data = self.data + self.prog[lineno][1] + self.dc = 0 # Initialize the data counter + + # Check for end statements + def check_end(self): + has_end = 0 + for lineno in self.stat: + if self.prog[lineno][0] == 'END' and not has_end: + has_end = lineno + if not has_end: + print("NO END INSTRUCTION") + self.error = 1 + return + if has_end != lineno: + print("END IS NOT LAST") + self.error = 1 + + # Check loops + def check_loops(self): + for pc in range(len(self.stat)): + lineno = self.stat[pc] + if self.prog[lineno][0] == 'FOR': + forinst = self.prog[lineno] + loopvar = forinst[1] + for i in range(pc + 1, len(self.stat)): + if self.prog[self.stat[i]][0] == 'NEXT': + nextvar = self.prog[self.stat[i]][1] + if nextvar != loopvar: + continue + self.loopend[pc] = i + break + else: + print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc]) + self.error = 1 + + # Evaluate an expression + def eval(self, expr): + etype = expr[0] + if etype == 'NUM': + return expr[1] + elif etype == 'GROUP': + return self.eval(expr[1]) + elif etype == 'UNARY': + if expr[1] == '-': + return -self.eval(expr[2]) + elif etype == 'BINOP': + if expr[1] == '+': + return self.eval(expr[2]) + self.eval(expr[3]) + elif expr[1] == '-': + return self.eval(expr[2]) - self.eval(expr[3]) + elif expr[1] == '*': + return self.eval(expr[2]) * self.eval(expr[3]) + elif expr[1] == '/': + return float(self.eval(expr[2])) / self.eval(expr[3]) + elif expr[1] == '^': + return abs(self.eval(expr[2]))**self.eval(expr[3]) + elif etype == 'VAR': + var, dim1, dim2 = expr[1] + if not dim1 and not dim2: + if var in self.vars: + return self.vars[var] + 
else: + print("UNDEFINED VARIABLE %s AT LINE %s" % + (var, self.stat[self.pc])) + raise RuntimeError + # May be a list lookup or a function evaluation + if dim1 and not dim2: + if var in self.functions: + # A function + return self.functions[var](dim1) + else: + # A list evaluation + if var in self.lists: + dim1val = self.eval(dim1) + if dim1val < 1 or dim1val > len(self.lists[var]): + print("LIST INDEX OUT OF BOUNDS AT LINE %s" % + self.stat[self.pc]) + raise RuntimeError + return self.lists[var][dim1val - 1] + if dim1 and dim2: + if var in self.tables: + dim1val = self.eval(dim1) + dim2val = self.eval(dim2) + if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]): + print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" % + self.stat[self.pc]) + raise RuntimeError + return self.tables[var][dim1val - 1][dim2val - 1] + print("UNDEFINED VARIABLE %s AT LINE %s" % + (var, self.stat[self.pc])) + raise RuntimeError + + # Evaluate a relational expression + def releval(self, expr): + etype = expr[1] + lhs = self.eval(expr[2]) + rhs = self.eval(expr[3]) + if etype == '<': + if lhs < rhs: + return 1 + else: + return 0 + + elif etype == '<=': + if lhs <= rhs: + return 1 + else: + return 0 + + elif etype == '>': + if lhs > rhs: + return 1 + else: + return 0 + + elif etype == '>=': + if lhs >= rhs: + return 1 + else: + return 0 + + elif etype == '=': + if lhs == rhs: + return 1 + else: + return 0 + + elif etype == '<>': + if lhs != rhs: + return 1 + else: + return 0 + + # Assignment + def assign(self, target, value): + var, dim1, dim2 = target + if not dim1 and not dim2: + self.vars[var] = self.eval(value) + elif dim1 and not dim2: + # List assignment + dim1val = self.eval(dim1) + if not var in self.lists: + self.lists[var] = [0] * 10 + + if dim1val > len(self.lists[var]): + print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) + raise RuntimeError + self.lists[var][dim1val - 1] = self.eval(value) + elif dim1 and dim2: + dim1val = self.eval(dim1) + dim2val = self.eval(dim2) + if not var in self.tables: + temp = [0] * 10 + v = [] + for i in range(10): + v.append(temp[:]) + self.tables[var] = v + # Variable already exists + if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]): + print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) + raise RuntimeError + self.tables[var][dim1val - 1][dim2val - 1] = self.eval(value) + + # Change the current line number + def goto(self, linenum): + if not linenum in self.prog: + print("UNDEFINED LINE NUMBER %d AT LINE %d" % + (linenum, self.stat[self.pc])) + raise RuntimeError + self.pc = self.stat.index(linenum) + + # Run it + def run(self): + self.vars = {} # All variables + self.lists = {} # List variables + self.tables = {} # Tables + self.loops = [] # Currently active loops + self.loopend = {} # Mapping saying where loops end + self.gosub = None # Gosub return point (if any) + self.error = 0 # Indicates program error + + self.stat = list(self.prog) # Ordered list of all line numbers + self.stat.sort() + self.pc = 0 # Current program counter + + # Processing prior to running + + self.collect_data() # Collect all of the data statements + self.check_end() + self.check_loops() + + if self.error: + raise RuntimeError + + while 1: + line = self.stat[self.pc] + instr = self.prog[line] + + op = instr[0] + + # END and STOP statements + if op == 'END' or op == 'STOP': + break # We're done + + # GOTO statement + elif op == 'GOTO': + newline = instr[1] + self.goto(newline) + continue + + # PRINT statement 
+ elif op == 'PRINT': + plist = instr[1] + out = "" + for label, val in plist: + if out: + out += ' ' * (15 - (len(out) % 15)) + out += label + if val: + if label: + out += " " + eval = self.eval(val) + out += str(eval) + sys.stdout.write(out) + end = instr[2] + if not (end == ',' or end == ';'): + sys.stdout.write("\n") + if end == ',': + sys.stdout.write(" " * (15 - (len(out) % 15))) + if end == ';': + sys.stdout.write(" " * (3 - (len(out) % 3))) + + # LET statement + elif op == 'LET': + target = instr[1] + value = instr[2] + self.assign(target, value) + + # READ statement + elif op == 'READ': + for target in instr[1]: + if self.dc < len(self.data): + value = ('NUM', self.data[self.dc]) + self.assign(target, value) + self.dc += 1 + else: + # No more data. Program ends + return + elif op == 'IF': + relop = instr[1] + newline = instr[2] + if (self.releval(relop)): + self.goto(newline) + continue + + elif op == 'FOR': + loopvar = instr[1] + initval = instr[2] + finval = instr[3] + stepval = instr[4] + + # Check to see if this is a new loop + if not self.loops or self.loops[-1][0] != self.pc: + # Looks like a new loop. Make the initial assignment + newvalue = initval + self.assign((loopvar, None, None), initval) + if not stepval: + stepval = ('NUM', 1) + stepval = self.eval(stepval) # Evaluate step here + self.loops.append((self.pc, stepval)) + else: + # It's a repeat of the previous loop + # Update the value of the loop variable according to the + # step + stepval = ('NUM', self.loops[-1][1]) + newvalue = ( + 'BINOP', '+', ('VAR', (loopvar, None, None)), stepval) + + if self.loops[-1][1] < 0: + relop = '>=' + else: + relop = '<=' + if not self.releval(('RELOP', relop, newvalue, finval)): + # Loop is done. Jump to the NEXT + self.pc = self.loopend[self.pc] + self.loops.pop() + else: + self.assign((loopvar, None, None), newvalue) + + elif op == 'NEXT': + if not self.loops: + print("NEXT WITHOUT FOR AT LINE %s" % line) + return + + nextvar = instr[1] + self.pc = self.loops[-1][0] + loopinst = self.prog[self.stat[self.pc]] + forvar = loopinst[1] + if nextvar != forvar: + print("NEXT DOESN'T MATCH FOR AT LINE %s" % line) + return + continue + elif op == 'GOSUB': + newline = instr[1] + if self.gosub: + print("ALREADY IN A SUBROUTINE AT LINE %s" % line) + return + self.gosub = self.stat[self.pc] + self.goto(newline) + continue + + elif op == 'RETURN': + if not self.gosub: + print("RETURN WITHOUT A GOSUB AT LINE %s" % line) + return + self.goto(self.gosub) + self.gosub = None + + elif op == 'FUNC': + fname = instr[1] + pname = instr[2] + expr = instr[3] + + def eval_func(pvalue, name=pname, self=self, expr=expr): + self.assign((pname, None, None), pvalue) + return self.eval(expr) + self.functions[fname] = eval_func + + elif op == 'DIM': + for vname, x, y in instr[1]: + if y == 0: + # Single dimension variable + self.lists[vname] = [0] * x + else: + # Double dimension variable + temp = [0] * y + v = [] + for i in range(x): + v.append(temp[:]) + self.tables[vname] = v + + self.pc += 1 + + # Utility functions for program listing + def expr_str(self, expr): + etype = expr[0] + if etype == 'NUM': + return str(expr[1]) + elif etype == 'GROUP': + return "(%s)" % self.expr_str(expr[1]) + elif etype == 'UNARY': + if expr[1] == '-': + return "-" + str(expr[2]) + elif etype == 'BINOP': + return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3])) + elif etype == 'VAR': + return self.var_str(expr[1]) + + def relexpr_str(self, expr): + return "%s %s %s" % (self.expr_str(expr[2]), expr[1], 
self.expr_str(expr[3])) + + def var_str(self, var): + varname, dim1, dim2 = var + if not dim1 and not dim2: + return varname + if dim1 and not dim2: + return "%s(%s)" % (varname, self.expr_str(dim1)) + return "%s(%s,%s)" % (varname, self.expr_str(dim1), self.expr_str(dim2)) + + # Create a program listing + def list(self): + stat = list(self.prog) # Ordered list of all line numbers + stat.sort() + for line in stat: + instr = self.prog[line] + op = instr[0] + if op in ['END', 'STOP', 'RETURN']: + print("%s %s" % (line, op)) + continue + elif op == 'REM': + print("%s %s" % (line, instr[1])) + elif op == 'PRINT': + _out = "%s %s " % (line, op) + first = 1 + for p in instr[1]: + if not first: + _out += ", " + if p[0] and p[1]: + _out += '"%s"%s' % (p[0], self.expr_str(p[1])) + elif p[1]: + _out += self.expr_str(p[1]) + else: + _out += '"%s"' % (p[0],) + first = 0 + if instr[2]: + _out += instr[2] + print(_out) + elif op == 'LET': + print("%s LET %s = %s" % + (line, self.var_str(instr[1]), self.expr_str(instr[2]))) + elif op == 'READ': + _out = "%s READ " % line + first = 1 + for r in instr[1]: + if not first: + _out += "," + _out += self.var_str(r) + first = 0 + print(_out) + elif op == 'IF': + print("%s IF %s THEN %d" % + (line, self.relexpr_str(instr[1]), instr[2])) + elif op == 'GOTO' or op == 'GOSUB': + print("%s %s %s" % (line, op, instr[1])) + elif op == 'FOR': + _out = "%s FOR %s = %s TO %s" % ( + line, instr[1], self.expr_str(instr[2]), self.expr_str(instr[3])) + if instr[4]: + _out += " STEP %s" % (self.expr_str(instr[4])) + print(_out) + elif op == 'NEXT': + print("%s NEXT %s" % (line, instr[1])) + elif op == 'FUNC': + print("%s DEF %s(%s) = %s" % + (line, instr[1], instr[2], self.expr_str(instr[3]))) + elif op == 'DIM': + _out = "%s DIM " % line + first = 1 + for vname, x, y in instr[1]: + if not first: + _out += "," + first = 0 + if y == 0: + _out += "%s(%d)" % (vname, x) + else: + _out += "%s(%d,%d)" % (vname, x, y) + + print(_out) + elif op == 'DATA': + _out = "%s DATA " % line + first = 1 + for v in instr[1]: + if not first: + _out += "," + first = 0 + _out += v + print(_out) + + # Erase the current program + def new(self): + self.prog = {} + + # Insert statements + def add_statements(self, prog): + for line, stat in prog.items(): + self.prog[line] = stat + + # Delete a statement + def del_line(self, lineno): + try: + del self.prog[lineno] + except KeyError: + pass diff --git a/third_party/python/ply/example/BASIC/basparse.py b/third_party/python/ply/example/BASIC/basparse.py new file mode 100644 index 0000000000..d610c7d909 --- /dev/null +++ b/third_party/python/ply/example/BASIC/basparse.py @@ -0,0 +1,474 @@ +# An implementation of Dartmouth BASIC (1964) +# + +from ply import * +import basiclex + +tokens = basiclex.tokens + +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('left', 'POWER'), + ('right', 'UMINUS') +) + +# A BASIC program is a series of statements. We represent the program as a +# dictionary of tuples indexed by line number. + + +def p_program(p): + '''program : program statement + | statement''' + + if len(p) == 2 and p[1]: + p[0] = {} + line, stat = p[1] + p[0][line] = stat + elif len(p) == 3: + p[0] = p[1] + if not p[0]: + p[0] = {} + if p[2]: + line, stat = p[2] + p[0][line] = stat + +# This catch-all rule is used for any catastrophic errors. In this case, +# we simply return nothing + + +def p_program_error(p): + '''program : error''' + p[0] = None + p.parser.error = 1 + +# Format of all BASIC statements. 
+ + +def p_statement(p): + '''statement : INTEGER command NEWLINE''' + if isinstance(p[2], str): + print("%s %s %s" % (p[2], "AT LINE", p[1])) + p[0] = None + p.parser.error = 1 + else: + lineno = int(p[1]) + p[0] = (lineno, p[2]) + +# Interactive statements. + + +def p_statement_interactive(p): + '''statement : RUN NEWLINE + | LIST NEWLINE + | NEW NEWLINE''' + p[0] = (0, (p[1], 0)) + +# Blank line number + + +def p_statement_blank(p): + '''statement : INTEGER NEWLINE''' + p[0] = (0, ('BLANK', int(p[1]))) + +# Error handling for malformed statements + + +def p_statement_bad(p): + '''statement : INTEGER error NEWLINE''' + print("MALFORMED STATEMENT AT LINE %s" % p[1]) + p[0] = None + p.parser.error = 1 + +# Blank line + + +def p_statement_newline(p): + '''statement : NEWLINE''' + p[0] = None + +# LET statement + + +def p_command_let(p): + '''command : LET variable EQUALS expr''' + p[0] = ('LET', p[2], p[4]) + + +def p_command_let_bad(p): + '''command : LET variable EQUALS error''' + p[0] = "BAD EXPRESSION IN LET" + +# READ statement + + +def p_command_read(p): + '''command : READ varlist''' + p[0] = ('READ', p[2]) + + +def p_command_read_bad(p): + '''command : READ error''' + p[0] = "MALFORMED VARIABLE LIST IN READ" + +# DATA statement + + +def p_command_data(p): + '''command : DATA numlist''' + p[0] = ('DATA', p[2]) + + +def p_command_data_bad(p): + '''command : DATA error''' + p[0] = "MALFORMED NUMBER LIST IN DATA" + +# PRINT statement + + +def p_command_print(p): + '''command : PRINT plist optend''' + p[0] = ('PRINT', p[2], p[3]) + + +def p_command_print_bad(p): + '''command : PRINT error''' + p[0] = "MALFORMED PRINT STATEMENT" + +# Optional ending on PRINT. Either a comma (,) or semicolon (;) + + +def p_optend(p): + '''optend : COMMA + | SEMI + |''' + if len(p) == 2: + p[0] = p[1] + else: + p[0] = None + +# PRINT statement with no arguments + + +def p_command_print_empty(p): + '''command : PRINT''' + p[0] = ('PRINT', [], None) + +# GOTO statement + + +def p_command_goto(p): + '''command : GOTO INTEGER''' + p[0] = ('GOTO', int(p[2])) + + +def p_command_goto_bad(p): + '''command : GOTO error''' + p[0] = "INVALID LINE NUMBER IN GOTO" + +# IF-THEN statement + + +def p_command_if(p): + '''command : IF relexpr THEN INTEGER''' + p[0] = ('IF', p[2], int(p[4])) + + +def p_command_if_bad(p): + '''command : IF error THEN INTEGER''' + p[0] = "BAD RELATIONAL EXPRESSION" + + +def p_command_if_bad2(p): + '''command : IF relexpr THEN error''' + p[0] = "INVALID LINE NUMBER IN THEN" + +# FOR statement + + +def p_command_for(p): + '''command : FOR ID EQUALS expr TO expr optstep''' + p[0] = ('FOR', p[2], p[4], p[6], p[7]) + + +def p_command_for_bad_initial(p): + '''command : FOR ID EQUALS error TO expr optstep''' + p[0] = "BAD INITIAL VALUE IN FOR STATEMENT" + + +def p_command_for_bad_final(p): + '''command : FOR ID EQUALS expr TO error optstep''' + p[0] = "BAD FINAL VALUE IN FOR STATEMENT" + + +def p_command_for_bad_step(p): + '''command : FOR ID EQUALS expr TO expr STEP error''' + p[0] = "MALFORMED STEP IN FOR STATEMENT" + +# Optional STEP qualifier on FOR statement + + +def p_optstep(p): + '''optstep : STEP expr + | empty''' + if len(p) == 3: + p[0] = p[2] + else: + p[0] = None + +# NEXT statement + + +def p_command_next(p): + '''command : NEXT ID''' + + p[0] = ('NEXT', p[2]) + + +def p_command_next_bad(p): + '''command : NEXT error''' + p[0] = "MALFORMED NEXT" + +# END statement + + +def p_command_end(p): + '''command : END''' + p[0] = ('END',) + +# REM statement + + +def p_command_rem(p): + '''command 
: REM''' + p[0] = ('REM', p[1]) + +# STOP statement + + +def p_command_stop(p): + '''command : STOP''' + p[0] = ('STOP',) + +# DEF statement + + +def p_command_def(p): + '''command : DEF ID LPAREN ID RPAREN EQUALS expr''' + p[0] = ('FUNC', p[2], p[4], p[7]) + + +def p_command_def_bad_rhs(p): + '''command : DEF ID LPAREN ID RPAREN EQUALS error''' + p[0] = "BAD EXPRESSION IN DEF STATEMENT" + + +def p_command_def_bad_arg(p): + '''command : DEF ID LPAREN error RPAREN EQUALS expr''' + p[0] = "BAD ARGUMENT IN DEF STATEMENT" + +# GOSUB statement + + +def p_command_gosub(p): + '''command : GOSUB INTEGER''' + p[0] = ('GOSUB', int(p[2])) + + +def p_command_gosub_bad(p): + '''command : GOSUB error''' + p[0] = "INVALID LINE NUMBER IN GOSUB" + +# RETURN statement + + +def p_command_return(p): + '''command : RETURN''' + p[0] = ('RETURN',) + +# DIM statement + + +def p_command_dim(p): + '''command : DIM dimlist''' + p[0] = ('DIM', p[2]) + + +def p_command_dim_bad(p): + '''command : DIM error''' + p[0] = "MALFORMED VARIABLE LIST IN DIM" + +# List of variables supplied to DIM statement + + +def p_dimlist(p): + '''dimlist : dimlist COMMA dimitem + | dimitem''' + if len(p) == 4: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + +# DIM items + + +def p_dimitem_single(p): + '''dimitem : ID LPAREN INTEGER RPAREN''' + p[0] = (p[1], eval(p[3]), 0) + + +def p_dimitem_double(p): + '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN''' + p[0] = (p[1], eval(p[3]), eval(p[5])) + +# Arithmetic expressions + + +def p_expr_binary(p): + '''expr : expr PLUS expr + | expr MINUS expr + | expr TIMES expr + | expr DIVIDE expr + | expr POWER expr''' + + p[0] = ('BINOP', p[2], p[1], p[3]) + + +def p_expr_number(p): + '''expr : INTEGER + | FLOAT''' + p[0] = ('NUM', eval(p[1])) + + +def p_expr_variable(p): + '''expr : variable''' + p[0] = ('VAR', p[1]) + + +def p_expr_group(p): + '''expr : LPAREN expr RPAREN''' + p[0] = ('GROUP', p[2]) + + +def p_expr_unary(p): + '''expr : MINUS expr %prec UMINUS''' + p[0] = ('UNARY', '-', p[2]) + +# Relational expressions + + +def p_relexpr(p): + '''relexpr : expr LT expr + | expr LE expr + | expr GT expr + | expr GE expr + | expr EQUALS expr + | expr NE expr''' + p[0] = ('RELOP', p[2], p[1], p[3]) + +# Variables + + +def p_variable(p): + '''variable : ID + | ID LPAREN expr RPAREN + | ID LPAREN expr COMMA expr RPAREN''' + if len(p) == 2: + p[0] = (p[1], None, None) + elif len(p) == 5: + p[0] = (p[1], p[3], None) + else: + p[0] = (p[1], p[3], p[5]) + +# Builds a list of variable targets as a Python list + + +def p_varlist(p): + '''varlist : varlist COMMA variable + | variable''' + if len(p) > 2: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + + +# Builds a list of numbers as a Python list + +def p_numlist(p): + '''numlist : numlist COMMA number + | number''' + + if len(p) > 2: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + +# A number. May be an integer or a float + + +def p_number(p): + '''number : INTEGER + | FLOAT''' + p[0] = eval(p[1]) + +# A signed number. 
+ + +def p_number_signed(p): + '''number : MINUS INTEGER + | MINUS FLOAT''' + p[0] = eval("-" + p[2]) + +# List of targets for a print statement +# Returns a list of tuples (label,expr) + + +def p_plist(p): + '''plist : plist COMMA pitem + | pitem''' + if len(p) > 3: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + + +def p_item_string(p): + '''pitem : STRING''' + p[0] = (p[1][1:-1], None) + + +def p_item_string_expr(p): + '''pitem : STRING expr''' + p[0] = (p[1][1:-1], p[2]) + + +def p_item_expr(p): + '''pitem : expr''' + p[0] = ("", p[1]) + +# Empty + + +def p_empty(p): + '''empty : ''' + +# Catastrophic error handler + + +def p_error(p): + if not p: + print("SYNTAX ERROR AT EOF") + +bparser = yacc.yacc() + + +def parse(data, debug=0): + bparser.error = 0 + p = bparser.parse(data, debug=debug) + if bparser.error: + return None + return p diff --git a/third_party/python/ply/example/BASIC/dim.bas b/third_party/python/ply/example/BASIC/dim.bas new file mode 100644 index 0000000000..87bd95b32e --- /dev/null +++ b/third_party/python/ply/example/BASIC/dim.bas @@ -0,0 +1,14 @@ +5 DIM A(50,15) +10 FOR I = 1 TO 50 +20 FOR J = 1 TO 15 +30 LET A(I,J) = I + J +35 REM PRINT I,J, A(I,J) +40 NEXT J +50 NEXT I +100 FOR I = 1 TO 50 +110 FOR J = 1 TO 15 +120 PRINT A(I,J), +130 NEXT J +140 PRINT +150 NEXT I +999 END diff --git a/third_party/python/ply/example/BASIC/func.bas b/third_party/python/ply/example/BASIC/func.bas new file mode 100644 index 0000000000..447ee16a92 --- /dev/null +++ b/third_party/python/ply/example/BASIC/func.bas @@ -0,0 +1,5 @@ +10 DEF FDX(X) = 2*X +20 FOR I = 0 TO 100 +30 PRINT FDX(I) +40 NEXT I +50 END diff --git a/third_party/python/ply/example/BASIC/gcd.bas b/third_party/python/ply/example/BASIC/gcd.bas new file mode 100644 index 0000000000..d0b7746089 --- /dev/null +++ b/third_party/python/ply/example/BASIC/gcd.bas @@ -0,0 +1,22 @@ +10 PRINT "A","B","C","GCD" +20 READ A,B,C +30 LET X = A +40 LET Y = B +50 GOSUB 200 +60 LET X = G +70 LET Y = C +80 GOSUB 200 +90 PRINT A, B, C, G +100 GOTO 20 +110 DATA 60, 90, 120 +120 DATA 38456, 64872, 98765 +130 DATA 32, 384, 72 +200 LET Q = INT(X/Y) +210 LET R = X - Q*Y +220 IF R = 0 THEN 300 +230 LET X = Y +240 LET Y = R +250 GOTO 200 +300 LET G = Y +310 RETURN +999 END diff --git a/third_party/python/ply/example/BASIC/gosub.bas b/third_party/python/ply/example/BASIC/gosub.bas new file mode 100644 index 0000000000..99737b16f1 --- /dev/null +++ b/third_party/python/ply/example/BASIC/gosub.bas @@ -0,0 +1,13 @@ +100 LET X = 3 +110 GOSUB 400 +120 PRINT U, V, W +200 LET X = 5 +210 GOSUB 400 +220 LET Z = U + 2*V + 3*W +230 PRINT Z +240 GOTO 999 +400 LET U = X*X +410 LET V = X*X*X +420 LET W = X*X*X*X + X*X*X + X*X + X +430 RETURN +999 END diff --git a/third_party/python/ply/example/BASIC/hello.bas b/third_party/python/ply/example/BASIC/hello.bas new file mode 100644 index 0000000000..cc6f0b0b51 --- /dev/null +++ b/third_party/python/ply/example/BASIC/hello.bas @@ -0,0 +1,4 @@ +5 REM HELLO WORLD PROGAM +10 PRINT "HELLO WORLD" +99 END + diff --git a/third_party/python/ply/example/BASIC/linear.bas b/third_party/python/ply/example/BASIC/linear.bas new file mode 100644 index 0000000000..56c08220b3 --- /dev/null +++ b/third_party/python/ply/example/BASIC/linear.bas @@ -0,0 +1,17 @@ +1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS +2 REM ::: A1*X1 + A2*X2 = B1 +3 REM ::: A3*X1 + A4*X2 = B2 +4 REM -------------------------------------- +10 READ A1, A2, A3, A4 +15 LET D = A1 * A4 - A3 * A2 +20 IF D = 0 THEN 65 +30 READ B1, B2 +37 LET X1 = (B1*A4 - 
B2*A2) / D +42 LET X2 = (A1*B2 - A3*B1) / D +55 PRINT X1, X2 +60 GOTO 30 +65 PRINT "NO UNIQUE SOLUTION" +70 DATA 1, 2, 4 +80 DATA 2, -7, 5 +85 DATA 1, 3, 4, -7 +90 END diff --git a/third_party/python/ply/example/BASIC/maxsin.bas b/third_party/python/ply/example/BASIC/maxsin.bas new file mode 100644 index 0000000000..b96901530c --- /dev/null +++ b/third_party/python/ply/example/BASIC/maxsin.bas @@ -0,0 +1,12 @@ +5 PRINT "X VALUE", "SINE", "RESOLUTION" +10 READ D +20 LET M = -1 +30 FOR X = 0 TO 3 STEP D +40 IF SIN(X) <= M THEN 80 +50 LET X0 = X +60 LET M = SIN(X) +80 NEXT X +85 PRINT X0, M, D +90 GOTO 10 +100 DATA .1, .01, .001 +110 END diff --git a/third_party/python/ply/example/BASIC/powers.bas b/third_party/python/ply/example/BASIC/powers.bas new file mode 100644 index 0000000000..a454dc3e21 --- /dev/null +++ b/third_party/python/ply/example/BASIC/powers.bas @@ -0,0 +1,13 @@ +5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS" +6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS" +7 PRINT "N FROM 1 THROUGH 7" +8 PRINT +10 FOR N = 1 TO 7 +15 PRINT "N = "N +20 FOR I = 1 TO N +30 PRINT I^N, +40 NEXT I +50 PRINT +60 PRINT +70 NEXT N +80 END diff --git a/third_party/python/ply/example/BASIC/rand.bas b/third_party/python/ply/example/BASIC/rand.bas new file mode 100644 index 0000000000..4ff7a14670 --- /dev/null +++ b/third_party/python/ply/example/BASIC/rand.bas @@ -0,0 +1,4 @@ +10 FOR I = 1 TO 20 +20 PRINT INT(10*RND(0)) +30 NEXT I +40 END diff --git a/third_party/python/ply/example/BASIC/sales.bas b/third_party/python/ply/example/BASIC/sales.bas new file mode 100644 index 0000000000..a39aefb762 --- /dev/null +++ b/third_party/python/ply/example/BASIC/sales.bas @@ -0,0 +1,20 @@ +10 FOR I = 1 TO 3 +20 READ P(I) +30 NEXT I +40 FOR I = 1 TO 3 +50 FOR J = 1 TO 5 +60 READ S(I,J) +70 NEXT J +80 NEXT I +90 FOR J = 1 TO 5 +100 LET S = 0 +110 FOR I = 1 TO 3 +120 LET S = S + P(I) * S(I,J) +130 NEXT I +140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S +150 NEXT J +200 DATA 1.25, 4.30, 2.50 +210 DATA 40, 20, 37, 29, 42 +220 DATA 10, 16, 3, 21, 8 +230 DATA 35, 47, 29, 16, 33 +300 END diff --git a/third_party/python/ply/example/BASIC/sears.bas b/third_party/python/ply/example/BASIC/sears.bas new file mode 100644 index 0000000000..5ced3974e2 --- /dev/null +++ b/third_party/python/ply/example/BASIC/sears.bas @@ -0,0 +1,18 @@ +1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD +2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE +3 REM :: SEARS TOWER. 
+4 REM :: S = HEIGHT OF TOWER (METERS) +5 REM :: T = THICKNESS OF PAPER (MILLIMETERS) +10 LET S = 442 +20 LET T = 0.1 +30 REM CONVERT T TO METERS +40 LET T = T * .001 +50 LET F = 1 +60 LET H = T +100 IF H > S THEN 200 +120 LET H = 2 * H +125 LET F = F + 1 +130 GOTO 100 +200 PRINT "NUMBER OF FOLDS ="F +220 PRINT "FINAL HEIGHT ="H +999 END diff --git a/third_party/python/ply/example/BASIC/sqrt1.bas b/third_party/python/ply/example/BASIC/sqrt1.bas new file mode 100644 index 0000000000..6673a91524 --- /dev/null +++ b/third_party/python/ply/example/BASIC/sqrt1.bas @@ -0,0 +1,5 @@ +10 LET X = 0 +20 LET X = X + 1 +30 PRINT X, SQR(X) +40 IF X < 100 THEN 20 +50 END diff --git a/third_party/python/ply/example/BASIC/sqrt2.bas b/third_party/python/ply/example/BASIC/sqrt2.bas new file mode 100644 index 0000000000..862d85ef26 --- /dev/null +++ b/third_party/python/ply/example/BASIC/sqrt2.bas @@ -0,0 +1,4 @@ +10 FOR X = 1 TO 100 +20 PRINT X, SQR(X) +30 NEXT X +40 END diff --git a/third_party/python/ply/example/GardenSnake/GardenSnake.py b/third_party/python/ply/example/GardenSnake/GardenSnake.py new file mode 100644 index 0000000000..8b493b40dc --- /dev/null +++ b/third_party/python/ply/example/GardenSnake/GardenSnake.py @@ -0,0 +1,777 @@ +# GardenSnake - a parser generator demonstration program +# +# This implements a modified version of a subset of Python: +# - only 'def', 'return' and 'if' statements +# - 'if' only has 'then' clause (no elif nor else) +# - single-quoted strings only, content in raw format +# - numbers are decimal.Decimal instances (not integers or floats) +# - no print statment; use the built-in 'print' function +# - only < > == + - / * implemented (and unary + -) +# - assignment and tuple assignment work +# - no generators of any sort +# - no ... well, no quite a lot + +# Why? I'm thinking about a new indentation-based configuration +# language for a project and wanted to figure out how to do it. Once +# I got that working I needed a way to test it out. My original AST +# was dumb so I decided to target Python's AST and compile it into +# Python code. Plus, it's pretty cool that it only took a day or so +# from sitting down with Ply to having working code. + +# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ + +# This work is hereby released into the Public Domain. To view a copy of +# the public domain dedication, visit +# http://creativecommons.org/licenses/publicdomain/ or send a letter to +# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, +# California, 94105, USA. +# +# Portions of this work are derived from Python's Grammar definition +# and may be covered under the Python copyright and license +# +# Andrew Dalke / Dalke Scientific Software, LLC +# 30 August 2006 / Cape Town, South Africa + +# Changelog: +# 30 August - added link to CC license; removed the "swapcase" encoding + +# Modifications for inclusion in PLY distribution +import sys +sys.path.insert(0, "../..") +from ply import * + +##### Lexer ###### +#import lex +import decimal + +tokens = ( + 'DEF', + 'IF', + 'NAME', + 'NUMBER', # Python decimals + 'STRING', # single quoted strings only; syntax of raw strings + 'LPAR', + 'RPAR', + 'COLON', + 'EQ', + 'ASSIGN', + 'LT', + 'GT', + 'PLUS', + 'MINUS', + 'MULT', + 'DIV', + 'RETURN', + 'WS', + 'NEWLINE', + 'COMMA', + 'SEMICOLON', + 'INDENT', + 'DEDENT', + 'ENDMARKER', +) + +#t_NUMBER = r'\d+' +# taken from decmial.py but without the leading sign + + +def t_NUMBER(t): + r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? 
\d+)?""" + t.value = decimal.Decimal(t.value) + return t + + +def t_STRING(t): + r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... + t.value = t.value[1:-1].decode("string-escape") # .swapcase() # for fun + return t + +t_COLON = r':' +t_EQ = r'==' +t_ASSIGN = r'=' +t_LT = r'<' +t_GT = r'>' +t_PLUS = r'\+' +t_MINUS = r'-' +t_MULT = r'\*' +t_DIV = r'/' +t_COMMA = r',' +t_SEMICOLON = r';' + +# Ply nicely documented how to do this. + +RESERVED = { + "def": "DEF", + "if": "IF", + "return": "RETURN", +} + + +def t_NAME(t): + r'[a-zA-Z_][a-zA-Z0-9_]*' + t.type = RESERVED.get(t.value, "NAME") + return t + +# Putting this before t_WS let it consume lines with only comments in +# them so the latter code never sees the WS part. Not consuming the +# newline. Needed for "if 1: #comment" + + +def t_comment(t): + r"[ ]*\043[^\n]*" # \043 is '#' + pass + + +# Whitespace +def t_WS(t): + r' [ ]+ ' + if t.lexer.at_line_start and t.lexer.paren_count == 0: + return t + +# Don't generate newline tokens when inside of parenthesis, eg +# a = (1, +# 2, 3) + + +def t_newline(t): + r'\n+' + t.lexer.lineno += len(t.value) + t.type = "NEWLINE" + if t.lexer.paren_count == 0: + return t + + +def t_LPAR(t): + r'\(' + t.lexer.paren_count += 1 + return t + + +def t_RPAR(t): + r'\)' + # check for underflow? should be the job of the parser + t.lexer.paren_count -= 1 + return t + + +def t_error(t): + raise SyntaxError("Unknown symbol %r" % (t.value[0],)) + print "Skipping", repr(t.value[0]) + t.lexer.skip(1) + +# I implemented INDENT / DEDENT generation as a post-processing filter + +# The original lex token stream contains WS and NEWLINE characters. +# WS will only occur before any other tokens on a line. + +# I have three filters. One tags tokens by adding two attributes. +# "must_indent" is True if the token must be indented from the +# previous code. The other is "at_line_start" which is True for WS +# and the first non-WS/non-NEWLINE on a line. It flags the check so +# see if the new line has changed indication level. 
+ +# Python's syntax has three INDENT states +# 0) no colon hence no need to indent +# 1) "if 1: go()" - simple statements have a COLON but no need for an indent +# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent +NO_INDENT = 0 +MAY_INDENT = 1 +MUST_INDENT = 2 + +# only care about whitespace at the start of a line + + +def track_tokens_filter(lexer, tokens): + lexer.at_line_start = at_line_start = True + indent = NO_INDENT + saw_colon = False + for token in tokens: + token.at_line_start = at_line_start + + if token.type == "COLON": + at_line_start = False + indent = MAY_INDENT + token.must_indent = False + + elif token.type == "NEWLINE": + at_line_start = True + if indent == MAY_INDENT: + indent = MUST_INDENT + token.must_indent = False + + elif token.type == "WS": + assert token.at_line_start == True + at_line_start = True + token.must_indent = False + + else: + # A real token; only indent after COLON NEWLINE + if indent == MUST_INDENT: + token.must_indent = True + else: + token.must_indent = False + at_line_start = False + indent = NO_INDENT + + yield token + lexer.at_line_start = at_line_start + + +def _new_token(type, lineno): + tok = lex.LexToken() + tok.type = type + tok.value = None + tok.lineno = lineno + return tok + +# Synthesize a DEDENT tag + + +def DEDENT(lineno): + return _new_token("DEDENT", lineno) + +# Synthesize an INDENT tag + + +def INDENT(lineno): + return _new_token("INDENT", lineno) + + +# Track the indentation level and emit the right INDENT / DEDENT events. +def indentation_filter(tokens): + # A stack of indentation levels; will never pop item 0 + levels = [0] + token = None + depth = 0 + prev_was_ws = False + for token in tokens: + # if 1: + # print "Process", token, + # if token.at_line_start: + # print "at_line_start", + # if token.must_indent: + # print "must_indent", + # print + + # WS only occurs at the start of the line + # There may be WS followed by NEWLINE so + # only track the depth here. Don't indent/dedent + # until there's something real. + if token.type == "WS": + assert depth == 0 + depth = len(token.value) + prev_was_ws = True + # WS tokens are never passed to the parser + continue + + if token.type == "NEWLINE": + depth = 0 + if prev_was_ws or token.at_line_start: + # ignore blank lines + continue + # pass the other cases on through + yield token + continue + + # then it must be a real token (not WS, not NEWLINE) + # which can affect the indentation level + + prev_was_ws = False + if token.must_indent: + # The current depth must be larger than the previous level + if not (depth > levels[-1]): + raise IndentationError("expected an indented block") + + levels.append(depth) + yield INDENT(token.lineno) + + elif token.at_line_start: + # Must be on the same level or one of the previous levels + if depth == levels[-1]: + # At the same level + pass + elif depth > levels[-1]: + raise IndentationError( + "indentation increase but not in new block") + else: + # Back up; but only if it matches a previous level + try: + i = levels.index(depth) + except ValueError: + raise IndentationError("inconsistent indentation") + for _ in range(i + 1, len(levels)): + yield DEDENT(token.lineno) + levels.pop() + + yield token + + ### Finished processing ### + + # Must dedent any remaining levels + if len(levels) > 1: + assert token is not None + for _ in range(1, len(levels)): + yield DEDENT(token.lineno) + + +# The top-level filter adds an ENDMARKER, if requested. +# Python's grammar uses it. 
+def filter(lexer, add_endmarker=True): + token = None + tokens = iter(lexer.token, None) + tokens = track_tokens_filter(lexer, tokens) + for token in indentation_filter(tokens): + yield token + + if add_endmarker: + lineno = 1 + if token is not None: + lineno = token.lineno + yield _new_token("ENDMARKER", lineno) + +# Combine Ply and my filters into a new lexer + + +class IndentLexer(object): + + def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): + self.lexer = lex.lex(debug=debug, optimize=optimize, + lextab=lextab, reflags=reflags) + self.token_stream = None + + def input(self, s, add_endmarker=True): + self.lexer.paren_count = 0 + self.lexer.input(s) + self.token_stream = filter(self.lexer, add_endmarker) + + def token(self): + try: + return self.token_stream.next() + except StopIteration: + return None + +########## Parser (tokens -> AST) ###### + +# also part of Ply +#import yacc + +# I use the Python AST +from compiler import ast + +# Helper function + + +def Assign(left, right): + names = [] + if isinstance(left, ast.Name): + # Single assignment on left + return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) + elif isinstance(left, ast.Tuple): + # List of things - make sure they are Name nodes + names = [] + for child in left.getChildren(): + if not isinstance(child, ast.Name): + raise SyntaxError("that assignment not supported") + names.append(child.name) + ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] + return ast.Assign([ast.AssTuple(ass_list)], right) + else: + raise SyntaxError("Can't do that yet") + + +# The grammar comments come from Python's Grammar/Grammar file + +# NB: compound_stmt in single_input is followed by extra NEWLINE! +# file_input: (NEWLINE | stmt)* ENDMARKER +def p_file_input_end(p): + """file_input_end : file_input ENDMARKER""" + p[0] = ast.Stmt(p[1]) + + +def p_file_input(p): + """file_input : file_input NEWLINE + | file_input stmt + | NEWLINE + | stmt""" + if isinstance(p[len(p) - 1], basestring): + if len(p) == 3: + p[0] = p[1] + else: + p[0] = [] # p == 2 --> only a blank line + else: + if len(p) == 3: + p[0] = p[1] + p[2] + else: + p[0] = p[1] + + +# funcdef: [decorators] 'def' NAME parameters ':' suite +# ignoring decorators +def p_funcdef(p): + "funcdef : DEF NAME parameters COLON suite" + p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) + +# parameters: '(' [varargslist] ')' + + +def p_parameters(p): + """parameters : LPAR RPAR + | LPAR varargslist RPAR""" + if len(p) == 3: + p[0] = [] + else: + p[0] = p[2] + + +# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | +# highly simplified +def p_varargslist(p): + """varargslist : varargslist COMMA NAME + | NAME""" + if len(p) == 4: + p[0] = p[1] + p[3] + else: + p[0] = [p[1]] + +# stmt: simple_stmt | compound_stmt + + +def p_stmt_simple(p): + """stmt : simple_stmt""" + # simple_stmt is a list + p[0] = p[1] + + +def p_stmt_compound(p): + """stmt : compound_stmt""" + p[0] = [p[1]] + +# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE + + +def p_simple_stmt(p): + """simple_stmt : small_stmts NEWLINE + | small_stmts SEMICOLON NEWLINE""" + p[0] = p[1] + + +def p_small_stmts(p): + """small_stmts : small_stmts SEMICOLON small_stmt + | small_stmt""" + if len(p) == 4: + p[0] = p[1] + [p[3]] + else: + p[0] = [p[1]] + +# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | +# import_stmt | global_stmt | exec_stmt | assert_stmt + + +def p_small_stmt(p): + """small_stmt : flow_stmt + | expr_stmt""" + p[0] = 
p[1] + +# expr_stmt: testlist (augassign (yield_expr|testlist) | +# ('=' (yield_expr|testlist))*) +# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | +# '<<=' | '>>=' | '**=' | '//=') + + +def p_expr_stmt(p): + """expr_stmt : testlist ASSIGN testlist + | testlist """ + if len(p) == 2: + # a list of expressions + p[0] = ast.Discard(p[1]) + else: + p[0] = Assign(p[1], p[3]) + + +def p_flow_stmt(p): + "flow_stmt : return_stmt" + p[0] = p[1] + +# return_stmt: 'return' [testlist] + + +def p_return_stmt(p): + "return_stmt : RETURN testlist" + p[0] = ast.Return(p[2]) + + +def p_compound_stmt(p): + """compound_stmt : if_stmt + | funcdef""" + p[0] = p[1] + + +def p_if_stmt(p): + 'if_stmt : IF test COLON suite' + p[0] = ast.If([(p[2], p[4])], None) + + +def p_suite(p): + """suite : simple_stmt + | NEWLINE INDENT stmts DEDENT""" + if len(p) == 2: + p[0] = ast.Stmt(p[1]) + else: + p[0] = ast.Stmt(p[3]) + + +def p_stmts(p): + """stmts : stmts stmt + | stmt""" + if len(p) == 3: + p[0] = p[1] + p[2] + else: + p[0] = p[1] + +# No using Python's approach because Ply supports precedence + +# comparison: expr (comp_op expr)* +# arith_expr: term (('+'|'-') term)* +# term: factor (('*'|'/'|'%'|'//') factor)* +# factor: ('+'|'-'|'~') factor | power +# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + + +def make_lt_compare((left, right)): + return ast.Compare(left, [('<', right), ]) + + +def make_gt_compare((left, right)): + return ast.Compare(left, [('>', right), ]) + + +def make_eq_compare((left, right)): + return ast.Compare(left, [('==', right), ]) + + +binary_ops = { + "+": ast.Add, + "-": ast.Sub, + "*": ast.Mul, + "/": ast.Div, + "<": make_lt_compare, + ">": make_gt_compare, + "==": make_eq_compare, +} +unary_ops = { + "+": ast.UnaryAdd, + "-": ast.UnarySub, +} +precedence = ( + ("left", "EQ", "GT", "LT"), + ("left", "PLUS", "MINUS"), + ("left", "MULT", "DIV"), +) + + +def p_comparison(p): + """comparison : comparison PLUS comparison + | comparison MINUS comparison + | comparison MULT comparison + | comparison DIV comparison + | comparison LT comparison + | comparison EQ comparison + | comparison GT comparison + | PLUS comparison + | MINUS comparison + | power""" + if len(p) == 4: + p[0] = binary_ops[p[2]]((p[1], p[3])) + elif len(p) == 3: + p[0] = unary_ops[p[1]](p[2]) + else: + p[0] = p[1] + +# power: atom trailer* ['**' factor] +# trailers enables function calls. I only allow one level of calls +# so this is 'trailer' + + +def p_power(p): + """power : atom + | atom trailer""" + if len(p) == 2: + p[0] = p[1] + else: + if p[2][0] == "CALL": + p[0] = ast.CallFunc(p[1], p[2][1], None, None) + else: + raise AssertionError("not implemented") + + +def p_atom_name(p): + """atom : NAME""" + p[0] = ast.Name(p[1]) + + +def p_atom_number(p): + """atom : NUMBER + | STRING""" + p[0] = ast.Const(p[1]) + + +def p_atom_tuple(p): + """atom : LPAR testlist RPAR""" + p[0] = p[2] + +# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + + +def p_trailer(p): + "trailer : LPAR arglist RPAR" + p[0] = ("CALL", p[2]) + +# testlist: test (',' test)* [','] +# Contains shift/reduce error + + +def p_testlist(p): + """testlist : testlist_multi COMMA + | testlist_multi """ + if len(p) == 2: + p[0] = p[1] + else: + # May need to promote singleton to tuple + if isinstance(p[1], list): + p[0] = p[1] + else: + p[0] = [p[1]] + # Convert into a tuple? 
+ if isinstance(p[0], list): + p[0] = ast.Tuple(p[0]) + + +def p_testlist_multi(p): + """testlist_multi : testlist_multi COMMA test + | test""" + if len(p) == 2: + # singleton + p[0] = p[1] + else: + if isinstance(p[1], list): + p[0] = p[1] + [p[3]] + else: + # singleton -> tuple + p[0] = [p[1], p[3]] + + +# test: or_test ['if' or_test 'else' test] | lambdef +# as I don't support 'and', 'or', and 'not' this works down to 'comparison' +def p_test(p): + "test : comparison" + p[0] = p[1] + + +# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) +# XXX INCOMPLETE: this doesn't allow the trailing comma +def p_arglist(p): + """arglist : arglist COMMA argument + | argument""" + if len(p) == 4: + p[0] = p[1] + [p[3]] + else: + p[0] = [p[1]] + +# argument: test [gen_for] | test '=' test # Really [keyword '='] test + + +def p_argument(p): + "argument : test" + p[0] = p[1] + + +def p_error(p): + # print "Error!", repr(p) + raise SyntaxError(p) + + +class GardenSnakeParser(object): + + def __init__(self, lexer=None): + if lexer is None: + lexer = IndentLexer() + self.lexer = lexer + self.parser = yacc.yacc(start="file_input_end") + + def parse(self, code): + self.lexer.input(code) + result = self.parser.parse(lexer=self.lexer) + return ast.Module(None, result) + + +###### Code generation ###### + +from compiler import misc, syntax, pycodegen + + +class GardenSnakeCompiler(object): + + def __init__(self): + self.parser = GardenSnakeParser() + + def compile(self, code, filename="<string>"): + tree = self.parser.parse(code) + # print tree + misc.set_filename(filename, tree) + syntax.check(tree) + gen = pycodegen.ModuleCodeGenerator(tree) + code = gen.getCode() + return code + +####### Test code ####### + +compile = GardenSnakeCompiler().compile + +code = r""" + +print('LET\'S TRY THIS \\OUT') + +#Comment here +def x(a): + print('called with',a) + if a == 1: + return 2 + if a*2 > 10: return 999 / 4 + # Another comment here + + return a+2*3 + +ints = (1, 2, + 3, 4, +5) +print('mutiline-expression', ints) + +t = 4+1/3*2+6*(9-5+1) +print('predence test; should be 34+2/3:', t, t==(34+2/3)) + +print('numbers', 1,2,3,4,5) +if 1: + 8 + a=9 + print(x(a)) + +print(x(1)) +print(x(2)) +print(x(8),'3') +print('this is decimal', 1/5) +print('BIG DECIMAL', 1.234567891234567e12345) + +""" + +# Set up the GardenSnake run-time environment + + +def print_(*args): + print "-->", " ".join(map(str, args)) + +globals()["print"] = print_ + +compiled_code = compile(code) + +exec compiled_code in globals() +print "Done" diff --git a/third_party/python/ply/example/GardenSnake/README b/third_party/python/ply/example/GardenSnake/README new file mode 100644 index 0000000000..4d8be2db05 --- /dev/null +++ b/third_party/python/ply/example/GardenSnake/README @@ -0,0 +1,5 @@ +This example is Andrew Dalke's GardenSnake language. It shows how to process an +indentation-like language like Python. Further details can be found here: + +http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html + diff --git a/third_party/python/ply/example/README b/third_party/python/ply/example/README new file mode 100644 index 0000000000..63519b557f --- /dev/null +++ b/third_party/python/ply/example/README @@ -0,0 +1,10 @@ +Simple examples: + calc - Simple calculator + classcalc - Simple calculate defined as a class + +Complex examples + ansic - ANSI C grammar from K&R + BASIC - A small BASIC interpreter + GardenSnake - A simple python-like language + yply - Converts Unix yacc files to PLY programs. 
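All of the examples listed above follow the same PLY skeleton: module-level
token rules handed to ply.lex, and p_* grammar rules handed to ply.yacc. A
minimal sketch of that shared pattern (the token set and grammar here are
hypothetical, not taken from any one example):

    import ply.lex as lex
    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')

    t_PLUS   = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    precedence = (('left', 'PLUS'),)

    def p_expr_plus(p):
        'expr : expr PLUS expr'
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print("Syntax error")

    lexer = lex.lex()
    parser = yacc.yacc()
    print(parser.parse("1 + 2 + 3"))   # prints 6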
+ diff --git a/third_party/python/ply/example/ansic/README b/third_party/python/ply/example/ansic/README new file mode 100644 index 0000000000..e049d3b4e4 --- /dev/null +++ b/third_party/python/ply/example/ansic/README @@ -0,0 +1,2 @@ +This example is incomplete. Was going to specify an ANSI C parser. +This is part of it. diff --git a/third_party/python/ply/example/ansic/clex.py b/third_party/python/ply/example/ansic/clex.py new file mode 100644 index 0000000000..4bde1d730b --- /dev/null +++ b/third_party/python/ply/example/ansic/clex.py @@ -0,0 +1,168 @@ +# ---------------------------------------------------------------------- +# clex.py +# +# A lexer for ANSI C. +# ---------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +import ply.lex as lex + +# Reserved words +reserved = ( + 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', + 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER', + 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', + 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', +) + +tokens = reserved + ( + # Literals (identifier, integer constant, float constant, string constant, + # char const) + 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', + + # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', + 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', + 'LOR', 'LAND', 'LNOT', + 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', + + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) + 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', + 'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', + + # Increment/decrement (++,--) + 'PLUSPLUS', 'MINUSMINUS', + + # Structure dereference (->) + 'ARROW', + + # Conditional operator (?) + 'CONDOP', + + # Delimeters ( ) [ ] { } , . ; : + 'LPAREN', 'RPAREN', + 'LBRACKET', 'RBRACKET', + 'LBRACE', 'RBRACE', + 'COMMA', 'PERIOD', 'SEMI', 'COLON', + + # Ellipsis (...) + 'ELLIPSIS', +) + +# Completely ignored characters +t_ignore = ' \t\x0c' + +# Newlines + + +def t_NEWLINE(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +# Operators +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_MOD = r'%' +t_OR = r'\|' +t_AND = r'&' +t_NOT = r'~' +t_XOR = r'\^' +t_LSHIFT = r'<<' +t_RSHIFT = r'>>' +t_LOR = r'\|\|' +t_LAND = r'&&' +t_LNOT = r'!' +t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' + +# Assignment operators + +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'\^=' + +# Increment/decrement +t_PLUSPLUS = r'\+\+' +t_MINUSMINUS = r'--' + +# -> +t_ARROW = r'->' + +# ? +t_CONDOP = r'\?' + +# Delimeters +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' + +# Identifiers and reserved words + +reserved_map = {} +for r in reserved: + reserved_map[r.lower()] = r + + +def t_ID(t): + r'[A-Za-z_][\w_]*' + t.type = reserved_map.get(t.value, "ID") + return t + +# Integer literal +t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' + +# Floating literal +t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 
+ +# String literal +t_SCONST = r'\"([^\\\n]|(\\.))*?\"' + +# Character constant 'c' or L'c' +t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' + +# Comments + + +def t_comment(t): + r'/\*(.|\n)*?\*/' + t.lexer.lineno += t.value.count('\n') + +# Preprocessor directive (ignored) + + +def t_preprocessor(t): + r'\#(.)*?\n' + t.lexer.lineno += 1 + + +def t_error(t): + print("Illegal character %s" % repr(t.value[0])) + t.lexer.skip(1) + +lexer = lex.lex() +if __name__ == "__main__": + lex.runmain(lexer) diff --git a/third_party/python/ply/example/ansic/cparse.py b/third_party/python/ply/example/ansic/cparse.py new file mode 100644 index 0000000000..5fe9bce042 --- /dev/null +++ b/third_party/python/ply/example/ansic/cparse.py @@ -0,0 +1,1048 @@ +# ----------------------------------------------------------------------------- +# cparse.py +# +# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. +# ----------------------------------------------------------------------------- + +import sys +import clex +import ply.yacc as yacc + +# Get the token map +tokens = clex.tokens + +# translation-unit: + + +def p_translation_unit_1(t): + 'translation_unit : external_declaration' + pass + + +def p_translation_unit_2(t): + 'translation_unit : translation_unit external_declaration' + pass + +# external-declaration: + + +def p_external_declaration_1(t): + 'external_declaration : function_definition' + pass + + +def p_external_declaration_2(t): + 'external_declaration : declaration' + pass + +# function-definition: + + +def p_function_definition_1(t): + 'function_definition : declaration_specifiers declarator declaration_list compound_statement' + pass + + +def p_function_definition_2(t): + 'function_definition : declarator declaration_list compound_statement' + pass + + +def p_function_definition_3(t): + 'function_definition : declarator compound_statement' + pass + + +def p_function_definition_4(t): + 'function_definition : declaration_specifiers declarator compound_statement' + pass + +# declaration: + + +def p_declaration_1(t): + 'declaration : declaration_specifiers init_declarator_list SEMI' + pass + + +def p_declaration_2(t): + 'declaration : declaration_specifiers SEMI' + pass + +# declaration-list: + + +def p_declaration_list_1(t): + 'declaration_list : declaration' + pass + + +def p_declaration_list_2(t): + 'declaration_list : declaration_list declaration ' + pass + +# declaration-specifiers + + +def p_declaration_specifiers_1(t): + 'declaration_specifiers : storage_class_specifier declaration_specifiers' + pass + + +def p_declaration_specifiers_2(t): + 'declaration_specifiers : type_specifier declaration_specifiers' + pass + + +def p_declaration_specifiers_3(t): + 'declaration_specifiers : type_qualifier declaration_specifiers' + pass + + +def p_declaration_specifiers_4(t): + 'declaration_specifiers : storage_class_specifier' + pass + + +def p_declaration_specifiers_5(t): + 'declaration_specifiers : type_specifier' + pass + + +def p_declaration_specifiers_6(t): + 'declaration_specifiers : type_qualifier' + pass + +# storage-class-specifier + + +def p_storage_class_specifier(t): + '''storage_class_specifier : AUTO + | REGISTER + | STATIC + | EXTERN + | TYPEDEF + ''' + pass + +# type-specifier: + + +def p_type_specifier(t): + '''type_specifier : VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + | struct_or_union_specifier + | enum_specifier + | TYPEID + ''' + pass + +# type-qualifier: + + +def p_type_qualifier(t): + '''type_qualifier : CONST + | VOLATILE''' + pass 
+ +# struct-or-union-specifier + + +def p_struct_or_union_specifier_1(t): + 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' + pass + + +def p_struct_or_union_specifier_2(t): + 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' + pass + + +def p_struct_or_union_specifier_3(t): + 'struct_or_union_specifier : struct_or_union ID' + pass + +# struct-or-union: + + +def p_struct_or_union(t): + '''struct_or_union : STRUCT + | UNION + ''' + pass + +# struct-declaration-list: + + +def p_struct_declaration_list_1(t): + 'struct_declaration_list : struct_declaration' + pass + + +def p_struct_declaration_list_2(t): + 'struct_declaration_list : struct_declaration_list struct_declaration' + pass + +# init-declarator-list: + + +def p_init_declarator_list_1(t): + 'init_declarator_list : init_declarator' + pass + + +def p_init_declarator_list_2(t): + 'init_declarator_list : init_declarator_list COMMA init_declarator' + pass + +# init-declarator + + +def p_init_declarator_1(t): + 'init_declarator : declarator' + pass + + +def p_init_declarator_2(t): + 'init_declarator : declarator EQUALS initializer' + pass + +# struct-declaration: + + +def p_struct_declaration(t): + 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' + pass + +# specifier-qualifier-list: + + +def p_specifier_qualifier_list_1(t): + 'specifier_qualifier_list : type_specifier specifier_qualifier_list' + pass + + +def p_specifier_qualifier_list_2(t): + 'specifier_qualifier_list : type_specifier' + pass + + +def p_specifier_qualifier_list_3(t): + 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' + pass + + +def p_specifier_qualifier_list_4(t): + 'specifier_qualifier_list : type_qualifier' + pass + +# struct-declarator-list: + + +def p_struct_declarator_list_1(t): + 'struct_declarator_list : struct_declarator' + pass + + +def p_struct_declarator_list_2(t): + 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' + pass + +# struct-declarator: + + +def p_struct_declarator_1(t): + 'struct_declarator : declarator' + pass + + +def p_struct_declarator_2(t): + 'struct_declarator : declarator COLON constant_expression' + pass + + +def p_struct_declarator_3(t): + 'struct_declarator : COLON constant_expression' + pass + +# enum-specifier: + + +def p_enum_specifier_1(t): + 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' + pass + + +def p_enum_specifier_2(t): + 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' + pass + + +def p_enum_specifier_3(t): + 'enum_specifier : ENUM ID' + pass + +# enumerator_list: + + +def p_enumerator_list_1(t): + 'enumerator_list : enumerator' + pass + + +def p_enumerator_list_2(t): + 'enumerator_list : enumerator_list COMMA enumerator' + pass + +# enumerator: + + +def p_enumerator_1(t): + 'enumerator : ID' + pass + + +def p_enumerator_2(t): + 'enumerator : ID EQUALS constant_expression' + pass + +# declarator: + + +def p_declarator_1(t): + 'declarator : pointer direct_declarator' + pass + + +def p_declarator_2(t): + 'declarator : direct_declarator' + pass + +# direct-declarator: + + +def p_direct_declarator_1(t): + 'direct_declarator : ID' + pass + + +def p_direct_declarator_2(t): + 'direct_declarator : LPAREN declarator RPAREN' + pass + + +def p_direct_declarator_3(t): + 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' + pass + + +def p_direct_declarator_4(t): + 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN ' + pass + + +def 
p_direct_declarator_5(t): + 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' + pass + + +def p_direct_declarator_6(t): + 'direct_declarator : direct_declarator LPAREN RPAREN ' + pass + +# pointer: + + +def p_pointer_1(t): + 'pointer : TIMES type_qualifier_list' + pass + + +def p_pointer_2(t): + 'pointer : TIMES' + pass + + +def p_pointer_3(t): + 'pointer : TIMES type_qualifier_list pointer' + pass + + +def p_pointer_4(t): + 'pointer : TIMES pointer' + pass + +# type-qualifier-list: + + +def p_type_qualifier_list_1(t): + 'type_qualifier_list : type_qualifier' + pass + + +def p_type_qualifier_list_2(t): + 'type_qualifier_list : type_qualifier_list type_qualifier' + pass + +# parameter-type-list: + + +def p_parameter_type_list_1(t): + 'parameter_type_list : parameter_list' + pass + + +def p_parameter_type_list_2(t): + 'parameter_type_list : parameter_list COMMA ELLIPSIS' + pass + +# parameter-list: + + +def p_parameter_list_1(t): + 'parameter_list : parameter_declaration' + pass + + +def p_parameter_list_2(t): + 'parameter_list : parameter_list COMMA parameter_declaration' + pass + +# parameter-declaration: + + +def p_parameter_declaration_1(t): + 'parameter_declaration : declaration_specifiers declarator' + pass + + +def p_parameter_declaration_2(t): + 'parameter_declaration : declaration_specifiers abstract_declarator_opt' + pass + +# identifier-list: + + +def p_identifier_list_1(t): + 'identifier_list : ID' + pass + + +def p_identifier_list_2(t): + 'identifier_list : identifier_list COMMA ID' + pass + +# initializer: + + +def p_initializer_1(t): + 'initializer : assignment_expression' + pass + + +def p_initializer_2(t): + '''initializer : LBRACE initializer_list RBRACE + | LBRACE initializer_list COMMA RBRACE''' + pass + +# initializer-list: + + +def p_initializer_list_1(t): + 'initializer_list : initializer' + pass + + +def p_initializer_list_2(t): + 'initializer_list : initializer_list COMMA initializer' + pass + +# type-name: + + +def p_type_name(t): + 'type_name : specifier_qualifier_list abstract_declarator_opt' + pass + + +def p_abstract_declarator_opt_1(t): + 'abstract_declarator_opt : empty' + pass + + +def p_abstract_declarator_opt_2(t): + 'abstract_declarator_opt : abstract_declarator' + pass + +# abstract-declarator: + + +def p_abstract_declarator_1(t): + 'abstract_declarator : pointer ' + pass + + +def p_abstract_declarator_2(t): + 'abstract_declarator : pointer direct_abstract_declarator' + pass + + +def p_abstract_declarator_3(t): + 'abstract_declarator : direct_abstract_declarator' + pass + +# direct-abstract-declarator: + + +def p_direct_abstract_declarator_1(t): + 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' + pass + + +def p_direct_abstract_declarator_2(t): + 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' + pass + + +def p_direct_abstract_declarator_3(t): + 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' + pass + + +def p_direct_abstract_declarator_4(t): + 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' + pass + + +def p_direct_abstract_declarator_5(t): + 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' + pass + +# Optional fields in abstract declarators + + +def p_constant_expression_opt_1(t): + 'constant_expression_opt : empty' + pass + + +def p_constant_expression_opt_2(t): + 'constant_expression_opt : constant_expression' + pass + + +def p_parameter_type_list_opt_1(t): + 
'parameter_type_list_opt : empty' + pass + + +def p_parameter_type_list_opt_2(t): + 'parameter_type_list_opt : parameter_type_list' + pass + +# statement: + + +def p_statement(t): + ''' + statement : labeled_statement + | expression_statement + | compound_statement + | selection_statement + | iteration_statement + | jump_statement + ''' + pass + +# labeled-statement: + + +def p_labeled_statement_1(t): + 'labeled_statement : ID COLON statement' + pass + + +def p_labeled_statement_2(t): + 'labeled_statement : CASE constant_expression COLON statement' + pass + + +def p_labeled_statement_3(t): + 'labeled_statement : DEFAULT COLON statement' + pass + +# expression-statement: + + +def p_expression_statement(t): + 'expression_statement : expression_opt SEMI' + pass + +# compound-statement: + + +def p_compound_statement_1(t): + 'compound_statement : LBRACE declaration_list statement_list RBRACE' + pass + + +def p_compound_statement_2(t): + 'compound_statement : LBRACE statement_list RBRACE' + pass + + +def p_compound_statement_3(t): + 'compound_statement : LBRACE declaration_list RBRACE' + pass + + +def p_compound_statement_4(t): + 'compound_statement : LBRACE RBRACE' + pass + +# statement-list: + + +def p_statement_list_1(t): + 'statement_list : statement' + pass + + +def p_statement_list_2(t): + 'statement_list : statement_list statement' + pass + +# selection-statement + + +def p_selection_statement_1(t): + 'selection_statement : IF LPAREN expression RPAREN statement' + pass + + +def p_selection_statement_2(t): + 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' + pass + + +def p_selection_statement_3(t): + 'selection_statement : SWITCH LPAREN expression RPAREN statement ' + pass + +# iteration_statement: + + +def p_iteration_statement_1(t): + 'iteration_statement : WHILE LPAREN expression RPAREN statement' + pass + + +def p_iteration_statement_2(t): + 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' + pass + + +def p_iteration_statement_3(t): + 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' + pass + +# jump_statement: + + +def p_jump_statement_1(t): + 'jump_statement : GOTO ID SEMI' + pass + + +def p_jump_statement_2(t): + 'jump_statement : CONTINUE SEMI' + pass + + +def p_jump_statement_3(t): + 'jump_statement : BREAK SEMI' + pass + + +def p_jump_statement_4(t): + 'jump_statement : RETURN expression_opt SEMI' + pass + + +def p_expression_opt_1(t): + 'expression_opt : empty' + pass + + +def p_expression_opt_2(t): + 'expression_opt : expression' + pass + +# expression: + + +def p_expression_1(t): + 'expression : assignment_expression' + pass + + +def p_expression_2(t): + 'expression : expression COMMA assignment_expression' + pass + +# assigment_expression: + + +def p_assignment_expression_1(t): + 'assignment_expression : conditional_expression' + pass + + +def p_assignment_expression_2(t): + 'assignment_expression : unary_expression assignment_operator assignment_expression' + pass + +# assignment_operator: + + +def p_assignment_operator(t): + ''' + assignment_operator : EQUALS + | TIMESEQUAL + | DIVEQUAL + | MODEQUAL + | PLUSEQUAL + | MINUSEQUAL + | LSHIFTEQUAL + | RSHIFTEQUAL + | ANDEQUAL + | OREQUAL + | XOREQUAL + ''' + pass + +# conditional-expression + + +def p_conditional_expression_1(t): + 'conditional_expression : logical_or_expression' + pass + + +def p_conditional_expression_2(t): + 'conditional_expression : logical_or_expression CONDOP expression COLON 
conditional_expression ' + pass + +# constant-expression + + +def p_constant_expression(t): + 'constant_expression : conditional_expression' + pass + +# logical-or-expression + + +def p_logical_or_expression_1(t): + 'logical_or_expression : logical_and_expression' + pass + + +def p_logical_or_expression_2(t): + 'logical_or_expression : logical_or_expression LOR logical_and_expression' + pass + +# logical-and-expression + + +def p_logical_and_expression_1(t): + 'logical_and_expression : inclusive_or_expression' + pass + + +def p_logical_and_expression_2(t): + 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' + pass + +# inclusive-or-expression: + + +def p_inclusive_or_expression_1(t): + 'inclusive_or_expression : exclusive_or_expression' + pass + + +def p_inclusive_or_expression_2(t): + 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' + pass + +# exclusive-or-expression: + + +def p_exclusive_or_expression_1(t): + 'exclusive_or_expression : and_expression' + pass + + +def p_exclusive_or_expression_2(t): + 'exclusive_or_expression : exclusive_or_expression XOR and_expression' + pass + +# AND-expression + + +def p_and_expression_1(t): + 'and_expression : equality_expression' + pass + + +def p_and_expression_2(t): + 'and_expression : and_expression AND equality_expression' + pass + + +# equality-expression: +def p_equality_expression_1(t): + 'equality_expression : relational_expression' + pass + + +def p_equality_expression_2(t): + 'equality_expression : equality_expression EQ relational_expression' + pass + + +def p_equality_expression_3(t): + 'equality_expression : equality_expression NE relational_expression' + pass + + +# relational-expression: +def p_relational_expression_1(t): + 'relational_expression : shift_expression' + pass + + +def p_relational_expression_2(t): + 'relational_expression : relational_expression LT shift_expression' + pass + + +def p_relational_expression_3(t): + 'relational_expression : relational_expression GT shift_expression' + pass + + +def p_relational_expression_4(t): + 'relational_expression : relational_expression LE shift_expression' + pass + + +def p_relational_expression_5(t): + 'relational_expression : relational_expression GE shift_expression' + pass + +# shift-expression + + +def p_shift_expression_1(t): + 'shift_expression : additive_expression' + pass + + +def p_shift_expression_2(t): + 'shift_expression : shift_expression LSHIFT additive_expression' + pass + + +def p_shift_expression_3(t): + 'shift_expression : shift_expression RSHIFT additive_expression' + pass + +# additive-expression + + +def p_additive_expression_1(t): + 'additive_expression : multiplicative_expression' + pass + + +def p_additive_expression_2(t): + 'additive_expression : additive_expression PLUS multiplicative_expression' + pass + + +def p_additive_expression_3(t): + 'additive_expression : additive_expression MINUS multiplicative_expression' + pass + +# multiplicative-expression + + +def p_multiplicative_expression_1(t): + 'multiplicative_expression : cast_expression' + pass + + +def p_multiplicative_expression_2(t): + 'multiplicative_expression : multiplicative_expression TIMES cast_expression' + pass + + +def p_multiplicative_expression_3(t): + 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' + pass + + +def p_multiplicative_expression_4(t): + 'multiplicative_expression : multiplicative_expression MOD cast_expression' + pass + +# cast-expression: + + +def p_cast_expression_1(t): + 
'cast_expression : unary_expression' + pass + + +def p_cast_expression_2(t): + 'cast_expression : LPAREN type_name RPAREN cast_expression' + pass + +# unary-expression: + + +def p_unary_expression_1(t): + 'unary_expression : postfix_expression' + pass + + +def p_unary_expression_2(t): + 'unary_expression : PLUSPLUS unary_expression' + pass + + +def p_unary_expression_3(t): + 'unary_expression : MINUSMINUS unary_expression' + pass + + +def p_unary_expression_4(t): + 'unary_expression : unary_operator cast_expression' + pass + + +def p_unary_expression_5(t): + 'unary_expression : SIZEOF unary_expression' + pass + + +def p_unary_expression_6(t): + 'unary_expression : SIZEOF LPAREN type_name RPAREN' + pass + +# unary-operator + + +def p_unary_operator(t): + '''unary_operator : AND + | TIMES + | PLUS + | MINUS + | NOT + | LNOT ''' + pass + +# postfix-expression: + + +def p_postfix_expression_1(t): + 'postfix_expression : primary_expression' + pass + + +def p_postfix_expression_2(t): + 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' + pass + + +def p_postfix_expression_3(t): + 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' + pass + + +def p_postfix_expression_4(t): + 'postfix_expression : postfix_expression LPAREN RPAREN' + pass + + +def p_postfix_expression_5(t): + 'postfix_expression : postfix_expression PERIOD ID' + pass + + +def p_postfix_expression_6(t): + 'postfix_expression : postfix_expression ARROW ID' + pass + + +def p_postfix_expression_7(t): + 'postfix_expression : postfix_expression PLUSPLUS' + pass + + +def p_postfix_expression_8(t): + 'postfix_expression : postfix_expression MINUSMINUS' + pass + +# primary-expression: + + +def p_primary_expression(t): + '''primary_expression : ID + | constant + | SCONST + | LPAREN expression RPAREN''' + pass + +# argument-expression-list: + + +def p_argument_expression_list(t): + '''argument_expression_list : assignment_expression + | argument_expression_list COMMA assignment_expression''' + pass + +# constant: + + +def p_constant(t): + '''constant : ICONST + | FCONST + | CCONST''' + pass + + +def p_empty(t): + 'empty : ' + pass + + +def p_error(t): + print("Whoa. We're hosed") + +import profile +# Build the grammar + +yacc.yacc() +#yacc.yacc(method='LALR',write_tables=False,debug=False) + +#profile.run("yacc.yacc(method='LALR')") diff --git a/third_party/python/ply/example/calc/calc.py b/third_party/python/ply/example/calc/calc.py new file mode 100644 index 0000000000..824c3d7d0a --- /dev/null +++ b/third_party/python/ply/example/calc/calc.py @@ -0,0 +1,123 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +tokens = ( + 'NAME', 'NUMBER', +) + +literals = ['=', '+', '-', '*', '/', '(', ')'] + +# Tokens + +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + +def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + +t_ignore = " \t" + + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), +) + +# dictionary of names +names = {} + + +def p_statement_assign(p): + 'statement : NAME "=" expression' + names[p[1]] = p[3] + + +def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + +def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + + +def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + + +def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + + +def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + + +def p_expression_name(p): + "expression : NAME" + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + +def p_error(p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +import ply.yacc as yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: + continue + yacc.parse(s) diff --git a/third_party/python/ply/example/calcdebug/calc.py b/third_party/python/ply/example/calcdebug/calc.py new file mode 100644 index 0000000000..06831e2ca5 --- /dev/null +++ b/third_party/python/ply/example/calcdebug/calc.py @@ -0,0 +1,129 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# This example shows how to run the parser in a debugging mode +# with output routed to a logging object. 
+# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +tokens = ( + 'NAME', 'NUMBER', +) + +literals = ['=', '+', '-', '*', '/', '(', ')'] + +# Tokens + +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + +def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + +t_ignore = " \t" + + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), +) + +# dictionary of names +names = {} + + +def p_statement_assign(p): + 'statement : NAME "=" expression' + names[p[1]] = p[3] + + +def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + +def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + + +def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + + +def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + + +def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + + +def p_expression_name(p): + "expression : NAME" + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + +def p_error(p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +import ply.yacc as yacc +yacc.yacc() + +import logging +logging.basicConfig( + level=logging.INFO, + filename="parselog.txt" +) + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: + continue + yacc.parse(s, debug=logging.getLogger()) diff --git a/third_party/python/ply/example/calceof/calc.py b/third_party/python/ply/example/calceof/calc.py new file mode 100644 index 0000000000..22b39a41a8 --- /dev/null +++ b/third_party/python/ply/example/calceof/calc.py @@ -0,0 +1,132 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. Asks the user for more input and +# demonstrates the use of the t_eof() rule. +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +tokens = ( + 'NAME', 'NUMBER', +) + +literals = ['=', '+', '-', '*', '/', '(', ')'] + +# Tokens + +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + +def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + +t_ignore = " \t" + + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + +def t_eof(t): + more = raw_input('... 
') + if more: + t.lexer.input(more + '\n') + return t.lexer.token() + else: + return None + + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), +) + +# dictionary of names +names = {} + + +def p_statement_assign(p): + 'statement : NAME "=" expression' + names[p[1]] = p[3] + + +def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + +def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + + +def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + + +def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + + +def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + + +def p_expression_name(p): + "expression : NAME" + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + +def p_error(p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +import ply.yacc as yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: + continue + yacc.parse(s + '\n') diff --git a/third_party/python/ply/example/classcalc/calc.py b/third_party/python/ply/example/classcalc/calc.py new file mode 100755 index 0000000000..ada4afd426 --- /dev/null +++ b/third_party/python/ply/example/classcalc/calc.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# +# Class-based example contributed to PLY by David McNab +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +import ply.lex as lex +import ply.yacc as yacc +import os + + +class Parser: + """ + Base class for a lexer/parser that has the rules defined as methods + """ + tokens = () + precedence = () + + def __init__(self, **kw): + self.debug = kw.get('debug', 0) + self.names = {} + try: + modname = os.path.split(os.path.splitext(__file__)[0])[ + 1] + "_" + self.__class__.__name__ + except: + modname = "parser" + "_" + self.__class__.__name__ + self.debugfile = modname + ".dbg" + self.tabmodule = modname + "_" + "parsetab" + # print self.debugfile, self.tabmodule + + # Build the lexer and parser + lex.lex(module=self, debug=self.debug) + yacc.yacc(module=self, + debug=self.debug, + debugfile=self.debugfile, + tabmodule=self.tabmodule) + + def run(self): + while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: + continue + yacc.parse(s) + + +class Calc(Parser): + + tokens = ( + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'EXP', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_EXP = r'\*\*' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(self, t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + # print "parsed number %s" % repr(t.value) + return t + + t_ignore = " \t" + + def t_newline(self, t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(self, t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Parsing rules + + precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('left', 'EXP'), + ('right', 'UMINUS'), + ) + + def p_statement_assign(self, p): + 'statement : NAME EQUALS expression' + self.names[p[1]] = p[3] + + def p_statement_expr(self, p): + 'statement : expression' + print(p[1]) + + def p_expression_binop(self, p): + """ + expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | expression EXP expression + """ + # print [repr(p[i]) for i in range(0,4)] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + elif p[2] == '**': + p[0] = p[1] ** p[3] + + def p_expression_uminus(self, p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(self, p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(self, p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(self, p): + 'expression : NAME' + try: + p[0] = self.names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + def p_error(self, p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +if __name__ == '__main__': + calc = Calc() + calc.run() diff --git a/third_party/python/ply/example/cleanup.sh b/third_party/python/ply/example/cleanup.sh new file mode 100755 index 0000000000..3e115f41c4 --- /dev/null +++ b/third_party/python/ply/example/cleanup.sh @@ -0,0 +1,2 @@ +#!/bin/sh +rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class diff --git 
a/third_party/python/ply/example/closurecalc/calc.py b/third_party/python/ply/example/closurecalc/calc.py new file mode 100644 index 0000000000..6031b05813 --- /dev/null +++ b/third_party/python/ply/example/closurecalc/calc.py @@ -0,0 +1,132 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A calculator parser that makes use of closures. The function make_calculator() +# returns a function that accepts an input string and returns a result. All +# lexing rules, parsing rules, and internal state are held inside the function. +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +# Make a calculator function + + +def make_calculator(): + import ply.lex as lex + import ply.yacc as yacc + + # ------- Internal calculator state + + variables = {} # Dictionary of stored variables + + # ------- Calculator tokenizing rules + + tokens = ( + 'NAME', 'NUMBER', + ) + + literals = ['=', '+', '-', '*', '/', '(', ')'] + + t_ignore = " \t" + + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + + def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Build the lexer + lexer = lex.lex() + + # ------- Calculator parsing rules + + precedence = ( + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), + ) + + def p_statement_assign(p): + 'statement : NAME "=" expression' + variables[p[1]] = p[3] + p[0] = None + + def p_statement_expr(p): + 'statement : expression' + p[0] = p[1] + + def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + + def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + + def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + + def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + + def p_expression_name(p): + "expression : NAME" + try: + p[0] = variables[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + def p_error(p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + + # Build the parser + parser = yacc.yacc() + + # ------- Input function + + def input(text): + result = parser.parse(text, lexer=lexer) + return result + + return input + +# Make a calculator object and use it +calc = make_calculator() + +while True: + try: + s = raw_input("calc > ") + except EOFError: + break + r = calc(s) + if r: + print(r) diff --git a/third_party/python/ply/example/hedit/hedit.py b/third_party/python/ply/example/hedit/hedit.py new file mode 100644 index 0000000000..32da745677 --- /dev/null +++ b/third_party/python/ply/example/hedit/hedit.py @@ -0,0 +1,48 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. 
+# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + + +tokens = ( + 'H_EDIT_DESCRIPTOR', +) + +# Tokens +t_ignore = " \t\n" + + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i + 1 + n) + + t.value = t.value[i + 1:i + 1 + n] + return t + + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() +lex.runmain() diff --git a/third_party/python/ply/example/newclasscalc/calc.py b/third_party/python/ply/example/newclasscalc/calc.py new file mode 100755 index 0000000000..43c9506a8a --- /dev/null +++ b/third_party/python/ply/example/newclasscalc/calc.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# +# Class-based example contributed to PLY by David McNab. +# +# Modified to use new-style classes. Test case. +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +import ply.lex as lex +import ply.yacc as yacc +import os + + +class Parser(object): + """ + Base class for a lexer/parser that has the rules defined as methods + """ + tokens = () + precedence = () + + def __init__(self, **kw): + self.debug = kw.get('debug', 0) + self.names = {} + try: + modname = os.path.split(os.path.splitext(__file__)[0])[ + 1] + "_" + self.__class__.__name__ + except: + modname = "parser" + "_" + self.__class__.__name__ + self.debugfile = modname + ".dbg" + self.tabmodule = modname + "_" + "parsetab" + # print self.debugfile, self.tabmodule + + # Build the lexer and parser + lex.lex(module=self, debug=self.debug) + yacc.yacc(module=self, + debug=self.debug, + debugfile=self.debugfile, + tabmodule=self.tabmodule) + + def run(self): + while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: + continue + yacc.parse(s) + + +class Calc(Parser): + + tokens = ( + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'EXP', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_EXP = r'\*\*' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(self, t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + # print "parsed number %s" % repr(t.value) + return t + + t_ignore = " \t" + + def t_newline(self, t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(self, t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Parsing rules + + precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('left', 'EXP'), + ('right', 'UMINUS'), + ) + + def p_statement_assign(self, p): + 'statement : NAME EQUALS expression' + self.names[p[1]] = p[3] + + def p_statement_expr(self, p): + 'statement : expression' + print(p[1]) + + def p_expression_binop(self, p): + """ + expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | 
expression DIVIDE expression + | expression EXP expression + """ + # print [repr(p[i]) for i in range(0,4)] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + elif p[2] == '**': + p[0] = p[1] ** p[3] + + def p_expression_uminus(self, p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(self, p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(self, p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(self, p): + 'expression : NAME' + try: + p[0] = self.names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + def p_error(self, p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +if __name__ == '__main__': + calc = Calc() + calc.run() diff --git a/third_party/python/ply/example/optcalc/README b/third_party/python/ply/example/optcalc/README new file mode 100644 index 0000000000..53dd5fcd55 --- /dev/null +++ b/third_party/python/ply/example/optcalc/README @@ -0,0 +1,9 @@ +An example showing how to use Python optimized mode. +To run: + + - First run 'python calc.py' + + - Then run 'python -OO calc.py' + +If working correctly, the second version should run the +same way. diff --git a/third_party/python/ply/example/optcalc/calc.py b/third_party/python/ply/example/optcalc/calc.py new file mode 100644 index 0000000000..0c223e5994 --- /dev/null +++ b/third_party/python/ply/example/optcalc/calc.py @@ -0,0 +1,134 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +tokens = ( + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', +) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex(optimize=1) + +# Parsing rules + +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), +) + +# dictionary of names +names = {} + + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] + elif t[2] == '*': + t[0] = t[1] * t[3] + elif t[2] == '/': + t[0] = t[1] / t[3] + elif t[2] == '<': + t[0] = t[1] < t[3] + + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + + +def p_error(t): + if t: + print("Syntax error at '%s'" % t.value) + else: + print("Syntax error at EOF") + +import ply.yacc as yacc +yacc.yacc(optimize=1) + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) diff --git a/third_party/python/ply/example/unicalc/calc.py b/third_party/python/ply/example/unicalc/calc.py new file mode 100644 index 0000000000..901c4b9d76 --- /dev/null +++ b/third_party/python/ply/example/unicalc/calc.py @@ -0,0 +1,133 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# +# This example uses unicode strings for tokens, docstrings, and input. 
+# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +tokens = ( + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', +) + +# Tokens + +t_PLUS = ur'\+' +t_MINUS = ur'-' +t_TIMES = ur'\*' +t_DIVIDE = ur'/' +t_EQUALS = ur'=' +t_LPAREN = ur'\(' +t_RPAREN = ur'\)' +t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' + + +def t_NUMBER(t): + ur'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = u" \t" + + +def t_newline(t): + ur'\n+' + t.lexer.lineno += t.value.count("\n") + + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), +) + +# dictionary of names +names = {} + + +def p_statement_assign(p): + 'statement : NAME EQUALS expression' + names[p[1]] = p[3] + + +def p_statement_expr(p): + 'statement : expression' + print p[1] + + +def p_expression_binop(p): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if p[2] == u'+': + p[0] = p[1] + p[3] + elif p[2] == u'-': + p[0] = p[1] - p[3] + elif p[2] == u'*': + p[0] = p[1] * p[3] + elif p[2] == u'/': + p[0] = p[1] / p[3] + + +def p_expression_uminus(p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + +def p_expression_group(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + +def p_expression_number(p): + 'expression : NUMBER' + p[0] = p[1] + + +def p_expression_name(p): + 'expression : NAME' + try: + p[0] = names[p[1]] + except LookupError: + print "Undefined name '%s'" % p[1] + p[0] = 0 + + +def p_error(p): + if p: + print "Syntax error at '%s'" % p.value + else: + print "Syntax error at EOF" + +import ply.yacc as yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: + continue + yacc.parse(unicode(s)) diff --git a/third_party/python/ply/example/yply/README b/third_party/python/ply/example/yply/README new file mode 100644 index 0000000000..bfadf36436 --- /dev/null +++ b/third_party/python/ply/example/yply/README @@ -0,0 +1,41 @@ +yply.py + +This example implements a program yply.py that converts a UNIX-yacc +specification file into a PLY-compatible program. To use, simply +run it like this: + + % python yply.py [-nocode] inputfile.y >myparser.py + +The output of this program is Python code. In the output, +any C code in the original file is included, but is commented out. +If you use the -nocode option, then all of the C code in the +original file is just discarded. + +To use the resulting grammer with PLY, you'll need to edit the +myparser.py file. Within this file, some stub code is included that +can be used to test the construction of the parsing tables. However, +you'll need to do more editing to make a workable parser. + +Disclaimer: This just an example I threw together in an afternoon. +It might have some bugs. However, it worked when I tried it on +a yacc-specified C++ parser containing 442 rules and 855 parsing +states. + +Comments: + +1. This example does not parse specification files meant for lex/flex. + You'll need to specify the tokenizer on your own. + +2. 
This example shows a number of interesting PLY features including + + - Parsing of literal text delimited by nested parentheses + - Some interaction between the parser and the lexer. + - Use of literals in the grammar specification + - One pass compilation. The program just emits the result, + there is no intermediate parse tree. + +3. This program could probably be cleaned up and enhanced a lot. + It would be great if someone wanted to work on this (hint). + +-Dave + diff --git a/third_party/python/ply/example/yply/ylex.py b/third_party/python/ply/example/yply/ylex.py new file mode 100644 index 0000000000..16410e250e --- /dev/null +++ b/third_party/python/ply/example/yply/ylex.py @@ -0,0 +1,119 @@ +# lexer for yacc-grammars +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 + +import sys +sys.path.append("../..") + +from ply import * + +tokens = ( + 'LITERAL', 'SECTION', 'TOKEN', 'LEFT', 'RIGHT', 'PREC', 'START', 'TYPE', 'NONASSOC', 'UNION', 'CODE', + 'ID', 'QLITERAL', 'NUMBER', +) + +states = (('code', 'exclusive'),) + +literals = [';', ',', '<', '>', '|', ':'] +t_ignore = ' \t' + +t_TOKEN = r'%token' +t_LEFT = r'%left' +t_RIGHT = r'%right' +t_NONASSOC = r'%nonassoc' +t_PREC = r'%prec' +t_START = r'%start' +t_TYPE = r'%type' +t_UNION = r'%union' +t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' +t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)''' +t_NUMBER = r'\d+' + + +def t_SECTION(t): + r'%%' + if getattr(t.lexer, "lastsection", 0): + t.value = t.lexer.lexdata[t.lexpos + 2:] + t.lexer.lexpos = len(t.lexer.lexdata) + else: + t.lexer.lastsection = 0 + return t + +# Comments + + +def t_ccomment(t): + r'/\*(.|\n)*?\*/' + t.lexer.lineno += t.value.count('\n') + +t_ignore_cppcomment = r'//.*' + + +def t_LITERAL(t): + r'%\{(.|\n)*?%\}' + t.lexer.lineno += t.value.count("\n") + return t + + +def t_NEWLINE(t): + r'\n' + t.lexer.lineno += 1 + + +def t_code(t): + r'\{' + t.lexer.codestart = t.lexpos + t.lexer.level = 1 + t.lexer.begin('code') + + +def t_code_ignore_string(t): + r'\"([^\\\n]|(\\.))*?\"' + + +def t_code_ignore_char(t): + r'\'([^\\\n]|(\\.))*?\'' + + +def t_code_ignore_comment(t): + r'/\*(.|\n)*?\*/' + + +def t_code_ignore_cppcom(t): + r'//.*' + + +def t_code_lbrace(t): + r'\{' + t.lexer.level += 1 + + +def t_code_rbrace(t): + r'\}' + t.lexer.level -= 1 + if t.lexer.level == 0: + t.type = 'CODE' + t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos + 1] + t.lexer.begin('INITIAL') + t.lexer.lineno += t.value.count('\n') + return t + +t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' +t_code_ignore_whitespace = r'\s+' +t_code_ignore = "" + + +def t_code_error(t): + raise RuntimeError + + +def t_error(t): + print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0])) + print(t.value) + t.lexer.skip(1) + +lex.lex() + +if __name__ == '__main__': + lex.runmain() diff --git a/third_party/python/ply/example/yply/yparse.py b/third_party/python/ply/example/yply/yparse.py new file mode 100644 index 0000000000..1f2e8d0922 --- /dev/null +++ b/third_party/python/ply/example/yply/yparse.py @@ -0,0 +1,244 @@ +# parser for Unix yacc-based grammars +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 + +import ylex +tokens = ylex.tokens + +from ply import * + +tokenlist = [] +preclist = [] + +emit_code = 1 + + +def p_yacc(p): + '''yacc : defsection rulesection''' + + +def p_defsection(p): + '''defsection : definitions SECTION + | SECTION''' + p.lexer.lastsection = 1 + print("tokens = ", repr(tokenlist)) + print() + print("precedence = ", repr(preclist)) + print() + print("# 
-------------- RULES ----------------") + print() + + +def p_rulesection(p): + '''rulesection : rules SECTION''' + + print("# -------------- RULES END ----------------") + print_code(p[2], 0) + + +def p_definitions(p): + '''definitions : definitions definition + | definition''' + + +def p_definition_literal(p): + '''definition : LITERAL''' + print_code(p[1], 0) + + +def p_definition_start(p): + '''definition : START ID''' + print("start = '%s'" % p[2]) + + +def p_definition_token(p): + '''definition : toktype opttype idlist optsemi ''' + for i in p[3]: + if i[0] not in "'\"": + tokenlist.append(i) + if p[1] == '%left': + preclist.append(('left',) + tuple(p[3])) + elif p[1] == '%right': + preclist.append(('right',) + tuple(p[3])) + elif p[1] == '%nonassoc': + preclist.append(('nonassoc',) + tuple(p[3])) + + +def p_toktype(p): + '''toktype : TOKEN + | LEFT + | RIGHT + | NONASSOC''' + p[0] = p[1] + + +def p_opttype(p): + '''opttype : '<' ID '>' + | empty''' + + +def p_idlist(p): + '''idlist : idlist optcomma tokenid + | tokenid''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + p[1].append(p[3]) + + +def p_tokenid(p): + '''tokenid : ID + | ID NUMBER + | QLITERAL + | QLITERAL NUMBER''' + p[0] = p[1] + + +def p_optsemi(p): + '''optsemi : ';' + | empty''' + + +def p_optcomma(p): + '''optcomma : ',' + | empty''' + + +def p_definition_type(p): + '''definition : TYPE '<' ID '>' namelist optsemi''' + # type declarations are ignored + + +def p_namelist(p): + '''namelist : namelist optcomma ID + | ID''' + + +def p_definition_union(p): + '''definition : UNION CODE optsemi''' + # Union declarations are ignored + + +def p_rules(p): + '''rules : rules rule + | rule''' + if len(p) == 2: + rule = p[1] + else: + rule = p[2] + + # Print out a Python equivalent of this rule + + embedded = [] # Embedded actions (a mess) + embed_count = 0 + + rulename = rule[0] + rulecount = 1 + for r in rule[1]: + # r contains one of the rule possibilities + print("def p_%s_%d(p):" % (rulename, rulecount)) + prod = [] + prodcode = "" + for i in range(len(r)): + item = r[i] + if item[0] == '{': # A code block + if i == len(r) - 1: + prodcode = item + break + else: + # an embedded action + embed_name = "_embed%d_%s" % (embed_count, rulename) + prod.append(embed_name) + embedded.append((embed_name, item)) + embed_count += 1 + else: + prod.append(item) + print(" '''%s : %s'''" % (rulename, " ".join(prod))) + # Emit code + print_code(prodcode, 4) + print() + rulecount += 1 + + for e, code in embedded: + print("def p_%s(p):" % e) + print(" '''%s : '''" % e) + print_code(code, 4) + print() + + +def p_rule(p): + '''rule : ID ':' rulelist ';' ''' + p[0] = (p[1], [p[3]]) + + +def p_rule2(p): + '''rule : ID ':' rulelist morerules ';' ''' + p[4].insert(0, p[3]) + p[0] = (p[1], p[4]) + + +def p_rule_empty(p): + '''rule : ID ':' ';' ''' + p[0] = (p[1], [[]]) + + +def p_rule_empty2(p): + '''rule : ID ':' morerules ';' ''' + + p[3].insert(0, []) + p[0] = (p[1], p[3]) + + +def p_morerules(p): + '''morerules : morerules '|' rulelist + | '|' rulelist + | '|' ''' + + if len(p) == 2: + p[0] = [[]] + elif len(p) == 3: + p[0] = [p[2]] + else: + p[0] = p[1] + p[0].append(p[3]) + +# print("morerules", len(p), p[0]) + + +def p_rulelist(p): + '''rulelist : rulelist ruleitem + | ruleitem''' + + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + p[1].append(p[2]) + + +def p_ruleitem(p): + '''ruleitem : ID + | QLITERAL + | CODE + | PREC''' + p[0] = p[1] + + +def p_empty(p): + '''empty : ''' + + +def p_error(p): + pass + +yacc.yacc(debug=0) + 
+ +def print_code(code, indent): + if not emit_code: + return + codelines = code.splitlines() + for c in codelines: + print("%s# %s" % (" " * indent, c)) diff --git a/third_party/python/ply/example/yply/yply.py b/third_party/python/ply/example/yply/yply.py new file mode 100755 index 0000000000..e24616c831 --- /dev/null +++ b/third_party/python/ply/example/yply/yply.py @@ -0,0 +1,51 @@ +#!/usr/local/bin/python +# yply.py +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 +# +# Converts a UNIX-yacc specification file into a PLY-compatible +# specification. To use, simply do this: +# +# % python yply.py [-nocode] inputfile.y >myparser.py +# +# The output of this program is Python code. In the output, +# any C code in the original file is included, but is commented. +# If you use the -nocode option, then all of the C code in the +# original file is discarded. +# +# Disclaimer: This just an example I threw together in an afternoon. +# It might have some bugs. However, it worked when I tried it on +# a yacc-specified C++ parser containing 442 rules and 855 parsing +# states. +# + +import sys +sys.path.insert(0, "../..") + +import ylex +import yparse + +from ply import * + +if len(sys.argv) == 1: + print("usage : yply.py [-nocode] inputfile") + raise SystemExit + +if len(sys.argv) == 3: + if sys.argv[1] == '-nocode': + yparse.emit_code = 0 + else: + print("Unknown option '%s'" % sys.argv[1]) + raise SystemExit + filename = sys.argv[2] +else: + filename = sys.argv[1] + +yacc.parse(open(filename).read()) + +print(""" +if __name__ == '__main__': + from ply import * + yacc.yacc() +""") |
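
To get a feel for what yply.py produces, suppose it is run on a tiny grammar file (call it expr.y, a made-up name used only for illustration):

    %token NUMBER
    %left '+'
    %%
    expr : expr '+' expr   { $$ = $1 + $3; }
         | NUMBER
         ;
    %%

Based on the printing code in yparse.py and yply.py above, the generated output looks roughly like this (exact spacing aside), with the C action preserved as a comment:

    tokens =  ['NUMBER']

    precedence =  [('left', "'+'")]

    # -------------- RULES ----------------

    def p_expr_1(p):
        '''expr : expr '+' expr'''
        # { $$ = $1 + $3; }

    def p_expr_2(p):
        '''expr : NUMBER'''

    # -------------- RULES END ----------------

    if __name__ == '__main__':
        from ply import *
        yacc.yacc()

As the README notes, this stub only checks that the parsing tables can be built; you still have to supply a tokenizer (and, for quoted literals such as '+', presumably a literals declaration) before it becomes a working parser.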