diff options
Diffstat (limited to 'third_party/python/ply/example/yply')
-rw-r--r-- | third_party/python/ply/example/yply/README | 41 | ||||
-rw-r--r-- | third_party/python/ply/example/yply/ylex.py | 119 | ||||
-rw-r--r-- | third_party/python/ply/example/yply/yparse.py | 244 | ||||
-rwxr-xr-x | third_party/python/ply/example/yply/yply.py | 51 |
4 files changed, 455 insertions, 0 deletions
diff --git a/third_party/python/ply/example/yply/README b/third_party/python/ply/example/yply/README new file mode 100644 index 0000000000..bfadf36436 --- /dev/null +++ b/third_party/python/ply/example/yply/README @@ -0,0 +1,41 @@ +yply.py + +This example implements a program yply.py that converts a UNIX-yacc +specification file into a PLY-compatible program. To use, simply +run it like this: + + % python yply.py [-nocode] inputfile.y >myparser.py + +The output of this program is Python code. In the output, +any C code in the original file is included, but is commented out. +If you use the -nocode option, then all of the C code in the +original file is just discarded. + +To use the resulting grammar with PLY, you'll need to edit the +myparser.py file. Within this file, some stub code is included that +can be used to test the construction of the parsing tables. However, +you'll need to do more editing to make a workable parser. + +Disclaimer: This is just an example I threw together in an afternoon. +It might have some bugs. However, it worked when I tried it on +a yacc-specified C++ parser containing 442 rules and 855 parsing +states. + +Comments: + +1. This example does not parse specification files meant for lex/flex. + You'll need to specify the tokenizer on your own. + +2. This example shows a number of interesting PLY features including + + - Parsing of literal text delimited by nested parentheses + - Some interaction between the parser and the lexer. + - Use of literals in the grammar specification + - One pass compilation. The program just emits the result, + there is no intermediate parse tree. + +3. This program could probably be cleaned up and enhanced a lot. + It would be great if someone wanted to work on this (hint). 
+ +-Dave + diff --git a/third_party/python/ply/example/yply/ylex.py b/third_party/python/ply/example/yply/ylex.py new file mode 100644 index 0000000000..16410e250e --- /dev/null +++ b/third_party/python/ply/example/yply/ylex.py @@ -0,0 +1,119 @@ +# lexer for yacc-grammars +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 + +import sys +sys.path.append("../..") + +from ply import * + +tokens = ( + 'LITERAL', 'SECTION', 'TOKEN', 'LEFT', 'RIGHT', 'PREC', 'START', 'TYPE', 'NONASSOC', 'UNION', 'CODE', + 'ID', 'QLITERAL', 'NUMBER', +) + +states = (('code', 'exclusive'),) + +literals = [';', ',', '<', '>', '|', ':'] +t_ignore = ' \t' + +t_TOKEN = r'%token' +t_LEFT = r'%left' +t_RIGHT = r'%right' +t_NONASSOC = r'%nonassoc' +t_PREC = r'%prec' +t_START = r'%start' +t_TYPE = r'%type' +t_UNION = r'%union' +t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' +t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)''' +t_NUMBER = r'\d+' + + +def t_SECTION(t): + r'%%' + if getattr(t.lexer, "lastsection", 0): + t.value = t.lexer.lexdata[t.lexpos + 2:] + t.lexer.lexpos = len(t.lexer.lexdata) + else: + t.lexer.lastsection = 0 + return t + +# Comments + + +def t_ccomment(t): + r'/\*(.|\n)*?\*/' + t.lexer.lineno += t.value.count('\n') + +t_ignore_cppcomment = r'//.*' + + +def t_LITERAL(t): + r'%\{(.|\n)*?%\}' + t.lexer.lineno += t.value.count("\n") + return t + + +def t_NEWLINE(t): + r'\n' + t.lexer.lineno += 1 + + +def t_code(t): + r'\{' + t.lexer.codestart = t.lexpos + t.lexer.level = 1 + t.lexer.begin('code') + + +def t_code_ignore_string(t): + r'\"([^\\\n]|(\\.))*?\"' + + +def t_code_ignore_char(t): + r'\'([^\\\n]|(\\.))*?\'' + + +def t_code_ignore_comment(t): + r'/\*(.|\n)*?\*/' + + +def t_code_ignore_cppcom(t): + r'//.*' + + +def t_code_lbrace(t): + r'\{' + t.lexer.level += 1 + + +def t_code_rbrace(t): + r'\}' + t.lexer.level -= 1 + if t.lexer.level == 0: + t.type = 'CODE' + t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos + 1] + t.lexer.begin('INITIAL') + t.lexer.lineno += 
t.value.count('\n') + return t + +t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' +t_code_ignore_whitespace = r'\s+' +t_code_ignore = "" + + +def t_code_error(t): + raise RuntimeError + + +def t_error(t): + print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0])) + print(t.value) + t.lexer.skip(1) + +lex.lex() + +if __name__ == '__main__': + lex.runmain() diff --git a/third_party/python/ply/example/yply/yparse.py b/third_party/python/ply/example/yply/yparse.py new file mode 100644 index 0000000000..1f2e8d0922 --- /dev/null +++ b/third_party/python/ply/example/yply/yparse.py @@ -0,0 +1,244 @@ +# parser for Unix yacc-based grammars +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 + +import ylex +tokens = ylex.tokens + +from ply import * + +tokenlist = [] +preclist = [] + +emit_code = 1 + + +def p_yacc(p): + '''yacc : defsection rulesection''' + + +def p_defsection(p): + '''defsection : definitions SECTION + | SECTION''' + p.lexer.lastsection = 1 + print("tokens = ", repr(tokenlist)) + print() + print("precedence = ", repr(preclist)) + print() + print("# -------------- RULES ----------------") + print() + + +def p_rulesection(p): + '''rulesection : rules SECTION''' + + print("# -------------- RULES END ----------------") + print_code(p[2], 0) + + +def p_definitions(p): + '''definitions : definitions definition + | definition''' + + +def p_definition_literal(p): + '''definition : LITERAL''' + print_code(p[1], 0) + + +def p_definition_start(p): + '''definition : START ID''' + print("start = '%s'" % p[2]) + + +def p_definition_token(p): + '''definition : toktype opttype idlist optsemi ''' + for i in p[3]: + if i[0] not in "'\"": + tokenlist.append(i) + if p[1] == '%left': + preclist.append(('left',) + tuple(p[3])) + elif p[1] == '%right': + preclist.append(('right',) + tuple(p[3])) + elif p[1] == '%nonassoc': + preclist.append(('nonassoc',) + tuple(p[3])) + + +def p_toktype(p): + '''toktype : TOKEN + | LEFT + | RIGHT + | NONASSOC''' + p[0] = 
p[1] + + +def p_opttype(p): + '''opttype : '<' ID '>' + | empty''' + + +def p_idlist(p): + '''idlist : idlist optcomma tokenid + | tokenid''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + p[1].append(p[3]) + + +def p_tokenid(p): + '''tokenid : ID + | ID NUMBER + | QLITERAL + | QLITERAL NUMBER''' + p[0] = p[1] + + +def p_optsemi(p): + '''optsemi : ';' + | empty''' + + +def p_optcomma(p): + '''optcomma : ',' + | empty''' + + +def p_definition_type(p): + '''definition : TYPE '<' ID '>' namelist optsemi''' + # type declarations are ignored + + +def p_namelist(p): + '''namelist : namelist optcomma ID + | ID''' + + +def p_definition_union(p): + '''definition : UNION CODE optsemi''' + # Union declarations are ignored + + +def p_rules(p): + '''rules : rules rule + | rule''' + if len(p) == 2: + rule = p[1] + else: + rule = p[2] + + # Print out a Python equivalent of this rule + + embedded = [] # Embedded actions (a mess) + embed_count = 0 + + rulename = rule[0] + rulecount = 1 + for r in rule[1]: + # r contains one of the rule possibilities + print("def p_%s_%d(p):" % (rulename, rulecount)) + prod = [] + prodcode = "" + for i in range(len(r)): + item = r[i] + if item[0] == '{': # A code block + if i == len(r) - 1: + prodcode = item + break + else: + # an embedded action + embed_name = "_embed%d_%s" % (embed_count, rulename) + prod.append(embed_name) + embedded.append((embed_name, item)) + embed_count += 1 + else: + prod.append(item) + print(" '''%s : %s'''" % (rulename, " ".join(prod))) + # Emit code + print_code(prodcode, 4) + print() + rulecount += 1 + + for e, code in embedded: + print("def p_%s(p):" % e) + print(" '''%s : '''" % e) + print_code(code, 4) + print() + + +def p_rule(p): + '''rule : ID ':' rulelist ';' ''' + p[0] = (p[1], [p[3]]) + + +def p_rule2(p): + '''rule : ID ':' rulelist morerules ';' ''' + p[4].insert(0, p[3]) + p[0] = (p[1], p[4]) + + +def p_rule_empty(p): + '''rule : ID ':' ';' ''' + p[0] = (p[1], [[]]) + + +def p_rule_empty2(p): + 
'''rule : ID ':' morerules ';' ''' + + p[3].insert(0, []) + p[0] = (p[1], p[3]) + + +def p_morerules(p): + '''morerules : morerules '|' rulelist + | '|' rulelist + | '|' ''' + + if len(p) == 2: + p[0] = [[]] + elif len(p) == 3: + p[0] = [p[2]] + else: + p[0] = p[1] + p[0].append(p[3]) + +# print("morerules", len(p), p[0]) + + +def p_rulelist(p): + '''rulelist : rulelist ruleitem + | ruleitem''' + + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + p[1].append(p[2]) + + +def p_ruleitem(p): + '''ruleitem : ID + | QLITERAL + | CODE + | PREC''' + p[0] = p[1] + + +def p_empty(p): + '''empty : ''' + + +def p_error(p): + pass + +yacc.yacc(debug=0) + + +def print_code(code, indent): + if not emit_code: + return + codelines = code.splitlines() + for c in codelines: + print("%s# %s" % (" " * indent, c)) diff --git a/third_party/python/ply/example/yply/yply.py b/third_party/python/ply/example/yply/yply.py new file mode 100755 index 0000000000..e24616c831 --- /dev/null +++ b/third_party/python/ply/example/yply/yply.py @@ -0,0 +1,51 @@ +#!/usr/local/bin/python +# yply.py +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 +# +# Converts a UNIX-yacc specification file into a PLY-compatible +# specification. To use, simply do this: +# +# % python yply.py [-nocode] inputfile.y >myparser.py +# +# The output of this program is Python code. In the output, +# any C code in the original file is included, but is commented. +# If you use the -nocode option, then all of the C code in the +# original file is discarded. +# +# Disclaimer: This just an example I threw together in an afternoon. +# It might have some bugs. However, it worked when I tried it on +# a yacc-specified C++ parser containing 442 rules and 855 parsing +# states. 
#

import sys
sys.path.insert(0, "../..")

import ylex
import yparse

from ply import *

# Command line: yply.py [-nocode] inputfile
# With no arguments at all, show the usage string and stop.
if len(sys.argv) == 1:
    print("usage : yply.py [-nocode] inputfile")
    raise SystemExit

# Exactly two arguments means "<option> <inputfile>"; the only option
# recognised is -nocode, which turns off emission of the commented-out
# C code by clearing yparse.emit_code.
if len(sys.argv) == 3:
    if sys.argv[1] == '-nocode':
        yparse.emit_code = 0
    else:
        print("Unknown option '%s'" % sys.argv[1])
        raise SystemExit
    filename = sys.argv[2]
else:
    # Any other argument count: the first argument is the input file.
    filename = sys.argv[1]

# Read the whole specification up front and close the file before parsing;
# the original left the handle open for the garbage collector to reclaim.
with open(filename) as specfile:
    spec = specfile.read()

# The grammar actions in yparse print the translated PLY program to stdout
# while the parse runs.
yacc.parse(spec)

# Trailer appended to the generated program so that running it builds the
# parsing tables.
print("""
if __name__ == '__main__':
    from ply import *
    yacc.yacc()
""")