diff options
Diffstat (limited to 'tools/yacc.py')
-rw-r--r-- | tools/yacc.py | 159 |
1 files changed, 85 insertions, 74 deletions
diff --git a/tools/yacc.py b/tools/yacc.py index 1352e963..3466fbb5 100644 --- a/tools/yacc.py +++ b/tools/yacc.py @@ -1,31 +1,11 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2015, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause -# ----------------------------------------------------------------------------- -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. # # :::::::: WARNING ::::::: # @@ -41,11 +21,10 @@ import types import sys import os.path import inspect -import base64 import warnings -__version__ = '3.8' -__tabversion__ = '3.8' +__version__ = '3.11' +__tabversion__ = '3.10' #----------------------------------------------------------------------------- # === User configurable parameters === @@ -245,6 +224,9 @@ class YaccProduction: def lexpos(self, n): return getattr(self.slice[n], 'lexpos', 0) + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + def lexspan(self, n): startpos = getattr(self.slice[n], 'lexpos', 0) endpos = getattr(self.slice[n], 'endlexpos', startpos) @@ -286,7 +268,7 @@ class LRParser: # certain kinds of advanced parsing situations where the lexer and parser interact with # each other or change states (i.e., manipulation of scope, lexer states, etc.). # - # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions def set_defaulted_states(self): self.defaulted_states = {} for state, actions in self.action.items(): @@ -474,8 +456,9 @@ class LRParser: try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] #--! DEBUG debug.info('Result : %s', format_result(pslice[0])) #--! DEBUG @@ -484,14 +467,16 @@ class LRParser: statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -514,6 +499,7 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) #--! DEBUG debug.info('Result : %s', format_result(pslice[0])) @@ -523,14 +509,15 @@ class LRParser: statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -556,7 +543,7 @@ class LRParser: # If there are any synchronization rules, they may # catch it. # - # In addition to pushing the error token, we call + # In addition to pushing the error token, we call call # the user defined p_error() function if this is the # first syntax error. This function is only called if # errorcount == 0. @@ -569,6 +556,7 @@ class LRParser: if self.errorfunc: if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer + self.state = state tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic @@ -788,21 +776,24 @@ class LRParser: try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -825,20 +816,22 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -856,7 +849,7 @@ class LRParser: # If there are any synchronization rules, they may # catch it. # - # In addition to pushing the error token, we call + # In addition to pushing the error token, we call call # the user defined p_error() function if this is the # first syntax error. This function is only called if # errorcount == 0. @@ -869,6 +862,7 @@ class LRParser: if self.errorfunc: if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer + self.state = state tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic @@ -1079,21 +1073,24 @@ class LRParser: try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -1111,20 +1108,22 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -1142,7 +1141,7 @@ class LRParser: # If there are any synchronization rules, they may # catch it. # - # In addition to pushing the error token, we call + # In addition to pushing the error token, we call call # the user defined p_error() function if this is the # first syntax error. This function is only called if # errorcount == 0. @@ -1155,6 +1154,7 @@ class LRParser: if self.errorfunc: if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer + self.state = state tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic @@ -1319,7 +1319,7 @@ class Production(object): p = LRItem(self, n) # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] + p.lr_after = self.Prodnames[p.prod[n+1]] except (IndexError, KeyError): p.lr_after = [] try: @@ -1459,7 +1459,7 @@ class Grammar(object): self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar. # This is only used to provide error checking and to generate # a warning about unused precedence rules. @@ -2260,7 +2260,6 @@ class LRGeneratedTable(LRTable): # ----------------------------------------------------------------------------- def dr_relation(self, C, trans, nullable): - dr_set = {} state, N = trans terms = [] @@ -2544,8 +2543,13 @@ class LRGeneratedTable(LRTable): # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. - sprec, slevel = Productions[st_actionp[a].number].prec - rprec, rlevel = Precedence.get(a, ('right', 0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number @@ -2603,8 +2607,13 @@ class LRGeneratedTable(LRTable): # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec, rlevel = Productions[st_actionp[a].number].prec + + # Shift precedence comes from the token sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 @@ -2684,6 +2693,7 @@ class LRGeneratedTable(LRTable): f.write(''' # %s # This file is automatically generated. Do not edit. +# pylint: disable=W,C,R _tabversion = %r _lr_method = %r @@ -2917,28 +2927,20 @@ class ParserReflect(object): # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(' '.join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) + parts.append(f[3]) except (TypeError, ValueError): pass - - digest = base64.b16encode(sig.digest()) - if sys.version_info[0] >= 3: - digest = digest.decode('latin-1') - return digest + return ''.join(parts) # ----------------------------------------------------------------------------- # validate_modules() @@ -2956,7 +2958,10 @@ class ParserReflect(object): fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') for module in self.modules: - lines, linen = inspect.getsourcelines(module) + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + continue counthash = {} for linen, line in enumerate(lines): @@ -3026,7 +3031,7 @@ class ParserReflect(object): self.error = True return - self.tokens = tokens + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): @@ -3084,7 +3089,7 @@ class ParserReflect(object): if not name.startswith('p_') or name == 'p_error': continue if isinstance(item, (types.FunctionType, types.MethodType)): - line = item.__code__.co_firstlineno + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) module = inspect.getmodule(item) p_functions.append((line, module, name, item.__doc__)) @@ -3186,9 +3191,13 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star if module: _items = [(k, getattr(module, k)) for k in dir(module)] pdict = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead if '__file__' not in pdict: pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) @@ -3262,7 +3271,7 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star else: debuglog = NullLogger() - debuglog.info('Created by PLY version %s (https://www.dabeaz.com/ply/)', __version__) + debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) errors = False @@ -3430,6 +3439,8 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star if write_tables: try: lr.write_table(tabmodule, outputdir, signature) + if tabmodule in sys.modules: + del sys.modules[tabmodule] except IOError as e: errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) |