From e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 22:34:10 +0200 Subject: Adding upstream version 4.2.2. Signed-off-by: Daniel Baumann --- tools/lemon/CMakeLists.txt | 46 + tools/lemon/README | 52 + tools/lemon/apply-patches.sh | 16 + tools/lemon/lemon.c | 5893 ++++++++++++++++++++++++++++++++++++++++++ tools/lemon/lempar.c | 1068 ++++++++ 5 files changed, 7075 insertions(+) create mode 100644 tools/lemon/CMakeLists.txt create mode 100644 tools/lemon/README create mode 100755 tools/lemon/apply-patches.sh create mode 100644 tools/lemon/lemon.c create mode 100644 tools/lemon/lempar.c (limited to 'tools/lemon') diff --git a/tools/lemon/CMakeLists.txt b/tools/lemon/CMakeLists.txt new file mode 100644 index 00000000..529eeae1 --- /dev/null +++ b/tools/lemon/CMakeLists.txt @@ -0,0 +1,46 @@ +# CMakeLists.txt +# +# Wireshark - Network traffic analyzer +# By Gerald Combs +# Copyright 1998 Gerald Combs +# +# SPDX-License-Identifier: GPL-2.0-or-later +# + +add_executable(lemon lemon.c) + +if(DEFINED LEMON_C_COMPILER) + set(CMAKE_C_COMPILER "${LEMON_C_COMPILER}") + set(CMAKE_C_FLAGS "") +endif() + +# To keep lemon.c as close to upstream as possible disable all warnings +if(CMAKE_C_COMPILER_ID MATCHES "MSVC") + target_compile_options(lemon PRIVATE /w) +else() + target_compile_options(lemon PRIVATE -w) +endif() +if(CMAKE_C_COMPILER_ID MATCHES "Clang") + # Disable static analysis for lemon source code. These issues don't + # affect Wireshark at runtime. + target_compile_options(lemon PRIVATE -Xclang -analyzer-disable-all-checks) +endif() +if(DEFINED NO_SANITIZE_CFLAGS) + target_compile_options(lemon PRIVATE ${NO_SANITIZE_CFLAGS}) +endif() +if(DEFINED NO_SANITIZE_LDFLAGS) + target_link_options(lemon PRIVATE ${NO_SANITIZE_LDFLAGS}) +endif() + +# +# Editor modelines - https://www.wireshark.org/tools/modelines.html +# +# Local variables: +# c-basic-offset: 8 +# tab-width: 8 +# indent-tabs-mode: t +# End: +# +# vi: set shiftwidth=8 tabstop=8 noexpandtab: +# :indentSize=8:tabSize=8:noTabs=false: +# diff --git a/tools/lemon/README b/tools/lemon/README new file mode 100644 index 00000000..59ed3431 --- /dev/null +++ b/tools/lemon/README @@ -0,0 +1,52 @@ +The Lemon Parser Generator's home page is: https://www.hwaci.com/sw/lemon/ +Lemon seems now to be maintained at: https://sqlite.org/lemon.html + +Documentation is available at: https://sqlite.org/src/doc/trunk/doc/lemon.html +Git mirror of the upstream Fossil repository: https://github.com/mackyle/sqlite + +The lempar.c and lemon.c are taken from sqlite and are modified as little as +possible to make it easier to synchronize changes. Last updated at: + + commit a913f942cf6b32b85de6428fd542b39458df2a88 + Author: D. Richard Hipp + Date: Wed Dec 28 14:03:47 2022 +0000 + + Version 3.40.1 + +To check for changes (adjust "previous commit" accordingly): + + git clone --depth=1000 https://github.com/sqlite/sqlite + cd sqlite/tools + git log -p 273ee15121.. lemon.c lempar.c + +To create a Wireshark version (steps 1-3) and validate the result (steps 4-5): +1. Copy the two files. +2. Run ./apply-patches.sh to apply local patches. +3. Update the commit in this README (to ensure the base is known). +4. Check for CSA warnings: clang-check -analyze lemon.c -- +5. Build and run lemon: ninja epan/dfilter/grammar.c + +To keep the lemon source as pristine as possible from upstream all warnings +when building lemon itself are disabled. Only patch the lemon source code as +a last resort. + +Warnings for lemon generated code are few in practice with -Wall -Wextra. These +are preferably selectively disabled in the Wireshark build. + +The patches to lemon to silence compiler warnings and static analysis reports +(for edge cases that cannot occur) are not proposed upstream because that +process is difficult. From : + + SQLite is open-source, meaning that you can make as many copies of it as you + want and do whatever you want with those copies, without limitation. But + SQLite is not open-contribution. In order to keep SQLite in the public + domain and ensure that the code does not become contaminated with + proprietary or licensed content, the project does not accept patches from + unknown persons. + +A note about the Lemon patches, we have no intention to fork Lemon and maintain +it. These patches are written to address static analyzer warnings without +actually modifying the functionality. If upstream is willing to accept patches, +then that would be great and the intention is to make it as easy as possible. +The lemon and lempar patches are dedicated to the public domain, as set forward +in Creative Commons Zero v1.0 Universal (IANAL, but I hope this is sufficient). diff --git a/tools/lemon/apply-patches.sh b/tools/lemon/apply-patches.sh new file mode 100755 index 00000000..e445c87a --- /dev/null +++ b/tools/lemon/apply-patches.sh @@ -0,0 +1,16 @@ +#!/bin/sh -e +# Patch lemon.c and lempar.c to silence static analyzer warnings. +# See also tools/lemon/README + +# Strip trailing whitespace +sed -e 's/ \+$//' -i lemon.c lempar.c + +# Other patches +if [ -d "patches" ]; then + for i in patches/*.patch; do + echo "Applying $i" + patch --silent -p1 -i "$i" + done +fi + +echo DONE diff --git a/tools/lemon/lemon.c b/tools/lemon/lemon.c new file mode 100644 index 00000000..869ac580 --- /dev/null +++ b/tools/lemon/lemon.c @@ -0,0 +1,5893 @@ +/* +** This file contains all sources (including headers) to the LEMON +** LALR(1) parser generator. The sources have been combined into a +** single file to make it easy to include LEMON in the source tree +** and Makefile of another program. +** +** The author of this program disclaims copyright. +*/ +#include +#include +#include +#include +#include +#include + +#define ISSPACE(X) isspace((unsigned char)(X)) +#define ISDIGIT(X) isdigit((unsigned char)(X)) +#define ISALNUM(X) isalnum((unsigned char)(X)) +#define ISALPHA(X) isalpha((unsigned char)(X)) +#define ISUPPER(X) isupper((unsigned char)(X)) +#define ISLOWER(X) islower((unsigned char)(X)) + + +#ifndef __WIN32__ +# if defined(_WIN32) || defined(WIN32) +# define __WIN32__ +# endif +#endif + +#ifdef __WIN32__ +#ifdef __cplusplus +extern "C" { +#endif +extern int access(const char *path, int mode); +#ifdef __cplusplus +} +#endif +#else +#include +#endif + +/* #define PRIVATE static */ +#define PRIVATE + +#ifdef TEST +#define MAXRHS 5 /* Set low to exercise exception code */ +#else +#define MAXRHS 1000 +#endif + +extern void memory_error(); +static int showPrecedenceConflict = 0; +static char *msort(char*,char**,int(*)(const char*,const char*)); + +/* +** Compilers are getting increasingly pedantic about type conversions +** as C evolves ever closer to Ada.... To work around the latest problems +** we have to define the following variant of strlen(). +*/ +#define lemonStrlen(X) ((int)strlen(X)) + +/* +** Compilers are starting to complain about the use of sprintf() and strcpy(), +** saying they are unsafe. So we define our own versions of those routines too. +** +** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and +** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). +** The third is a helper routine for vsnprintf() that adds texts to the end of a +** buffer, making sure the buffer is always zero-terminated. +** +** The string formatter is a minimal subset of stdlib sprintf() supporting only +** a few simply conversions: +** +** %d +** %s +** %.*s +** +*/ +static void lemon_addtext( + char *zBuf, /* The buffer to which text is added */ + int *pnUsed, /* Slots of the buffer used so far */ + const char *zIn, /* Text to add */ + int nIn, /* Bytes of text to add. -1 to use strlen() */ + int iWidth /* Field width. Negative to left justify */ +){ + if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} + while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } + if( nIn==0 ) return; + memcpy(&zBuf[*pnUsed], zIn, nIn); + *pnUsed += nIn; + while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } + zBuf[*pnUsed] = 0; +} +static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ + int i, j, k, c; + int nUsed = 0; + const char *z; + char zTemp[50]; + str[0] = 0; + for(i=j=0; (c = zFormat[i])!=0; i++){ + if( c=='%' ){ + int iWidth = 0; + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + c = zFormat[++i]; + if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ + if( c=='-' ) i++; + while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; + if( c=='-' ) iWidth = -iWidth; + c = zFormat[i]; + } + if( c=='d' ){ + int v = va_arg(ap, int); + if( v<0 ){ + lemon_addtext(str, &nUsed, "-", 1, iWidth); + v = -v; + }else if( v==0 ){ + lemon_addtext(str, &nUsed, "0", 1, iWidth); + } + k = 0; + while( v>0 ){ + k++; + zTemp[sizeof(zTemp)-k] = (v%10) + '0'; + v /= 10; + } + lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); + }else if( c=='s' ){ + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, -1, iWidth); + }else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){ + i += 2; + k = va_arg(ap, int); + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, k, iWidth); + }else if( c=='%' ){ + lemon_addtext(str, &nUsed, "%", 1, 0); + }else{ + fprintf(stderr, "illegal format\n"); + exit(1); + } + j = i+1; + } + } + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + return nUsed; +} +static int lemon_sprintf(char *str, const char *format, ...){ + va_list ap; + int rc; + va_start(ap, format); + rc = lemon_vsprintf(str, format, ap); + va_end(ap); + return rc; +} +static void lemon_strcpy(char *dest, const char *src){ + while( (*(dest++) = *(src++))!=0 ){} +} +static void lemon_strcat(char *dest, const char *src){ + while( *dest ) dest++; + lemon_strcpy(dest, src); +} + + +/* a few forward declarations... */ +struct rule; +struct lemon; +struct action; + +static struct action *Action_new(void); +static struct action *Action_sort(struct action *); + +/********** From the file "build.h" ************************************/ +void FindRulePrecedences(struct lemon*); +void FindFirstSets(struct lemon*); +void FindStates(struct lemon*); +void FindLinks(struct lemon*); +void FindFollowSets(struct lemon*); +void FindActions(struct lemon*); + +/********* From the file "configlist.h" *********************************/ +void Configlist_init(void); +struct config *Configlist_add(struct rule *, int); +struct config *Configlist_addbasis(struct rule *, int); +void Configlist_closure(struct lemon *); +void Configlist_sort(void); +void Configlist_sortbasis(void); +struct config *Configlist_return(void); +struct config *Configlist_basis(void); +void Configlist_eat(struct config *); +void Configlist_reset(void); + +/********* From the file "error.h" ***************************************/ +void ErrorMsg(const char *, int,const char *, ...); + +/****** From the file "option.h" ******************************************/ +enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, + OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; +struct s_options { + enum option_type type; + const char *label; + char *arg; + const char *message; +}; +int OptInit(char**,struct s_options*,FILE*); +int OptNArgs(void); +char *OptArg(int); +void OptErr(int); +void OptPrint(void); + +/******** From the file "parse.h" *****************************************/ +void Parse(struct lemon *lemp); + +/********* From the file "plink.h" ***************************************/ +struct plink *Plink_new(void); +void Plink_add(struct plink **, struct config *); +void Plink_copy(struct plink **, struct plink *); +void Plink_delete(struct plink *); + +/********** From the file "report.h" *************************************/ +void Reprint(struct lemon *); +void ReportOutput(struct lemon *); +void ReportTable(struct lemon *, int, int); +void ReportHeader(struct lemon *); +void CompressTables(struct lemon *); +void ResortStates(struct lemon *); + +/********** From the file "set.h" ****************************************/ +void SetSize(int); /* All sets will be of size N */ +char *SetNew(void); /* A new set for element 0..N */ +void SetFree(char*); /* Deallocate a set */ +int SetAdd(char*,int); /* Add element to a set */ +int SetUnion(char *,char *); /* A <- A U B, thru element N */ +#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ + +/********** From the file "struct.h" *************************************/ +/* +** Principal data structures for the LEMON parser generator. +*/ + +typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; + +/* Symbols (terminals and nonterminals) of the grammar are stored +** in the following: */ +enum symbol_type { + TERMINAL, + NONTERMINAL, + MULTITERMINAL +}; +enum e_assoc { + LEFT, + RIGHT, + NONE, + UNK +}; +struct symbol { + const char *name; /* Name of the symbol */ + int index; /* Index number for this symbol */ + enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ + struct rule *rule; /* Linked list of rules of this (if an NT) */ + struct symbol *fallback; /* fallback token in case this token doesn't parse */ + int prec; /* Precedence if defined (-1 otherwise) */ + enum e_assoc assoc; /* Associativity if precedence is defined */ + char *firstset; /* First-set for all rules of this symbol */ + Boolean lambda; /* True if NT and can generate an empty string */ + int useCnt; /* Number of times used */ + char *destructor; /* Code which executes whenever this symbol is + ** popped from the stack during error processing */ + int destLineno; /* Line number for start of destructor. Set to + ** -1 for duplicate destructors. */ + char *datatype; /* The data type of information held by this + ** object. Only used if type==NONTERMINAL */ + int dtnum; /* The data type number. In the parser, the value + ** stack is a union. The .yy%d element of this + ** union is the correct data type for this object */ + int bContent; /* True if this symbol ever carries content - if + ** it is ever more than just syntax */ + /* The following fields are used by MULTITERMINALs only */ + int nsubsym; /* Number of constituent symbols in the MULTI */ + struct symbol **subsym; /* Array of constituent symbols */ +}; + +/* Each production rule in the grammar is stored in the following +** structure. */ +struct rule { + struct symbol *lhs; /* Left-hand side of the rule */ + const char *lhsalias; /* Alias for the LHS (NULL if none) */ + int lhsStart; /* True if left-hand side is the start symbol */ + int ruleline; /* Line number for the rule */ + int nrhs; /* Number of RHS symbols */ + struct symbol **rhs; /* The RHS symbols */ + const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ + int line; /* Line number at which code begins */ + const char *code; /* The code executed when this rule is reduced */ + const char *codePrefix; /* Setup code before code[] above */ + const char *codeSuffix; /* Breakdown code after code[] above */ + struct symbol *precsym; /* Precedence symbol for this rule */ + int index; /* An index number for this rule */ + int iRule; /* Rule number as used in the generated tables */ + Boolean noCode; /* True if this rule has no associated C code */ + Boolean codeEmitted; /* True if the code has been emitted already */ + Boolean canReduce; /* True if this rule is ever reduced */ + Boolean doesReduce; /* Reduce actions occur after optimization */ + Boolean neverReduce; /* Reduce is theoretically possible, but prevented + ** by actions or other outside implementation */ + struct rule *nextlhs; /* Next rule with the same LHS */ + struct rule *next; /* Next rule in the global list */ +}; + +/* A configuration is a production rule of the grammar together with +** a mark (dot) showing how much of that rule has been processed so far. +** Configurations also contain a follow-set which is a list of terminal +** symbols which are allowed to immediately follow the end of the rule. +** Every configuration is recorded as an instance of the following: */ +enum cfgstatus { + COMPLETE, + INCOMPLETE +}; +struct config { + struct rule *rp; /* The rule upon which the configuration is based */ + int dot; /* The parse point */ + char *fws; /* Follow-set for this configuration only */ + struct plink *fplp; /* Follow-set forward propagation links */ + struct plink *bplp; /* Follow-set backwards propagation links */ + struct state *stp; /* Pointer to state which contains this */ + enum cfgstatus status; /* used during followset and shift computations */ + struct config *next; /* Next configuration in the state */ + struct config *bp; /* The next basis configuration */ +}; + +enum e_action { + SHIFT, + ACCEPT, + REDUCE, + ERROR, + SSCONFLICT, /* A shift/shift conflict */ + SRCONFLICT, /* Was a reduce, but part of a conflict */ + RRCONFLICT, /* Was a reduce, but part of a conflict */ + SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ + RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ + NOT_USED, /* Deleted by compression */ + SHIFTREDUCE /* Shift first, then reduce */ +}; + +/* Every shift or reduce operation is stored as one of the following */ +struct action { + struct symbol *sp; /* The look-ahead symbol */ + enum e_action type; + union { + struct state *stp; /* The new state, if a shift */ + struct rule *rp; /* The rule, if a reduce */ + } x; + struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ + struct action *next; /* Next action for this state */ + struct action *collide; /* Next action with the same hash */ +}; + +/* Each state of the generated parser's finite state machine +** is encoded as an instance of the following structure. */ +struct state { + struct config *bp; /* The basis configurations for this state */ + struct config *cfp; /* All configurations in this set */ + int statenum; /* Sequential number for this state */ + struct action *ap; /* List of actions for this state */ + int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ + int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ + int iDfltReduce; /* Default action is to REDUCE by this rule */ + struct rule *pDfltReduce;/* The default REDUCE rule. */ + int autoReduce; /* True if this is an auto-reduce state */ +}; +#define NO_OFFSET (-2147483647) + +/* A followset propagation link indicates that the contents of one +** configuration followset should be propagated to another whenever +** the first changes. */ +struct plink { + struct config *cfp; /* The configuration to which linked */ + struct plink *next; /* The next propagate link */ +}; + +/* The state vector for the entire parser generator is recorded as +** follows. (LEMON uses no global variables and makes little use of +** static variables. Fields in the following structure can be thought +** of as begin global variables in the program.) */ +struct lemon { + struct state **sorted; /* Table of states sorted by state number */ + struct rule *rule; /* List of all rules */ + struct rule *startRule; /* First rule */ + int nstate; /* Number of states */ + int nxstate; /* nstate with tail degenerate states removed */ + int nrule; /* Number of rules */ + int nruleWithAction; /* Number of rules with actions */ + int nsymbol; /* Number of terminal and nonterminal symbols */ + int nterminal; /* Number of terminal symbols */ + int minShiftReduce; /* Minimum shift-reduce action value */ + int errAction; /* Error action value */ + int accAction; /* Accept action value */ + int noAction; /* No-op action value */ + int minReduce; /* Minimum reduce action */ + int maxAction; /* Maximum action value of any kind */ + struct symbol **symbols; /* Sorted array of pointers to symbols */ + int errorcnt; /* Number of errors */ + struct symbol *errsym; /* The error symbol */ + struct symbol *wildcard; /* Token that matches anything */ + char *name; /* Name of the generated parser */ + char *arg; /* Declaration of the 3rd argument to parser */ + char *ctx; /* Declaration of 2nd argument to constructor */ + char *tokentype; /* Type of terminal symbols in the parser stack */ + char *vartype; /* The default type of non-terminal symbols */ + char *start; /* Name of the start symbol for the grammar */ + char *stacksize; /* Size of the parser stack */ + char *include; /* Code to put at the start of the C file */ + char *error; /* Code to execute when an error is seen */ + char *overflow; /* Code to execute on a stack overflow */ + char *failure; /* Code to execute on parser failure */ + char *accept; /* Code to execute when the parser excepts */ + char *extracode; /* Code appended to the generated file */ + char *tokendest; /* Code to execute to destroy token data */ + char *vardest; /* Code for the default non-terminal destructor */ + char *filename; /* Name of the input file */ + char *outname; /* Name of the current output file */ + char *tokenprefix; /* A prefix added to token names in the .h file */ + int nconflict; /* Number of parsing conflicts */ + int nactiontab; /* Number of entries in the yy_action[] table */ + int nlookaheadtab; /* Number of entries in yy_lookahead[] */ + int tablesize; /* Total table size of all tables in bytes */ + int basisflag; /* Print only basis configurations */ + int printPreprocessed; /* Show preprocessor output on stdout */ + int has_fallback; /* True if any %fallback is seen in the grammar */ + int nolinenosflag; /* True if #line statements should not be printed */ + char *argv0; /* Name of the program */ +}; + +#define MemoryCheck(X) if((X)==0){ \ + extern void memory_error(); \ + memory_error(); \ +} + +/**************** From the file "table.h" *********************************/ +/* +** All code in this file has been automatically generated +** from a specification in the file +** "table.q" +** by the associative array code building program "aagen". +** Do not edit this file! Instead, edit the specification +** file, then rerun aagen. +*/ +/* +** Code for processing tables in the LEMON parser generator. +*/ +/* Routines for handling a strings */ + +const char *Strsafe(const char *); + +void Strsafe_init(void); +int Strsafe_insert(const char *); +const char *Strsafe_find(const char *); + +/* Routines for handling symbols of the grammar */ + +struct symbol *Symbol_new(const char *); +int Symbolcmpp(const void *, const void *); +void Symbol_init(void); +int Symbol_insert(struct symbol *, const char *); +struct symbol *Symbol_find(const char *); +struct symbol *Symbol_Nth(int); +int Symbol_count(void); +struct symbol **Symbol_arrayof(void); + +/* Routines to manage the state table */ + +int Configcmp(const char *, const char *); +struct state *State_new(void); +void State_init(void); +int State_insert(struct state *, struct config *); +struct state *State_find(struct config *); +struct state **State_arrayof(void); + +/* Routines used for efficiency in Configlist_add */ + +void Configtable_init(void); +int Configtable_insert(struct config *); +struct config *Configtable_find(struct config *); +void Configtable_clear(int(*)(struct config *)); + +/****************** From the file "action.c" *******************************/ +/* +** Routines processing parser actions in the LEMON parser generator. +*/ + +/* Allocate a new parser action */ +static struct action *Action_new(void){ + static struct action *actionfreelist = 0; + struct action *newaction; + + if( actionfreelist==0 ){ + int i; + int amt = 100; + actionfreelist = (struct action *)calloc(amt, sizeof(struct action)); + if( actionfreelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new parser action."); + exit(1); + } + for(i=0; inext; + return newaction; +} + +/* Compare two actions for sorting purposes. Return negative, zero, or +** positive if the first action is less than, equal to, or greater than +** the first +*/ +static int actioncmp( + struct action *ap1, + struct action *ap2 +){ + int rc; + rc = ap1->sp->index - ap2->sp->index; + if( rc==0 ){ + rc = (int)ap1->type - (int)ap2->type; + } + if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ + rc = ap1->x.rp->index - ap2->x.rp->index; + } + if( rc==0 ){ + rc = (int) (ap2 - ap1); + } + return rc; +} + +/* Sort parser actions */ +static struct action *Action_sort( + struct action *ap +){ + ap = (struct action *)msort((char *)ap,(char **)&ap->next, + (int(*)(const char*,const char*))actioncmp); + return ap; +} + +void Action_add( + struct action **app, + enum e_action type, + struct symbol *sp, + char *arg +){ + struct action *newaction; + newaction = Action_new(); + newaction->next = *app; + *app = newaction; + newaction->type = type; + newaction->sp = sp; + newaction->spOpt = 0; + if( type==SHIFT ){ + newaction->x.stp = (struct state *)arg; + }else{ + newaction->x.rp = (struct rule *)arg; + } +} +/********************** New code to implement the "acttab" module ***********/ +/* +** This module implements routines use to construct the yy_action[] table. +*/ + +/* +** The state of the yy_action table under construction is an instance of +** the following structure. +** +** The yy_action table maps the pair (state_number, lookahead) into an +** action_number. The table is an array of integers pairs. The state_number +** determines an initial offset into the yy_action array. The lookahead +** value is then added to this initial offset to get an index X into the +** yy_action array. If the aAction[X].lookahead equals the value of the +** of the lookahead input, then the value of the action_number output is +** aAction[X].action. If the lookaheads do not match then the +** default action for the state_number is returned. +** +** All actions associated with a single state_number are first entered +** into aLookahead[] using multiple calls to acttab_action(). Then the +** actions for that single state_number are placed into the aAction[] +** array with a single call to acttab_insert(). The acttab_insert() call +** also resets the aLookahead[] array in preparation for the next +** state number. +*/ +struct lookahead_action { + int lookahead; /* Value of the lookahead token */ + int action; /* Action to take on the given lookahead */ +}; +typedef struct acttab acttab; +struct acttab { + int nAction; /* Number of used slots in aAction[] */ + int nActionAlloc; /* Slots allocated for aAction[] */ + struct lookahead_action + *aAction, /* The yy_action[] table under construction */ + *aLookahead; /* A single new transaction set */ + int mnLookahead; /* Minimum aLookahead[].lookahead */ + int mnAction; /* Action associated with mnLookahead */ + int mxLookahead; /* Maximum aLookahead[].lookahead */ + int nLookahead; /* Used slots in aLookahead[] */ + int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ + int nterminal; /* Number of terminal symbols */ + int nsymbol; /* total number of symbols */ +}; + +/* Return the number of entries in the yy_action table */ +#define acttab_lookahead_size(X) ((X)->nAction) + +/* The value for the N-th entry in yy_action */ +#define acttab_yyaction(X,N) ((X)->aAction[N].action) + +/* The value for the N-th entry in yy_lookahead */ +#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) + +/* Free all memory associated with the given acttab */ +void acttab_free(acttab *p){ + free( p->aAction ); + free( p->aLookahead ); + free( p ); +} + +/* Allocate a new acttab structure */ +acttab *acttab_alloc(int nsymbol, int nterminal){ + acttab *p = (acttab *) calloc( 1, sizeof(*p) ); + if( p==0 ){ + fprintf(stderr,"Unable to allocate memory for a new acttab."); + exit(1); + } + memset(p, 0, sizeof(*p)); + p->nsymbol = nsymbol; + p->nterminal = nterminal; + return p; +} + +/* Add a new action to the current transaction set. +** +** This routine is called once for each lookahead for a particular +** state. +*/ +void acttab_action(acttab *p, int lookahead, int action){ + if( p->nLookahead>=p->nLookaheadAlloc ){ + p->nLookaheadAlloc += 25; + p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, + sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); + if( p->aLookahead==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + } + if( p->nLookahead==0 ){ + p->mxLookahead = lookahead; + p->mnLookahead = lookahead; + p->mnAction = action; + }else{ + if( p->mxLookaheadmxLookahead = lookahead; + if( p->mnLookahead>lookahead ){ + p->mnLookahead = lookahead; + p->mnAction = action; + } + } + p->aLookahead[p->nLookahead].lookahead = lookahead; + p->aLookahead[p->nLookahead].action = action; + p->nLookahead++; +} + +/* +** Add the transaction set built up with prior calls to acttab_action() +** into the current action table. Then reset the transaction set back +** to an empty set in preparation for a new round of acttab_action() calls. +** +** Return the offset into the action table of the new transaction. +** +** If the makeItSafe parameter is true, then the offset is chosen so that +** it is impossible to overread the yy_lookaside[] table regardless of +** the lookaside token. This is done for the terminal symbols, as they +** come from external inputs and can contain syntax errors. When makeItSafe +** is false, there is more flexibility in selecting offsets, resulting in +** a smaller table. For non-terminal symbols, which are never syntax errors, +** makeItSafe can be false. +*/ +int acttab_insert(acttab *p, int makeItSafe){ + int i, j, k, n, end; + assert( p->nLookahead>0 ); + + /* Make sure we have enough space to hold the expanded action table + ** in the worst case. The worst case occurs if the transaction set + ** must be appended to the current action table + */ + n = p->nsymbol + 1; + if( p->nAction + n >= p->nActionAlloc ){ + int oldAlloc = p->nActionAlloc; + p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; + p->aAction = (struct lookahead_action *) realloc( p->aAction, + sizeof(p->aAction[0])*p->nActionAlloc); + if( p->aAction==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=oldAlloc; inActionAlloc; i++){ + p->aAction[i].lookahead = -1; + p->aAction[i].action = -1; + } + } + + /* Scan the existing action table looking for an offset that is a + ** duplicate of the current transaction set. Fall out of the loop + ** if and when the duplicate is found. + ** + ** i is the index in p->aAction[] where p->mnLookahead is inserted. + */ + end = makeItSafe ? p->mnLookahead : 0; + for(i=p->nAction-1; i>=end; i--){ + if( p->aAction[i].lookahead==p->mnLookahead ){ + /* All lookaheads and actions in the aLookahead[] transaction + ** must match against the candidate aAction[i] entry. */ + if( p->aAction[i].action!=p->mnAction ) continue; + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 || k>=p->nAction ) break; + if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; + if( p->aLookahead[j].action!=p->aAction[k].action ) break; + } + if( jnLookahead ) continue; + + /* No possible lookahead value that is not in the aLookahead[] + ** transaction is allowed to match aAction[i] */ + n = 0; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead<0 ) continue; + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; + } + if( n==p->nLookahead ){ + break; /* An exact match is found at offset i */ + } + } + } + + /* If no existing offsets exactly match the current transaction, find an + ** an empty offset in the aAction[] table in which we can add the + ** aLookahead[] transaction. + */ + if( inAction, which means the + ** transaction will be appended. */ + i = makeItSafe ? p->mnLookahead : 0; + for(; inActionAlloc - p->mxLookahead; i++){ + if( p->aAction[i].lookahead<0 ){ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 ) break; + if( p->aAction[k].lookahead>=0 ) break; + } + if( jnLookahead ) continue; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; + } + if( j==p->nAction ){ + break; /* Fits in empty slots */ + } + } + } + } + /* Insert transaction set at index i. */ +#if 0 + printf("Acttab:"); + for(j=0; jnLookahead; j++){ + printf(" %d", p->aLookahead[j].lookahead); + } + printf(" inserted at %d\n", i); +#endif + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + p->aAction[k] = p->aLookahead[j]; + if( k>=p->nAction ) p->nAction = k+1; + } + if( makeItSafe && i+p->nterminal>=p->nAction ) p->nAction = i+p->nterminal+1; + p->nLookahead = 0; + + /* Return the offset that is added to the lookahead in order to get the + ** index into yy_action of the action */ + return i - p->mnLookahead; +} + +/* +** Return the size of the action table without the trailing syntax error +** entries. +*/ +int acttab_action_size(acttab *p){ + int n = p->nAction; + while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } + return n; +} + +/********************** From the file "build.c" *****************************/ +/* +** Routines to construction the finite state machine for the LEMON +** parser generator. +*/ + +/* Find a precedence symbol of every rule in the grammar. +** +** Those rules which have a precedence symbol coded in the input +** grammar using the "[symbol]" construct will already have the +** rp->precsym field filled. Other rules take as their precedence +** symbol the first RHS symbol with a defined precedence. If there +** are not RHS symbols with a defined precedence, the precedence +** symbol field is left blank. +*/ +void FindRulePrecedences(struct lemon *xp) +{ + struct rule *rp; + for(rp=xp->rule; rp; rp=rp->next){ + if( rp->precsym==0 ){ + int i, j; + for(i=0; inrhs && rp->precsym==0; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + if( sp->subsym[j]->prec>=0 ){ + rp->precsym = sp->subsym[j]; + break; + } + } + }else if( sp->prec>=0 ){ + rp->precsym = rp->rhs[i]; + } + } + } + } + return; +} + +/* Find all nonterminals which will generate the empty string. +** Then go back and compute the first sets of every nonterminal. +** The first set is the set of all terminal symbols which can begin +** a string generated by that nonterminal. +*/ +void FindFirstSets(struct lemon *lemp) +{ + int i, j; + struct rule *rp; + int progress; + + for(i=0; insymbol; i++){ + lemp->symbols[i]->lambda = LEMON_FALSE; + } + for(i=lemp->nterminal; insymbol; i++){ + lemp->symbols[i]->firstset = SetNew(); + } + + /* First compute all lambdas */ + do{ + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->lhs->lambda ) continue; + for(i=0; inrhs; i++){ + struct symbol *sp = rp->rhs[i]; + assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); + if( sp->lambda==LEMON_FALSE ) break; + } + if( i==rp->nrhs ){ + rp->lhs->lambda = LEMON_TRUE; + progress = 1; + } + } + }while( progress ); + + /* Now compute all first sets */ + do{ + struct symbol *s1, *s2; + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + s1 = rp->lhs; + for(i=0; inrhs; i++){ + s2 = rp->rhs[i]; + if( s2->type==TERMINAL ){ + progress += SetAdd(s1->firstset,s2->index); + break; + }else if( s2->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + progress += SetAdd(s1->firstset,s2->subsym[j]->index); + } + break; + }else if( s1==s2 ){ + if( s1->lambda==LEMON_FALSE ) break; + }else{ + progress += SetUnion(s1->firstset,s2->firstset); + if( s2->lambda==LEMON_FALSE ) break; + } + } + } + }while( progress ); + return; +} + +/* Compute all LR(0) states for the grammar. Links +** are added to between some states so that the LR(1) follow sets +** can be computed later. +*/ +PRIVATE struct state *getstate(struct lemon *); /* forward reference */ +void FindStates(struct lemon *lemp) +{ + struct symbol *sp; + struct rule *rp; + + Configlist_init(); + + /* Find the start symbol */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ){ + ErrorMsg(lemp->filename,0, + "The specified start symbol \"%s\" is not " + "in a nonterminal of the grammar. \"%s\" will be used as the start " + "symbol instead.",lemp->start,lemp->startRule->lhs->name); + lemp->errorcnt++; + sp = lemp->startRule->lhs; + } + }else if( lemp->startRule ){ + sp = lemp->startRule->lhs; + }else{ + ErrorMsg(lemp->filename,0,"Internal error - no start rule\n"); + exit(1); + } + + /* Make sure the start symbol doesn't occur on the right-hand side of + ** any rule. Report an error if it does. (YACC would generate a new + ** start symbol in this case.) */ + for(rp=lemp->rule; rp; rp=rp->next){ + int i; + for(i=0; inrhs; i++){ + if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ + ErrorMsg(lemp->filename,0, + "The start symbol \"%s\" occurs on the " + "right-hand side of a rule. This will result in a parser which " + "does not work properly.",sp->name); + lemp->errorcnt++; + } + } + } + + /* The basis configuration set for the first state + ** is all rules which have the start symbol as their + ** left-hand side */ + for(rp=sp->rule; rp; rp=rp->nextlhs){ + struct config *newcfp; + rp->lhsStart = 1; + newcfp = Configlist_addbasis(rp,0); + SetAdd(newcfp->fws,0); + } + + /* Compute the first state. All other states will be + ** computed automatically during the computation of the first one. + ** The returned pointer to the first state is not used. */ + (void)getstate(lemp); + return; +} + +/* Return a pointer to a state which is described by the configuration +** list which has been built from calls to Configlist_add. +*/ +PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ +PRIVATE struct state *getstate(struct lemon *lemp) +{ + struct config *cfp, *bp; + struct state *stp; + + /* Extract the sorted basis of the new state. The basis was constructed + ** by prior calls to "Configlist_addbasis()". */ + Configlist_sortbasis(); + bp = Configlist_basis(); + + /* Get a state with the same basis */ + stp = State_find(bp); + if( stp ){ + /* A state with the same basis already exists! Copy all the follow-set + ** propagation links from the state under construction into the + ** preexisting state, then return a pointer to the preexisting state */ + struct config *x, *y; + for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ + Plink_copy(&y->bplp,x->bplp); + Plink_delete(x->fplp); + x->fplp = x->bplp = 0; + } + cfp = Configlist_return(); + Configlist_eat(cfp); + }else{ + /* This really is a new state. Construct all the details */ + Configlist_closure(lemp); /* Compute the configuration closure */ + Configlist_sort(); /* Sort the configuration closure */ + cfp = Configlist_return(); /* Get a pointer to the config list */ + stp = State_new(); /* A new state structure */ + MemoryCheck(stp); + stp->bp = bp; /* Remember the configuration basis */ + stp->cfp = cfp; /* Remember the configuration closure */ + stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ + stp->ap = 0; /* No actions, yet. */ + State_insert(stp,stp->bp); /* Add to the state table */ + buildshifts(lemp,stp); /* Recursively compute successor states */ + } + return stp; +} + +/* +** Return true if two symbols are the same. +*/ +int same_symbol(struct symbol *a, struct symbol *b) +{ + int i; + if( a==b ) return 1; + if( a->type!=MULTITERMINAL ) return 0; + if( b->type!=MULTITERMINAL ) return 0; + if( a->nsubsym!=b->nsubsym ) return 0; + for(i=0; insubsym; i++){ + if( a->subsym[i]!=b->subsym[i] ) return 0; + } + return 1; +} + +/* Construct all successor states to the given state. A "successor" +** state is any state which can be reached by a shift action. +*/ +PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) +{ + struct config *cfp; /* For looping thru the config closure of "stp" */ + struct config *bcfp; /* For the inner loop on config closure of "stp" */ + struct config *newcfg; /* */ + struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ + struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ + struct state *newstp; /* A pointer to a successor state */ + + /* Each configuration becomes complete after it contributes to a successor + ** state. Initially, all configurations are incomplete */ + for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; + + /* Loop through all configurations of the state "stp" */ + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ + if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ + Configlist_reset(); /* Reset the new config set */ + sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ + + /* For every configuration in the state "stp" which has the symbol "sp" + ** following its dot, add the same configuration to the basis set under + ** construction but with the dot shifted one symbol to the right. */ + for(bcfp=cfp; bcfp; bcfp=bcfp->next){ + if( bcfp->status==COMPLETE ) continue; /* Already used */ + if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ + bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ + if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ + bcfp->status = COMPLETE; /* Mark this config as used */ + newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); + Plink_add(&newcfg->bplp,bcfp); + } + + /* Get a pointer to the state described by the basis configuration set + ** constructed in the preceding loop */ + newstp = getstate(lemp); + + /* The state "newstp" is reached from the state "stp" by a shift action + ** on the symbol "sp" */ + if( sp->type==MULTITERMINAL ){ + int i; + for(i=0; insubsym; i++){ + Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); + } + }else{ + Action_add(&stp->ap,SHIFT,sp,(char *)newstp); + } + } +} + +/* +** Construct the propagation links +*/ +void FindLinks(struct lemon *lemp) +{ + int i; + struct config *cfp, *other; + struct state *stp; + struct plink *plp; + + /* Housekeeping detail: + ** Add to every propagate link a pointer back to the state to + ** which the link is attached. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ + cfp->stp = stp; + } + } + + /* Convert all backlinks into forward links. Only the forward + ** links are used in the follow-set computation. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ + for(plp=cfp->bplp; plp; plp=plp->next){ + other = plp->cfp; + Plink_add(&other->fplp,cfp); + } + } + } +} + +/* Compute all followsets. +** +** A followset is the set of all symbols which can come immediately +** after a configuration. +*/ +void FindFollowSets(struct lemon *lemp) +{ + int i; + struct config *cfp; + struct plink *plp; + int progress; + int change; + + for(i=0; instate; i++){ + assert( lemp->sorted[i]!=0 ); + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + cfp->status = INCOMPLETE; + } + } + + do{ + progress = 0; + for(i=0; instate; i++){ + assert( lemp->sorted[i]!=0 ); + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; + for(plp=cfp->fplp; plp; plp=plp->next){ + change = SetUnion(plp->cfp->fws,cfp->fws); + if( change ){ + plp->cfp->status = INCOMPLETE; + progress = 1; + } + } + cfp->status = COMPLETE; + } + } + }while( progress ); +} + +static int resolve_conflict(struct action *,struct action *); + +/* Compute the reduce actions, and resolve conflicts. +*/ +void FindActions(struct lemon *lemp) +{ + int i,j; + struct config *cfp; + struct state *stp; + struct symbol *sp; + struct rule *rp; + + /* Add all of the reduce actions + ** A reduce action is added for each element of the followset of + ** a configuration which has its dot at the extreme right. + */ + for(i=0; instate; i++){ /* Loop over all states */ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ + if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ + for(j=0; jnterminal; j++){ + if( SetFind(cfp->fws,j) ){ + /* Add a reduce action to the state "stp" which will reduce by the + ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ + Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); + } + } + } + } + } + + /* Add the accepting token */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ){ + if( lemp->startRule==0 ){ + fprintf(stderr, "internal error on source line %d: no start rule\n", + __LINE__); + exit(1); + } + sp = lemp->startRule->lhs; + } + }else{ + sp = lemp->startRule->lhs; + } + /* Add to the first state (which is always the starting state of the + ** finite state machine) an action to ACCEPT if the lookahead is the + ** start nonterminal. */ + Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); + + /* Resolve conflicts */ + for(i=0; instate; i++){ + struct action *ap, *nap; + stp = lemp->sorted[i]; + /* assert( stp->ap ); */ + stp->ap = Action_sort(stp->ap); + for(ap=stp->ap; ap && ap->next; ap=ap->next){ + for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ + /* The two actions "ap" and "nap" have the same lookahead. + ** Figure out which one should be used */ + lemp->nconflict += resolve_conflict(ap,nap); + } + } + } + + /* Report an error for each rule that can never be reduced. */ + for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; + for(i=0; instate; i++){ + struct action *ap; + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; + } + } + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->canReduce ) continue; + ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); + lemp->errorcnt++; + } +} + +/* Resolve a conflict between the two given actions. If the +** conflict can't be resolved, return non-zero. +** +** NO LONGER TRUE: +** To resolve a conflict, first look to see if either action +** is on an error rule. In that case, take the action which +** is not associated with the error rule. If neither or both +** actions are associated with an error rule, then try to +** use precedence to resolve the conflict. +** +** If either action is a SHIFT, then it must be apx. This +** function won't work if apx->type==REDUCE and apy->type==SHIFT. +*/ +static int resolve_conflict( + struct action *apx, + struct action *apy +){ + struct symbol *spx, *spy; + int errcnt = 0; + assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ + if( apx->type==SHIFT && apy->type==SHIFT ){ + apy->type = SSCONFLICT; + errcnt++; + } + if( apx->type==SHIFT && apy->type==REDUCE ){ + spx = apx->sp; + spy = apy->x.rp->precsym; + if( spy==0 || spx->prec<0 || spy->prec<0 ){ + /* Not enough precedence information. */ + apy->type = SRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ /* higher precedence wins */ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = SH_RESOLVED; + }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ + apy->type = RD_RESOLVED; /* associativity */ + }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ + apx->type = SH_RESOLVED; + }else{ + assert( spx->prec==spy->prec && spx->assoc==NONE ); + apx->type = ERROR; + } + }else if( apx->type==REDUCE && apy->type==REDUCE ){ + spx = apx->x.rp->precsym; + spy = apy->x.rp->precsym; + if( spx==0 || spy==0 || spx->prec<0 || + spy->prec<0 || spx->prec==spy->prec ){ + apy->type = RRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = RD_RESOLVED; + } + }else{ + assert( + apx->type==SH_RESOLVED || + apx->type==RD_RESOLVED || + apx->type==SSCONFLICT || + apx->type==SRCONFLICT || + apx->type==RRCONFLICT || + apy->type==SH_RESOLVED || + apy->type==RD_RESOLVED || + apy->type==SSCONFLICT || + apy->type==SRCONFLICT || + apy->type==RRCONFLICT + ); + /* The REDUCE/SHIFT case cannot happen because SHIFTs come before + ** REDUCEs on the list. If we reach this point it must be because + ** the parser conflict had already been resolved. */ + } + return errcnt; +} +/********************* From the file "configlist.c" *************************/ +/* +** Routines to processing a configuration list and building a state +** in the LEMON parser generator. +*/ + +static struct config *freelist = 0; /* List of free configurations */ +static struct config *current = 0; /* Top of list of configurations */ +static struct config **currentend = 0; /* Last on list of configs */ +static struct config *basis = 0; /* Top of list of basis configs */ +static struct config **basisend = 0; /* End of list of basis configs */ + +/* Return a pointer to a new configuration */ +PRIVATE struct config *newconfig(void){ + return (struct config*)calloc(1, sizeof(struct config)); +} + +/* The configuration "old" is no longer used */ +PRIVATE void deleteconfig(struct config *old) +{ + old->next = freelist; + freelist = old; +} + +/* Initialized the configuration list builder */ +void Configlist_init(void){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_init(); + return; +} + +/* Initialized the configuration list builder */ +void Configlist_reset(void){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_clear(0); + return; +} + +/* Add another configuration to the configuration list */ +struct config *Configlist_add( + struct rule *rp, /* The rule */ + int dot /* Index into the RHS of the rule where the dot goes */ +){ + struct config *cfp, model; + + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + Configtable_insert(cfp); + } + return cfp; +} + +/* Add a basis configuration to the configuration list */ +struct config *Configlist_addbasis(struct rule *rp, int dot) +{ + struct config *cfp, model; + + assert( basisend!=0 ); + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + *basisend = cfp; + basisend = &cfp->bp; + Configtable_insert(cfp); + } + return cfp; +} + +/* Compute the closure of the configuration list */ +void Configlist_closure(struct lemon *lemp) +{ + struct config *cfp, *newcfp; + struct rule *rp, *newrp; + struct symbol *sp, *xsp; + int i, dot; + + assert( currentend!=0 ); + for(cfp=current; cfp; cfp=cfp->next){ + rp = cfp->rp; + dot = cfp->dot; + if( dot>=rp->nrhs ) continue; + sp = rp->rhs[dot]; + if( sp->type==NONTERMINAL ){ + if( sp->rule==0 && sp!=lemp->errsym ){ + ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", + sp->name); + lemp->errorcnt++; + } + for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ + newcfp = Configlist_add(newrp,0); + for(i=dot+1; inrhs; i++){ + xsp = rp->rhs[i]; + if( xsp->type==TERMINAL ){ + SetAdd(newcfp->fws,xsp->index); + break; + }else if( xsp->type==MULTITERMINAL ){ + int k; + for(k=0; knsubsym; k++){ + SetAdd(newcfp->fws, xsp->subsym[k]->index); + } + break; + }else{ + SetUnion(newcfp->fws,xsp->firstset); + if( xsp->lambda==LEMON_FALSE ) break; + } + } + if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); + } + } + } + return; +} + +/* Sort the configuration list */ +void Configlist_sort(void){ + current = (struct config*)msort((char*)current,(char**)&(current->next), + Configcmp); + currentend = 0; + return; +} + +/* Sort the basis configuration list */ +void Configlist_sortbasis(void){ + basis = (struct config*)msort((char*)current,(char**)&(current->bp), + Configcmp); + basisend = 0; + return; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_return(void){ + struct config *old; + old = current; + current = 0; + currentend = 0; + return old; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_basis(void){ + struct config *old; + old = basis; + basis = 0; + basisend = 0; + return old; +} + +/* Free all elements of the given configuration list */ +void Configlist_eat(struct config *cfp) +{ + struct config *nextcfp; + for(; cfp; cfp=nextcfp){ + nextcfp = cfp->next; + assert( cfp->fplp==0 ); + assert( cfp->bplp==0 ); + if( cfp->fws ) SetFree(cfp->fws); + deleteconfig(cfp); + } + return; +} +/***************** From the file "error.c" *********************************/ +/* +** Code for printing error message. +*/ + +void ErrorMsg(const char *filename, int lineno, const char *format, ...){ + va_list ap; + fprintf(stderr, "%s:%d: ", filename, lineno); + va_start(ap, format); + vfprintf(stderr,format,ap); + va_end(ap); + fprintf(stderr, "\n"); +} +/**************** From the file "main.c" ************************************/ +/* +** Main program file for the LEMON parser generator. +*/ + +/* Report an out-of-memory condition and abort. This function +** is used mostly by the "MemoryCheck" macro in struct.h +*/ +void memory_error(void){ + fprintf(stderr,"Out of memory. Aborting...\n"); + exit(1); +} + +static int nDefine = 0; /* Number of -D options on the command line */ +static char **azDefine = 0; /* Name of the -D macros */ + +/* This routine is called with the argument to each -D command-line option. +** Add the macro defined to the azDefine array. +*/ +static void handle_D_option(char *z){ + char **paz; + nDefine++; + azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); + if( azDefine==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + paz = &azDefine[nDefine-1]; + *paz = (char *) malloc( lemonStrlen(z)+1 ); + if( *paz==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(*paz, z); + for(z=*paz; *z && *z!='='; z++){} + *z = 0; +} + +/* Rember the name of the output directory +*/ +static char *outputDir = NULL; +static void handle_d_option(char *z){ + outputDir = (char *) malloc( lemonStrlen(z)+1 ); + if( outputDir==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(outputDir, z); +} + +static char *user_templatename = NULL; +static void handle_T_option(char *z){ + user_templatename = (char *) malloc( lemonStrlen(z)+1 ); + if( user_templatename==0 ){ + memory_error(); + } + lemon_strcpy(user_templatename, z); +} + +/* Merge together to lists of rules ordered by rule.iRule */ +static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ + struct rule *pFirst = 0; + struct rule **ppPrev = &pFirst; + while( pA && pB ){ + if( pA->iRuleiRule ){ + *ppPrev = pA; + ppPrev = &pA->next; + pA = pA->next; + }else{ + *ppPrev = pB; + ppPrev = &pB->next; + pB = pB->next; + } + } + if( pA ){ + *ppPrev = pA; + }else{ + *ppPrev = pB; + } + return pFirst; +} + +/* +** Sort a list of rules in order of increasing iRule value +*/ +static struct rule *Rule_sort(struct rule *rp){ + unsigned int i; + struct rule *pNext; + struct rule *x[32]; + memset(x, 0, sizeof(x)); + while( rp ){ + pNext = rp->next; + rp->next = 0; + for(i=0; iindex = i; + qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); + for(i=0; iindex = i; + while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } + assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); + lem.nsymbol = i - 1; + for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); + lem.nterminal = i; + + /* Assign sequential rule numbers. Start with 0. Put rules that have no + ** reduce action C-code associated with them last, so that the switch() + ** statement that selects reduction actions will have a smaller jump table. + */ + for(i=0, rp=lem.rule; rp; rp=rp->next){ + rp->iRule = rp->code ? i++ : -1; + } + lem.nruleWithAction = i; + for(rp=lem.rule; rp; rp=rp->next){ + if( rp->iRule<0 ) rp->iRule = i++; + } + lem.startRule = lem.rule; + lem.rule = Rule_sort(lem.rule); + + /* Generate a reprint of the grammar, if requested on the command line */ + if( rpflag ){ + Reprint(&lem); + }else{ + /* Initialize the size for all follow and first sets */ + SetSize(lem.nterminal+1); + + /* Find the precedence for every production rule (that has one) */ + FindRulePrecedences(&lem); + + /* Compute the lambda-nonterminals and the first-sets for every + ** nonterminal */ + FindFirstSets(&lem); + + /* Compute all LR(0) states. Also record follow-set propagation + ** links so that the follow-set can be computed later */ + lem.nstate = 0; + FindStates(&lem); + lem.sorted = State_arrayof(); + + /* Tie up loose ends on the propagation links */ + FindLinks(&lem); + + /* Compute the follow set of every reducible configuration */ + FindFollowSets(&lem); + + /* Compute the action tables */ + FindActions(&lem); + + /* Compress the action tables */ + if( compress==0 ) CompressTables(&lem); + + /* Reorder and renumber the states so that states with fewer choices + ** occur at the end. This is an optimization that helps make the + ** generated parser tables smaller. */ + if( noResort==0 ) ResortStates(&lem); + + /* Generate a report of the parser generated. (the "y.output" file) */ + if( !quiet ) ReportOutput(&lem); + + /* Generate the source code for the parser */ + ReportTable(&lem, mhflag, sqlFlag); + + /* Produce a header file for use by the scanner. (This step is + ** omitted if the "-m" option is used because makeheaders will + ** generate the file for us.) */ + if( !mhflag ) ReportHeader(&lem); + } + if( statistics ){ + printf("Parser statistics:\n"); + stats_line("terminal symbols", lem.nterminal); + stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); + stats_line("total symbols", lem.nsymbol); + stats_line("rules", lem.nrule); + stats_line("states", lem.nxstate); + stats_line("conflicts", lem.nconflict); + stats_line("action table entries", lem.nactiontab); + stats_line("lookahead table entries", lem.nlookaheadtab); + stats_line("total table size (bytes)", lem.tablesize); + } + if( lem.nconflict > 0 ){ + fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); + } + + /* return 0 on success, 1 on failure. */ + exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; + exit(exitcode); + return (exitcode); +} +/******************** From the file "msort.c" *******************************/ +/* +** A generic merge-sort program. +** +** USAGE: +** Let "ptr" be a pointer to some structure which is at the head of +** a null-terminated list. Then to sort the list call: +** +** ptr = msort(ptr,&(ptr->next),cmpfnc); +** +** In the above, "cmpfnc" is a pointer to a function which compares +** two instances of the structure and returns an integer, as in +** strcmp. The second argument is a pointer to the pointer to the +** second element of the linked list. This address is used to compute +** the offset to the "next" field within the structure. The offset to +** the "next" field must be constant for all structures in the list. +** +** The function returns a new pointer which is the head of the list +** after sorting. +** +** ALGORITHM: +** Merge-sort. +*/ + +/* +** Return a pointer to the next structure in the linked list. +*/ +#define NEXT(A) (*(char**)(((char*)A)+offset)) + +/* +** Inputs: +** a: A sorted, null-terminated linked list. (May be null). +** b: A sorted, null-terminated linked list. (May be null). +** cmp: A pointer to the comparison function. +** offset: Offset in the structure to the "next" field. +** +** Return Value: +** A pointer to the head of a sorted list containing the elements +** of both a and b. +** +** Side effects: +** The "next" pointers for elements in the lists a and b are +** changed. +*/ +static char *merge( + char *a, + char *b, + int (*cmp)(const char*,const char*), + int offset +){ + char *ptr, *head; + + if( a==0 ){ + head = b; + }else if( b==0 ){ + head = a; + }else{ + if( (*cmp)(a,b)<=0 ){ + ptr = a; + a = NEXT(a); + }else{ + ptr = b; + b = NEXT(b); + } + head = ptr; + while( a && b ){ + if( (*cmp)(a,b)<=0 ){ + NEXT(ptr) = a; + ptr = a; + a = NEXT(a); + }else{ + NEXT(ptr) = b; + ptr = b; + b = NEXT(b); + } + } + if( a ) NEXT(ptr) = a; + else NEXT(ptr) = b; + } + return head; +} + +/* +** Inputs: +** list: Pointer to a singly-linked list of structures. +** next: Pointer to pointer to the second element of the list. +** cmp: A comparison function. +** +** Return Value: +** A pointer to the head of a sorted list containing the elements +** originally in list. +** +** Side effects: +** The "next" pointers for elements in list are changed. +*/ +#define LISTSIZE 30 +static char *msort( + char *list, + char **next, + int (*cmp)(const char*,const char*) +){ + unsigned long offset; + char *ep; + char *set[LISTSIZE]; + int i; + offset = (unsigned long)((char*)next - (char*)list); + for(i=0; istate = WAITING_FOR_DECL_KEYWORD; + }else if( ISLOWER(x[0]) ){ + psp->lhs = Symbol_new(x); + psp->nrhs = 0; + psp->lhsalias = 0; + psp->state = WAITING_FOR_ARROW; + }else if( x[0]=='{' ){ + if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "There is no prior rule upon which to attach the code " + "fragment which begins on this line."); + psp->errorcnt++; + }else if( psp->prevrule->code!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Code fragment beginning on this line is not the first " + "to follow the previous rule."); + psp->errorcnt++; + }else if( strcmp(x, "{NEVER-REDUCE")==0 ){ + psp->prevrule->neverReduce = 1; + }else{ + psp->prevrule->line = psp->tokenlineno; + psp->prevrule->code = &x[1]; + psp->prevrule->noCode = 0; + } + }else if( x[0]=='[' ){ + psp->state = PRECEDENCE_MARK_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Token \"%s\" should be either \"%%\" or a nonterminal name.", + x); + psp->errorcnt++; + } + break; + case PRECEDENCE_MARK_1: + if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "The precedence symbol must be a terminal."); + psp->errorcnt++; + }else if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "There is no prior rule to assign precedence \"[%s]\".",x); + psp->errorcnt++; + }else if( psp->prevrule->precsym!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Precedence mark on this line is not the first " + "to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->precsym = Symbol_new(x); + } + psp->state = PRECEDENCE_MARK_2; + break; + case PRECEDENCE_MARK_2: + if( x[0]!=']' ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"]\" on precedence mark."); + psp->errorcnt++; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + break; + case WAITING_FOR_ARROW: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else if( x[0]=='(' ){ + psp->state = LHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Expected to see a \":\" following the LHS symbol \"%s\".", + psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->lhsalias = x; + psp->state = LHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the LHS \"%s\"\n", + x,psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = LHS_ALIAS_3; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_3: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"->\" following: \"%s(%s)\".", + psp->lhs->name,psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case IN_RHS: + if( x[0]=='.' ){ + struct rule *rp; + rp = (struct rule *)calloc( sizeof(struct rule) + + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); + if( rp==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't allocate enough memory for this rule."); + psp->errorcnt++; + psp->prevrule = 0; + }else{ + int i; + rp->ruleline = psp->tokenlineno; + rp->rhs = (struct symbol**)&rp[1]; + rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); + for(i=0; inrhs; i++){ + rp->rhs[i] = psp->rhs[i]; + rp->rhsalias[i] = psp->alias[i]; + if( rp->rhsalias[i]!=0 ){ rp->rhs[i]->bContent = 1; } + } + rp->lhs = psp->lhs; + rp->lhsalias = psp->lhsalias; + rp->nrhs = psp->nrhs; + rp->code = 0; + rp->noCode = 1; + rp->precsym = 0; + rp->index = psp->gp->nrule++; + rp->nextlhs = rp->lhs->rule; + rp->lhs->rule = rp; + rp->next = 0; + if( psp->firstrule==0 ){ + psp->firstrule = psp->lastrule = rp; + }else{ + psp->lastrule->next = rp; + psp->lastrule = rp; + } + psp->prevrule = rp; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISALPHA(x[0]) ){ + if( psp->nrhs>=MAXRHS ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Too many symbols on RHS of rule beginning at \"%s\".", + x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + }else{ + psp->rhs[psp->nrhs] = Symbol_new(x); + psp->alias[psp->nrhs] = 0; + psp->nrhs++; + } + }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 && ISUPPER(x[1]) ){ + struct symbol *msp = psp->rhs[psp->nrhs-1]; + if( msp->type!=MULTITERMINAL ){ + struct symbol *origsp = msp; + msp = (struct symbol *) calloc(1,sizeof(*msp)); + memset(msp, 0, sizeof(*msp)); + msp->type = MULTITERMINAL; + msp->nsubsym = 1; + msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); + msp->subsym[0] = origsp; + msp->name = origsp->name; + psp->rhs[psp->nrhs-1] = msp; + } + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); + if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Cannot form a compound containing a non-terminal"); + psp->errorcnt++; + } + }else if( x[0]=='(' && psp->nrhs>0 ){ + psp->state = RHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal character on RHS of rule: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->alias[psp->nrhs-1] = x; + psp->state = RHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", + x,psp->rhs[psp->nrhs-1]->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case WAITING_FOR_DECL_KEYWORD: + if( ISALPHA(x[0]) ){ + psp->declkeyword = x; + psp->declargslot = 0; + psp->decllinenoslot = 0; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + if( strcmp(x,"name")==0 ){ + psp->declargslot = &(psp->gp->name); + psp->insertLineMacro = 0; + }else if( strcmp(x,"include")==0 ){ + psp->declargslot = &(psp->gp->include); + }else if( strcmp(x,"code")==0 ){ + psp->declargslot = &(psp->gp->extracode); + }else if( strcmp(x,"token_destructor")==0 ){ + psp->declargslot = &psp->gp->tokendest; + }else if( strcmp(x,"default_destructor")==0 ){ + psp->declargslot = &psp->gp->vardest; + }else if( strcmp(x,"token_prefix")==0 ){ + psp->declargslot = &psp->gp->tokenprefix; + psp->insertLineMacro = 0; + }else if( strcmp(x,"syntax_error")==0 ){ + psp->declargslot = &(psp->gp->error); + }else if( strcmp(x,"parse_accept")==0 ){ + psp->declargslot = &(psp->gp->accept); + }else if( strcmp(x,"parse_failure")==0 ){ + psp->declargslot = &(psp->gp->failure); + }else if( strcmp(x,"stack_overflow")==0 ){ + psp->declargslot = &(psp->gp->overflow); + }else if( strcmp(x,"extra_argument")==0 ){ + psp->declargslot = &(psp->gp->arg); + psp->insertLineMacro = 0; + }else if( strcmp(x,"extra_context")==0 ){ + psp->declargslot = &(psp->gp->ctx); + psp->insertLineMacro = 0; + }else if( strcmp(x,"token_type")==0 ){ + psp->declargslot = &(psp->gp->tokentype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"default_type")==0 ){ + psp->declargslot = &(psp->gp->vartype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"stack_size")==0 ){ + psp->declargslot = &(psp->gp->stacksize); + psp->insertLineMacro = 0; + }else if( strcmp(x,"start_symbol")==0 ){ + psp->declargslot = &(psp->gp->start); + psp->insertLineMacro = 0; + }else if( strcmp(x,"left")==0 ){ + psp->preccounter++; + psp->declassoc = LEFT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"right")==0 ){ + psp->preccounter++; + psp->declassoc = RIGHT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"nonassoc")==0 ){ + psp->preccounter++; + psp->declassoc = NONE; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"destructor")==0 ){ + psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; + }else if( strcmp(x,"type")==0 ){ + psp->state = WAITING_FOR_DATATYPE_SYMBOL; + }else if( strcmp(x,"fallback")==0 ){ + psp->fallback = 0; + psp->state = WAITING_FOR_FALLBACK_ID; + }else if( strcmp(x,"token")==0 ){ + psp->state = WAITING_FOR_TOKEN_NAME; + }else if( strcmp(x,"wildcard")==0 ){ + psp->state = WAITING_FOR_WILDCARD_ID; + }else if( strcmp(x,"token_class")==0 ){ + psp->state = WAITING_FOR_CLASS_ID; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Unknown declaration keyword: \"%%%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal declaration keyword: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_DESTRUCTOR_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%destructor keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_new(x); + psp->declargslot = &sp->destructor; + psp->decllinenoslot = &sp->destLineno; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + } + break; + case WAITING_FOR_DATATYPE_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%type keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_find(x); + if((sp) && (sp->datatype)){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol %%type \"%s\" already defined", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + if (!sp){ + sp = Symbol_new(x); + } + psp->declargslot = &sp->datatype; + psp->insertLineMacro = 0; + psp->state = WAITING_FOR_DECL_ARG; + } + } + break; + case WAITING_FOR_PRECEDENCE_SYMBOL: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) ){ + struct symbol *sp; + sp = Symbol_new(x); + if( sp->prec>=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol \"%s\" has already be given a precedence.",x); + psp->errorcnt++; + }else{ + sp->prec = psp->preccounter; + sp->assoc = psp->declassoc; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't assign a precedence to \"%s\".",x); + psp->errorcnt++; + } + break; + case WAITING_FOR_DECL_ARG: + if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ + const char *zOld, *zNew; + char *zBuf, *z; + int nOld, n, nLine = 0, nNew, nBack; + int addLineMacro; + char zLine[50]; + zNew = x; + if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; + nNew = lemonStrlen(zNew); + if( *psp->declargslot ){ + zOld = *psp->declargslot; + }else{ + zOld = ""; + } + nOld = lemonStrlen(zOld); + n = nOld + nNew + 20; + addLineMacro = !psp->gp->nolinenosflag + && psp->insertLineMacro + && psp->tokenlineno>1 + && (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); + if( addLineMacro ){ + for(z=psp->filename, nBack=0; *z; z++){ + if( *z=='\\' ) nBack++; + } + lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); + nLine = lemonStrlen(zLine); + n += nLine + lemonStrlen(psp->filename) + nBack; + } + *psp->declargslot = (char *) realloc(*psp->declargslot, n); + zBuf = *psp->declargslot + nOld; + if( addLineMacro ){ + if( nOld && zBuf[-1]!='\n' ){ + *(zBuf++) = '\n'; + } + memcpy(zBuf, zLine, nLine); + zBuf += nLine; + *(zBuf++) = '"'; + for(z=psp->filename; *z; z++){ + if( *z=='\\' ){ + *(zBuf++) = '\\'; + } + *(zBuf++) = *z; + } + *(zBuf++) = '"'; + *(zBuf++) = '\n'; + } + if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ + psp->decllinenoslot[0] = psp->tokenlineno; + } + memcpy(zBuf, zNew, nNew); + zBuf += nNew; + *zBuf = 0; + psp->state = WAITING_FOR_DECL_OR_RULE; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal argument to %%%s: %s",psp->declkeyword,x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_FALLBACK_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%fallback argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->fallback==0 ){ + psp->fallback = sp; + }else if( sp->fallback ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "More than one fallback assigned to token %s", x); + psp->errorcnt++; + }else{ + sp->fallback = psp->fallback; + psp->gp->has_fallback = 1; + } + } + break; + case WAITING_FOR_TOKEN_NAME: + /* Tokens do not have to be declared before use. But they can be + ** in order to control their assigned integer number. The number for + ** each token is assigned when it is first seen. So by including + ** + ** %token ONE TWO THREE. + ** + ** early in the grammar file, that assigns small consecutive values + ** to each of the tokens ONE TWO and THREE. + */ + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + (void)Symbol_new(x); + } + break; + case WAITING_FOR_WILDCARD_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%wildcard argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->gp->wildcard==0 ){ + psp->gp->wildcard = sp; + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "Extra wildcard to token: %s", x); + psp->errorcnt++; + } + } + break; + case WAITING_FOR_CLASS_ID: + if( !ISLOWER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class must be followed by an identifier: %s", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else if( Symbol_find(x) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "Symbol \"%s\" already used", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + psp->tkclass = Symbol_new(x); + psp->tkclass->type = MULTITERMINAL; + psp->state = WAITING_FOR_CLASS_TOKEN; + } + break; + case WAITING_FOR_CLASS_TOKEN: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ + struct symbol *msp = psp->tkclass; + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + if( !ISUPPER(x[0]) ) x++; + msp->subsym[msp->nsubsym-1] = Symbol_new(x); + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class argument \"%s\" should be a token", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case RESYNC_AFTER_RULE_ERROR: +/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; +** break; */ + case RESYNC_AFTER_DECL_ERROR: + if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; + if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; + break; + } +} + +/* The text in the input is part of the argument to an %ifdef or %ifndef. +** Evaluate the text as a boolean expression. Return true or false. +*/ +static int eval_preprocessor_boolean(char *z, int lineno){ + int neg = 0; + int res = 0; + int okTerm = 1; + int i; + for(i=0; z[i]!=0; i++){ + if( ISSPACE(z[i]) ) continue; + if( z[i]=='!' ){ + if( !okTerm ) goto pp_syntax_error; + neg = !neg; + continue; + } + if( z[i]=='|' && z[i+1]=='|' ){ + if( okTerm ) goto pp_syntax_error; + if( res ) return 1; + i++; + okTerm = 1; + continue; + } + if( z[i]=='&' && z[i+1]=='&' ){ + if( okTerm ) goto pp_syntax_error; + if( !res ) return 0; + i++; + okTerm = 1; + continue; + } + if( z[i]=='(' ){ + int k; + int n = 1; + if( !okTerm ) goto pp_syntax_error; + for(k=i+1; z[k]; k++){ + if( z[k]==')' ){ + n--; + if( n==0 ){ + z[k] = 0; + res = eval_preprocessor_boolean(&z[i+1], -1); + z[k] = ')'; + if( res<0 ){ + i = i-res; + goto pp_syntax_error; + } + i = k; + break; + } + }else if( z[k]=='(' ){ + n++; + }else if( z[k]==0 ){ + i = k; + goto pp_syntax_error; + } + } + if( neg ){ + res = !res; + neg = 0; + } + okTerm = 0; + continue; + } + if( ISALPHA(z[i]) ){ + int j, k, n; + if( !okTerm ) goto pp_syntax_error; + for(k=i+1; ISALNUM(z[k]) || z[k]=='_'; k++){} + n = k - i; + res = 0; + for(j=0; j0 ){ + fprintf(stderr, "%%if syntax error on line %d.\n", lineno); + fprintf(stderr, " %.*s <-- syntax error here\n", i+1, z); + exit(1); + }else{ + return -(i+1); + } +} + +/* Run the preprocessor over the input file text. The global variables +** azDefine[0] through azDefine[nDefine-1] contains the names of all defined +** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and +** comments them out. Text in between is also commented out as appropriate. +*/ +static void preprocess_input(char *z){ + int i, j, k; + int exclude = 0; + int start = 0; + int lineno = 1; + int start_lineno = 1; + for(i=0; z[i]; i++){ + if( z[i]=='\n' ) lineno++; + if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; + if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ + if( exclude ){ + exclude--; + if( exclude==0 ){ + for(j=start; jfilename; + ps.errorcnt = 0; + ps.state = INITIALIZE; + + /* Begin by reading the input file */ + fp = fopen(ps.filename,"rb"); + if( fp==0 ){ + ErrorMsg(ps.filename,0,"Can't open this file for reading."); + gp->errorcnt++; + return; + } + fseek(fp,0,2); + filesize = ftell(fp); + rewind(fp); + filebuf = (char *)malloc( filesize+1 ); + if( filesize>100000000 || filebuf==0 ){ + ErrorMsg(ps.filename,0,"Input file too large."); + free(filebuf); + gp->errorcnt++; + fclose(fp); + return; + } + if( fread(filebuf,1,filesize,fp)!=filesize ){ + ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", + filesize); + free(filebuf); + gp->errorcnt++; + fclose(fp); + return; + } + fclose(fp); + filebuf[filesize] = 0; + + /* Make an initial pass through the file to handle %ifdef and %ifndef */ + preprocess_input(filebuf); + if( gp->printPreprocessed ){ + printf("%s\n", filebuf); + return; + } + + /* Now scan the text of the input file */ + lineno = 1; + for(cp=filebuf; (c= *cp)!=0; ){ + if( c=='\n' ) lineno++; /* Keep track of the line number */ + if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ + if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ + cp+=2; + while( (c= *cp)!=0 && c!='\n' ) cp++; + continue; + } + if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ + cp+=2; + if( (*cp)=='/' ) cp++; + while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c ) cp++; + continue; + } + ps.tokenstart = cp; /* Mark the beginning of the token */ + ps.tokenlineno = lineno; /* Linenumber on which token begins */ + if( c=='\"' ){ /* String literals */ + cp++; + while( (c= *cp)!=0 && c!='\"' ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c==0 ){ + ErrorMsg(ps.filename,startline, + "String starting on this line is not terminated before " + "the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( c=='{' ){ /* A block of C code */ + int level; + cp++; + for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ + if( c=='\n' ) lineno++; + else if( c=='{' ) level++; + else if( c=='}' ) level--; + else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ + int prevc; + cp = &cp[2]; + prevc = 0; + while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ + if( c=='\n' ) lineno++; + prevc = c; + cp++; + } + }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ + cp = &cp[2]; + while( (c= *cp)!=0 && c!='\n' ) cp++; + if( c ) lineno++; + }else if( c=='\'' || c=='\"' ){ /* String a character literals */ + int startchar, prevc; + startchar = c; + prevc = 0; + for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ + if( c=='\n' ) lineno++; + if( prevc=='\\' ) prevc = 0; + else prevc = c; + } + } + } + if( c==0 ){ + ErrorMsg(ps.filename,ps.tokenlineno, + "C code starting on this line is not terminated before " + "the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( ISALNUM(c) ){ /* Identifiers */ + while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ + cp += 3; + nextcp = cp; + }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ + cp += 2; + while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else{ /* All other (one character) operators */ + cp++; + nextcp = cp; + } + c = *cp; + *cp = 0; /* Null terminate the token */ + parseonetoken(&ps); /* Parse the token */ + *cp = (char)c; /* Restore the buffer */ + cp = nextcp; + } + free(filebuf); /* Release the buffer after parsing */ + gp->rule = ps.firstrule; + gp->errorcnt = ps.errorcnt; +} +/*************************** From the file "plink.c" *********************/ +/* +** Routines processing configuration follow-set propagation links +** in the LEMON parser generator. +*/ +static struct plink *plink_freelist = 0; + +/* Allocate a new plink */ +struct plink *Plink_new(void){ + struct plink *newlink; + + if( plink_freelist==0 ){ + int i; + int amt = 100; + plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); + if( plink_freelist==0 ){ + fprintf(stderr, + "Unable to allocate memory for a new follow-set propagation link.\n"); + exit(1); + } + for(i=0; inext; + return newlink; +} + +/* Add a plink to a plink list */ +void Plink_add(struct plink **plpp, struct config *cfp) +{ + struct plink *newlink; + newlink = Plink_new(); + newlink->next = *plpp; + *plpp = newlink; + newlink->cfp = cfp; +} + +/* Transfer every plink on the list "from" to the list "to" */ +void Plink_copy(struct plink **to, struct plink *from) +{ + struct plink *nextpl; + while( from ){ + nextpl = from->next; + from->next = *to; + *to = from; + from = nextpl; + } +} + +/* Delete every plink on the list */ +void Plink_delete(struct plink *plp) +{ + struct plink *nextpl; + + while( plp ){ + nextpl = plp->next; + plp->next = plink_freelist; + plink_freelist = plp; + plp = nextpl; + } +} +/*********************** From the file "report.c" **************************/ +/* +** Procedures for generating reports and tables in the LEMON parser generator. +*/ + +/* Generate a filename with the given suffix. Space to hold the +** name comes from malloc() and must be freed by the calling +** function. +*/ +PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) +{ + char *name; + char *cp; + char *filename = lemp->filename; + int sz; + + if( outputDir ){ + cp = strrchr(filename, '/'); + if( cp ) filename = cp + 1; + } + sz = lemonStrlen(filename); + sz += lemonStrlen(suffix); + if( outputDir ) sz += lemonStrlen(outputDir) + 1; + sz += 5; + name = (char*)malloc( sz ); + if( name==0 ){ + fprintf(stderr,"Can't allocate space for a filename.\n"); + exit(1); + } + name[0] = 0; + if( outputDir ){ + lemon_strcpy(name, outputDir); + lemon_strcat(name, "/"); + } + lemon_strcat(name,filename); + cp = strrchr(name,'.'); + if( cp ) *cp = 0; + lemon_strcat(name,suffix); + return name; +} + +/* Open a file with a name based on the name of the input file, +** but with a different (specified) suffix, and return a pointer +** to the stream */ +PRIVATE FILE *file_open( + struct lemon *lemp, + const char *suffix, + const char *mode +){ + FILE *fp; + + if( lemp->outname ) free(lemp->outname); + lemp->outname = file_makename(lemp, suffix); + fp = fopen(lemp->outname,mode); + if( fp==0 && *mode=='w' ){ + fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); + lemp->errorcnt++; + return 0; + } + return fp; +} + +/* Print the text of a rule +*/ +void rule_print(FILE *out, struct rule *rp){ + int i, j; + fprintf(out, "%s",rp->lhs->name); + /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */ + fprintf(out," ::="); + for(i=0; inrhs; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(out," %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + fprintf(out,"|%s", sp->subsym[j]->name); + } + }else{ + fprintf(out," %s", sp->name); + } + /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */ + } +} + +/* Duplicate the input file without comments and without actions +** on rules */ +void Reprint(struct lemon *lemp) +{ + struct rule *rp; + struct symbol *sp; + int i, j, maxlen, len, ncolumns, skip; + printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); + maxlen = 10; + for(i=0; insymbol; i++){ + sp = lemp->symbols[i]; + len = lemonStrlen(sp->name); + if( len>maxlen ) maxlen = len; + } + ncolumns = 76/(maxlen+5); + if( ncolumns<1 ) ncolumns = 1; + skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; + for(i=0; insymbol; j+=skip){ + sp = lemp->symbols[j]; + assert( sp->index==j ); + printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); + } + printf("\n"); + } + for(rp=lemp->rule; rp; rp=rp->next){ + rule_print(stdout, rp); + printf("."); + if( rp->precsym ) printf(" [%s]",rp->precsym->name); + /* if( rp->code ) printf("\n %s",rp->code); */ + printf("\n"); + } +} + +/* Print a single rule. +*/ +void RulePrint(FILE *fp, struct rule *rp, int iCursor){ + struct symbol *sp; + int i, j; + fprintf(fp,"%s ::=",rp->lhs->name); + for(i=0; i<=rp->nrhs; i++){ + if( i==iCursor ) fprintf(fp," *"); + if( i==rp->nrhs ) break; + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(fp," %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + fprintf(fp,"|%s",sp->subsym[j]->name); + } + }else{ + fprintf(fp," %s", sp->name); + } + } +} + +/* Print the rule for a configuration. +*/ +void ConfigPrint(FILE *fp, struct config *cfp){ + RulePrint(fp, cfp->rp, cfp->dot); +} + +/* #define TEST */ +#if 0 +/* Print a set */ +PRIVATE void SetPrint(out,set,lemp) +FILE *out; +char *set; +struct lemon *lemp; +{ + int i; + char *spacer; + spacer = ""; + fprintf(out,"%12s[",""); + for(i=0; interminal; i++){ + if( SetFind(set,i) ){ + fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); + spacer = " "; + } + } + fprintf(out,"]\n"); +} + +/* Print a plink chain */ +PRIVATE void PlinkPrint(out,plp,tag) +FILE *out; +struct plink *plp; +char *tag; +{ + while( plp ){ + fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); + ConfigPrint(out,plp->cfp); + fprintf(out,"\n"); + plp = plp->next; + } +} +#endif + +/* Print an action to the given file descriptor. Return FALSE if +** nothing was actually printed. +*/ +int PrintAction( + struct action *ap, /* The action to print */ + FILE *fp, /* Print the action here */ + int indent /* Indent by this amount */ +){ + int result = 1; + switch( ap->type ){ + case SHIFT: { + struct state *stp = ap->x.stp; + fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); + break; + } + case REDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case SHIFTREDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case ACCEPT: + fprintf(fp,"%*s accept",indent,ap->sp->name); + break; + case ERROR: + fprintf(fp,"%*s error",indent,ap->sp->name); + break; + case SRCONFLICT: + case RRCONFLICT: + fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.rp->iRule); + break; + case SSCONFLICT: + fprintf(fp,"%*s shift %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.stp->statenum); + break; + case SH_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s shift %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.stp->statenum); + }else{ + result = 0; + } + break; + case RD_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s reduce %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.rp->iRule); + }else{ + result = 0; + } + break; + case NOT_USED: + result = 0; + break; + } + if( result && ap->spOpt ){ + fprintf(fp," /* because %s==%s */", ap->sp->name, ap->spOpt->name); + } + return result; +} + +/* Generate the "*.out" log file */ +void ReportOutput(struct lemon *lemp) +{ + int i, n; + struct state *stp; + struct config *cfp; + struct action *ap; + struct rule *rp; + FILE *fp; + + fp = file_open(lemp,".out","wb"); + if( fp==0 ) return; + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + fprintf(fp,"State %d:\n",stp->statenum); + if( lemp->basisflag ) cfp=stp->bp; + else cfp=stp->cfp; + while( cfp ){ + char buf[20]; + if( cfp->dot==cfp->rp->nrhs ){ + lemon_sprintf(buf,"(%d)",cfp->rp->iRule); + fprintf(fp," %5s ",buf); + }else{ + fprintf(fp," "); + } + ConfigPrint(fp,cfp); + fprintf(fp,"\n"); +#if 0 + SetPrint(fp,cfp->fws,lemp); + PlinkPrint(fp,cfp->fplp,"To "); + PlinkPrint(fp,cfp->bplp,"From"); +#endif + if( lemp->basisflag ) cfp=cfp->bp; + else cfp=cfp->next; + } + fprintf(fp,"\n"); + for(ap=stp->ap; ap; ap=ap->next){ + if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); + } + fprintf(fp,"\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Symbols:\n"); + fprintf(fp, "The first-set of non-terminals is shown after the name.\n\n"); + for(i=0; insymbol; i++){ + int j; + struct symbol *sp; + + sp = lemp->symbols[i]; + fprintf(fp, " %3d: %s", i, sp->name); + if( sp->type==NONTERMINAL ){ + fprintf(fp, ":"); + if( sp->lambda ){ + fprintf(fp, " "); + } + for(j=0; jnterminal; j++){ + if( sp->firstset && SetFind(sp->firstset, j) ){ + fprintf(fp, " %s", lemp->symbols[j]->name); + } + } + } + if( sp->prec>=0 ) fprintf(fp," (precedence=%d)", sp->prec); + fprintf(fp, "\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Syntax-only Symbols:\n"); + fprintf(fp, "The following symbols never carry semantic content.\n\n"); + for(i=n=0; insymbol; i++){ + int w; + struct symbol *sp = lemp->symbols[i]; + if( sp->bContent ) continue; + w = (int)strlen(sp->name); + if( n>0 && n+w>75 ){ + fprintf(fp,"\n"); + n = 0; + } + if( n>0 ){ + fprintf(fp, " "); + n++; + } + fprintf(fp, "%s", sp->name); + n += w; + } + if( n>0 ) fprintf(fp, "\n"); + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Rules:\n"); + for(rp=lemp->rule; rp; rp=rp->next){ + fprintf(fp, "%4d: ", rp->iRule); + rule_print(fp, rp); + fprintf(fp,"."); + if( rp->precsym ){ + fprintf(fp," [%s precedence=%d]", + rp->precsym->name, rp->precsym->prec); + } + fprintf(fp,"\n"); + } + fclose(fp); + return; +} + +/* Search for the file "name" which is in the same directory as +** the executable */ +PRIVATE char *pathsearch(char *argv0, char *name, int modemask) +{ + const char *pathlist; + char *pathbufptr = 0; + char *pathbuf = 0; + char *path,*cp; + char c; + +#ifdef __WIN32__ + cp = strrchr(argv0,'\\'); +#else + cp = strrchr(argv0,'/'); +#endif + if( cp ){ + c = *cp; + *cp = 0; + path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); + if( path ) lemon_sprintf(path,"%s/%s",argv0,name); + *cp = c; + }else{ + pathlist = getenv("PATH"); + if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; + pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); + path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); + if( (pathbuf != 0) && (path!=0) ){ + pathbufptr = pathbuf; + lemon_strcpy(pathbuf, pathlist); + while( *pathbuf ){ + cp = strchr(pathbuf,':'); + if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; + c = *cp; + *cp = 0; + lemon_sprintf(path,"%s/%s",pathbuf,name); + *cp = c; + if( c==0 ) pathbuf[0] = 0; + else pathbuf = &cp[1]; + if( access(path,modemask)==0 ) break; + } + } + free(pathbufptr); + } + return path; +} + +/* Given an action, compute the integer value for that action +** which is to be put in the action table of the generated machine. +** Return negative if no action should be generated. +*/ +PRIVATE int compute_action(struct lemon *lemp, struct action *ap) +{ + int act; + switch( ap->type ){ + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: { + /* Since a SHIFT is inherient after a prior REDUCE, convert any + ** SHIFTREDUCE action with a nonterminal on the LHS into a simple + ** REDUCE action: */ + if( ap->sp->index>=lemp->nterminal + && (lemp->errsym==0 || ap->sp->index!=lemp->errsym->index) + ){ + act = lemp->minReduce + ap->x.rp->iRule; + }else{ + act = lemp->minShiftReduce + ap->x.rp->iRule; + } + break; + } + case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break; + case ERROR: act = lemp->errAction; break; + case ACCEPT: act = lemp->accAction; break; + default: act = -1; break; + } + return act; +} + +#define LINESIZE 1000 +/* The next cluster of routines are for reading the template file +** and writing the results to the generated parser */ +/* The first function transfers data from "in" to "out" until +** a line is seen which begins with "%%". The line number is +** tracked. +** +** if name!=0, then any word that begin with "Parse" is changed to +** begin with *name instead. +*/ +PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) +{ + int i, iStart; + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + iStart = 0; + if( name ){ + for(i=0; line[i]; i++){ + if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 + && (i==0 || !ISALPHA(line[i-1])) + ){ + if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); + fprintf(out,"%s",name); + i += 4; + iStart = i+1; + } + } + } + fprintf(out,"%s",&line[iStart]); + } +} + +/* Skip forward past the header of the template file to the first "%%" +*/ +PRIVATE void tplt_skip_header(FILE *in, int *lineno) +{ + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + } +} + +/* The next function finds the template file and opens it, returning +** a pointer to the opened file. */ +PRIVATE FILE *tplt_open(struct lemon *lemp) +{ + static char templatename[] = "lempar.c"; + char buf[1000]; + FILE *in; + char *tpltname; + char *toFree = 0; + char *cp; + + /* first, see if user specified a template filename on the command line. */ + if (user_templatename != 0) { + if( access(user_templatename,004)==-1 ){ + fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", + user_templatename); + lemp->errorcnt++; + return 0; + } + in = fopen(user_templatename,"rb"); + if( in==0 ){ + fprintf(stderr,"Can't open the template file \"%s\".\n", + user_templatename); + lemp->errorcnt++; + return 0; + } + return in; + } + + cp = strrchr(lemp->filename,'.'); + if( cp ){ + lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); + }else{ + lemon_sprintf(buf,"%s.lt",lemp->filename); + } + if( access(buf,004)==0 ){ + tpltname = buf; + }else if( access(templatename,004)==0 ){ + tpltname = templatename; + }else{ + toFree = tpltname = pathsearch(lemp->argv0,templatename,0); + } + if( tpltname==0 ){ + fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", + templatename); + lemp->errorcnt++; + return 0; + } + in = fopen(tpltname,"rb"); + if( in==0 ){ + fprintf(stderr,"Can't open the template file \"%s\".\n",tpltname); + lemp->errorcnt++; + } + free(toFree); + return in; +} + +/* Print a #line directive line to the output file. */ +PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) +{ + fprintf(out,"#line %d \"",lineno); + while( *filename ){ + if( *filename == '\\' ) putc('\\',out); + putc(*filename,out); + filename++; + } + fprintf(out,"\"\n"); +} + +/* Print a string to the file and keep the linenumber up to date */ +PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) +{ + if( str==0 ) return; + while( *str ){ + putc(*str,out); + if( *str=='\n' ) (*lineno)++; + str++; + } + if( str[-1]!='\n' ){ + putc('\n',out); + (*lineno)++; + } + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + return; +} + +/* +** The following routine emits code for the destructor for the +** symbol sp +*/ +void emit_destructor_code( + FILE *out, + struct symbol *sp, + struct lemon *lemp, + int *lineno +){ + char *cp = 0; + + if( sp->type==TERMINAL ){ + cp = lemp->tokendest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else if( sp->destructor ){ + cp = sp->destructor; + fprintf(out,"{\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->filename); + } + }else if( lemp->vardest ){ + cp = lemp->vardest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else{ + assert( 0 ); /* Cannot happen */ + } + for(; *cp; cp++){ + if( *cp=='$' && cp[1]=='$' ){ + fprintf(out,"(yypminor->yy%d)",sp->dtnum); + cp++; + continue; + } + if( *cp=='\n' ) (*lineno)++; + fputc(*cp,out); + } + fprintf(out,"\n"); (*lineno)++; + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + fprintf(out,"}\n"); (*lineno)++; + return; +} + +/* +** Return TRUE (non-zero) if the given symbol has a destructor. +*/ +int has_destructor(struct symbol *sp, struct lemon *lemp) +{ + int ret; + if( sp->type==TERMINAL ){ + ret = lemp->tokendest!=0; + }else{ + ret = lemp->vardest!=0 || sp->destructor!=0; + } + return ret; +} + +/* +** Append text to a dynamically allocated string. If zText is 0 then +** reset the string to be empty again. Always return the complete text +** of the string (which is overwritten with each call). +** +** n bytes of zText are stored. If n==0 then all of zText up to the first +** \000 terminator is stored. zText can contain up to two instances of +** %d. The values of p1 and p2 are written into the first and second +** %d. +** +** If n==-1, then the previous character is overwritten. +*/ +PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ + static char empty[1] = { 0 }; + static char *z = 0; + static int alloced = 0; + static int used = 0; + int c; + char zInt[40]; + if( zText==0 ){ + if( used==0 && z!=0 ) z[0] = 0; + used = 0; + return z; + } + if( n<=0 ){ + if( n<0 ){ + used += n; + assert( used>=0 ); + } + n = lemonStrlen(zText); + } + if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ + alloced = n + sizeof(zInt)*2 + used + 200; + z = (char *) realloc(z, alloced); + } + if( z==0 ) return empty; + while( n-- > 0 ){ + c = *(zText++); + if( c=='%' && n>0 && zText[0]=='d' ){ + lemon_sprintf(zInt, "%d", p1); + p1 = p2; + lemon_strcpy(&z[used], zInt); + used += lemonStrlen(&z[used]); + zText++; + n--; + }else{ + z[used++] = (char)c; + } + } + z[used] = 0; + return z; +} + +/* +** Write and transform the rp->code string so that symbols are expanded. +** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate. +** +** Return 1 if the expanded code requires that "yylhsminor" local variable +** to be defined. +*/ +PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ + char *cp, *xp; + int i; + int rc = 0; /* True if yylhsminor is used */ + int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ + const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ + char lhsused = 0; /* True if the LHS element has been used */ + char lhsdirect; /* True if LHS writes directly into stack */ + char used[MAXRHS]; /* True for each RHS element which is used */ + char zLhs[50]; /* Convert the LHS symbol into this string */ + char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ + + for(i=0; inrhs; i++) used[i] = 0; + lhsused = 0; + + if( rp->code==0 ){ + static char newlinestr[2] = { '\n', '\0' }; + rp->code = newlinestr; + rp->line = rp->ruleline; + rp->noCode = 1; + }else{ + rp->noCode = 0; + } + + + if( rp->nrhs==0 ){ + /* If there are no RHS symbols, then writing directly to the LHS is ok */ + lhsdirect = 1; + }else if( rp->rhsalias[0]==0 ){ + /* The left-most RHS symbol has no value. LHS direct is ok. But + ** we have to call the destructor on the RHS symbol first. */ + lhsdirect = 1; + if( has_destructor(rp->rhs[0],lemp) ){ + append_str(0,0,0,0); + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[0]->index,1-rp->nrhs); + rp->codePrefix = Strsafe(append_str(0,0,0,0)); + rp->noCode = 0; + } + }else if( rp->lhsalias==0 ){ + /* There is no LHS value symbol. */ + lhsdirect = 1; + }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ + /* The LHS symbol and the left-most RHS symbol are the same, so + ** direct writing is allowed */ + lhsdirect = 1; + lhsused = 1; + used[0] = 1; + if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) and %s(%s) share the same label but have " + "different datatypes.", + rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); + lemp->errorcnt++; + } + }else{ + lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", + rp->lhsalias, rp->rhsalias[0]); + zSkip = strstr(rp->code, zOvwrt); + if( zSkip!=0 ){ + /* The code contains a special comment that indicates that it is safe + ** for the LHS label to overwrite left-most RHS label. */ + lhsdirect = 1; + }else{ + lhsdirect = 0; + } + } + if( lhsdirect ){ + sprintf(zLhs, "yymsp[%d].minor.yy%d",1-rp->nrhs,rp->lhs->dtnum); + }else{ + rc = 1; + sprintf(zLhs, "yylhsminor.yy%d",rp->lhs->dtnum); + } + + append_str(0,0,0,0); + + /* This const cast is wrong but harmless, if we're careful. */ + for(cp=(char *)rp->code; *cp; cp++){ + if( cp==zSkip ){ + append_str(zOvwrt,0,0,0); + cp += lemonStrlen(zOvwrt)-1; + dontUseRhs0 = 1; + continue; + } + if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ + char saved; + for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); + saved = *xp; + *xp = 0; + if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ + append_str(zLhs,0,0,0); + cp = xp; + lhsused = 1; + }else{ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ + if( i==0 && dontUseRhs0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used after '%s'.", + rp->rhsalias[0], zOvwrt); + lemp->errorcnt++; + }else if( cp!=rp->code && cp[-1]=='@' ){ + /* If the argument is of the form @X then substituted + ** the token number of X, not the value of X */ + append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); + }else{ + struct symbol *sp = rp->rhs[i]; + int dtnum; + if( sp->type==MULTITERMINAL ){ + dtnum = sp->subsym[0]->dtnum; + }else{ + dtnum = sp->dtnum; + } + append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); + } + cp = xp; + used[i] = 1; + break; + } + } + } + *xp = saved; + } + append_str(cp, 1, 0, 0); + } /* End loop */ + + /* Main code generation completed */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ) rp->code = Strsafe(cp); + append_str(0,0,0,0); + + /* Check to make sure the LHS has been used */ + if( rp->lhsalias && !lhsused ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label \"%s\" for \"%s(%s)\" is never used.", + rp->lhsalias,rp->lhs->name,rp->lhsalias); + lemp->errorcnt++; + } + + /* Generate destructor code for RHS minor values which are not referenced. + ** Generate error messages for unused labels and duplicate labels. + */ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] ){ + if( i>0 ){ + int j; + if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) has the same label as the LHS but is not the left-most " + "symbol on the RHS.", + rp->rhs[i]->name, rp->rhsalias[i]); + lemp->errorcnt++; + } + for(j=0; jrhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used for multiple symbols on the RHS of a rule.", + rp->rhsalias[i]); + lemp->errorcnt++; + break; + } + } + } + if( !used[i] ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s for \"%s(%s)\" is never used.", + rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); + lemp->errorcnt++; + } + }else if( i>0 && has_destructor(rp->rhs[i],lemp) ){ + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[i]->index,i-rp->nrhs+1); + } + } + + /* If unable to write LHS values directly into the stack, write the + ** saved LHS value now. */ + if( lhsdirect==0 ){ + append_str(" yymsp[%d].minor.yy%d = ", 0, 1-rp->nrhs, rp->lhs->dtnum); + append_str(zLhs, 0, 0, 0); + append_str(";\n", 0, 0, 0); + } + + /* Suffix code generation complete */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ){ + rp->codeSuffix = Strsafe(cp); + rp->noCode = 0; + } + + return rc; +} + +/* +** Generate code which executes when the rule "rp" is reduced. Write +** the code to "out". Make sure lineno stays up-to-date. +*/ +PRIVATE void emit_code( + FILE *out, + struct rule *rp, + struct lemon *lemp, + int *lineno +){ + const char *cp; + + /* Setup code prior to the #line directive */ + if( rp->codePrefix && rp->codePrefix[0] ){ + fprintf(out, "{%s", rp->codePrefix); + for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + /* Generate code to do the reduce action */ + if( rp->code ){ + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,rp->line,lemp->filename); + } + fprintf(out,"{%s",rp->code); + for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + fprintf(out,"}\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,*lineno,lemp->outname); + } + } + + /* Generate breakdown code that occurs after the #line directive */ + if( rp->codeSuffix && rp->codeSuffix[0] ){ + fprintf(out, "%s", rp->codeSuffix); + for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + if( rp->codePrefix ){ + fprintf(out, "}\n"); (*lineno)++; + } + + return; +} + +/* +** Print the definition of the union used for the parser's data stack. +** This union contains fields for every possible data type for tokens +** and nonterminals. In the process of computing and printing this +** union, also set the ".dtnum" field of every terminal and nonterminal +** symbol. +*/ +void print_stack_union( + FILE *out, /* The output stream */ + struct lemon *lemp, /* The main info structure for this parser */ + int *plineno, /* Pointer to the line number */ + int mhflag /* True if generating makeheaders output */ +){ + int lineno; /* The line number of the output */ + char **types; /* A hash table of datatypes */ + int arraysize; /* Size of the "types" array */ + int maxdtlength; /* Maximum length of any ".datatype" field. */ + char *stddt; /* Standardized name for a datatype */ + int i,j; /* Loop counters */ + unsigned hash; /* For hashing the name of a type */ + const char *name; /* Name of the parser */ + + /* Allocate and initialize types[] and allocate stddt[] */ + arraysize = lemp->nsymbol * 2; + types = (char**)calloc( arraysize, sizeof(char*) ); + if( types==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + for(i=0; ivartype ){ + maxdtlength = lemonStrlen(lemp->vartype); + } + for(i=0; insymbol; i++){ + int len; + struct symbol *sp = lemp->symbols[i]; + if( sp->datatype==0 ) continue; + len = lemonStrlen(sp->datatype); + if( len>maxdtlength ) maxdtlength = len; + } + stddt = (char*)malloc( maxdtlength*2 + 1 ); + if( stddt==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + + /* Build a hash table of datatypes. The ".dtnum" field of each symbol + ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is + ** used for terminal symbols. If there is no %default_type defined then + ** 0 is also used as the .dtnum value for nonterminals which do not specify + ** a datatype using the %type directive. + */ + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + char *cp; + if( sp==lemp->errsym ){ + sp->dtnum = arraysize+1; + continue; + } + if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ + sp->dtnum = 0; + continue; + } + cp = sp->datatype; + if( cp==0 ) cp = lemp->vartype; + j = 0; + while( ISSPACE(*cp) ) cp++; + while( *cp ) stddt[j++] = *cp++; + while( j>0 && ISSPACE(stddt[j-1]) ) j--; + stddt[j] = 0; + if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ + sp->dtnum = 0; + continue; + } + hash = 0; + for(j=0; stddt[j]; j++){ + hash = hash*53 + stddt[j]; + } + hash = (hash & 0x7fffffff)%arraysize; + while( types[hash] ){ + if( strcmp(types[hash],stddt)==0 ){ + sp->dtnum = hash + 1; + break; + } + hash++; + if( hash>=(unsigned)arraysize ) hash = 0; + } + if( types[hash]==0 ){ + sp->dtnum = hash + 1; + types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); + if( types[hash]==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + lemon_strcpy(types[hash],stddt); + } + } + + /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ + name = lemp->name ? lemp->name : "Parse"; + lineno = *plineno; + if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } + fprintf(out,"#define %sTOKENTYPE %s\n",name, + lemp->tokentype?lemp->tokentype:"void*"); lineno++; + if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } + fprintf(out,"typedef union {\n"); lineno++; + fprintf(out," int yyinit;\n"); lineno++; + fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; + for(i=0; ierrsym && lemp->errsym->useCnt ){ + fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + } + free(stddt); + free(types); + fprintf(out,"} YYMINORTYPE;\n"); lineno++; + *plineno = lineno; +} + +/* +** Return the name of a C datatype able to represent values between +** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof +** for that type (1, 2, or 4) into *pnByte. +*/ +static const char *minimum_size_type(int lwr, int upr, int *pnByte){ + const char *zType = "int"; + int nByte = 4; + if( lwr>=0 ){ + if( upr<=255 ){ + zType = "unsigned char"; + nByte = 1; + }else if( upr<65535 ){ + zType = "unsigned short int"; + nByte = 2; + }else{ + zType = "unsigned int"; + nByte = 4; + } + }else if( lwr>=-127 && upr<=127 ){ + zType = "signed char"; + nByte = 1; + }else if( lwr>=-32767 && upr<32767 ){ + zType = "short"; + nByte = 2; + } + if( pnByte ) *pnByte = nByte; + return zType; +} + +/* +** Each state contains a set of token transaction and a set of +** nonterminal transactions. Each of these sets makes an instance +** of the following structure. An array of these structures is used +** to order the creation of entries in the yy_action[] table. +*/ +struct axset { + struct state *stp; /* A pointer to a state */ + int isTkn; /* True to use tokens. False for non-terminals */ + int nAction; /* Number of actions */ + int iOrder; /* Original order of action sets */ +}; + +/* +** Compare to axset structures for sorting purposes +*/ +static int axset_compare(const void *a, const void *b){ + struct axset *p1 = (struct axset*)a; + struct axset *p2 = (struct axset*)b; + int c; + c = p2->nAction - p1->nAction; + if( c==0 ){ + c = p1->iOrder - p2->iOrder; + } + assert( c!=0 || p1==p2 ); + return c; +} + +/* +** Write text on "out" that describes the rule "rp". +*/ +static void writeRuleText(FILE *out, struct rule *rp){ + int j; + fprintf(out,"%s ::=", rp->lhs->name); + for(j=0; jnrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(out," %s", sp->name); + }else{ + int k; + fprintf(out," %s", sp->subsym[0]->name); + for(k=1; knsubsym; k++){ + fprintf(out,"|%s",sp->subsym[k]->name); + } + } + } +} + + +/* Generate C source code for the parser */ +void ReportTable( + struct lemon *lemp, + int mhflag, /* Output in makeheaders format if true */ + int sqlFlag /* Generate the *.sql file too */ +){ + FILE *out, *in, *sql; + int lineno; + struct state *stp; + struct action *ap; + struct rule *rp; + struct acttab *pActtab; + int i, j, n, sz; + int nLookAhead; + int szActionType; /* sizeof(YYACTIONTYPE) */ + int szCodeType; /* sizeof(YYCODETYPE) */ + const char *name; + int mnTknOfst, mxTknOfst; + int mnNtOfst, mxNtOfst; + struct axset *ax; + char *prefix; + + lemp->minShiftReduce = lemp->nstate; + lemp->errAction = lemp->minShiftReduce + lemp->nrule; + lemp->accAction = lemp->errAction + 1; + lemp->noAction = lemp->accAction + 1; + lemp->minReduce = lemp->noAction + 1; + lemp->maxAction = lemp->minReduce + lemp->nrule; + + in = tplt_open(lemp); + if( in==0 ) return; + out = file_open(lemp,".c","wb"); + if( out==0 ){ + fclose(in); + return; + } + if( sqlFlag==0 ){ + sql = 0; + }else{ + sql = file_open(lemp, ".sql", "wb"); + if( sql==0 ){ + fclose(in); + fclose(out); + return; + } + fprintf(sql, + "BEGIN;\n" + "CREATE TABLE symbol(\n" + " id INTEGER PRIMARY KEY,\n" + " name TEXT NOT NULL,\n" + " isTerminal BOOLEAN NOT NULL,\n" + " fallback INTEGER REFERENCES symbol" + " DEFERRABLE INITIALLY DEFERRED\n" + ");\n" + ); + for(i=0; insymbol; i++){ + fprintf(sql, + "INSERT INTO symbol(id,name,isTerminal,fallback)" + "VALUES(%d,'%s',%s", + i, lemp->symbols[i]->name, + interminal ? "TRUE" : "FALSE" + ); + if( lemp->symbols[i]->fallback ){ + fprintf(sql, ",%d);\n", lemp->symbols[i]->fallback->index); + }else{ + fprintf(sql, ",NULL);\n"); + } + } + fprintf(sql, + "CREATE TABLE rule(\n" + " ruleid INTEGER PRIMARY KEY,\n" + " lhs INTEGER REFERENCES symbol(id),\n" + " txt TEXT\n" + ");\n" + "CREATE TABLE rulerhs(\n" + " ruleid INTEGER REFERENCES rule(ruleid),\n" + " pos INTEGER,\n" + " sym INTEGER REFERENCES symbol(id)\n" + ");\n" + ); + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( i==rp->iRule ); + fprintf(sql, + "INSERT INTO rule(ruleid,lhs,txt)VALUES(%d,%d,'", + rp->iRule, rp->lhs->index + ); + writeRuleText(sql, rp); + fprintf(sql,"');\n"); + for(j=0; jnrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(sql, + "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", + i,j,sp->index + ); + }else{ + int k; + for(k=0; knsubsym; k++){ + fprintf(sql, + "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", + i,j,sp->subsym[k]->index + ); + } + } + } + } + fprintf(sql, "COMMIT;\n"); + } + lineno = 1; + + fprintf(out, + "/* This file is automatically generated by Lemon from input grammar\n" + "** source file \"%s\". */\n", lemp->filename); lineno += 2; + + /* The first %include directive begins with a C-language comment, + ** then skip over the header comment of the template file + */ + if( lemp->include==0 ) lemp->include = ""; + for(i=0; ISSPACE(lemp->include[i]); i++){ + if( lemp->include[i]=='\n' ){ + lemp->include += i+1; + i = -1; + } + } + if( lemp->include[0]=='/' ){ + tplt_skip_header(in,&lineno); + }else{ + tplt_xfer(lemp->name,in,out,&lineno); + } + + /* Generate the include code, if any */ + tplt_print(out,lemp,lemp->include,&lineno); + if( mhflag ){ + char *incName = file_makename(lemp, ".h"); + fprintf(out,"#include \"%s\"\n", incName); lineno++; + free(incName); + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate #defines for all tokens */ + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + if( mhflag ){ + fprintf(out,"#if INTERFACE\n"); lineno++; + }else{ + fprintf(out,"#ifndef %s%s\n", prefix, lemp->symbols[1]->name); + } + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + lineno++; + } + fprintf(out,"#endif\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the defines */ + fprintf(out,"#define YYCODETYPE %s\n", + minimum_size_type(0, lemp->nsymbol, &szCodeType)); lineno++; + fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol); lineno++; + fprintf(out,"#define YYACTIONTYPE %s\n", + minimum_size_type(0,lemp->maxAction,&szActionType)); lineno++; + if( lemp->wildcard ){ + fprintf(out,"#define YYWILDCARD %d\n", + lemp->wildcard->index); lineno++; + } + print_stack_union(out,lemp,&lineno,mhflag); + fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; + if( lemp->stacksize ){ + fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; + }else{ + fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; + } + fprintf(out, "#endif\n"); lineno++; + if( mhflag ){ + fprintf(out,"#if INTERFACE\n"); lineno++; + } + name = lemp->name ? lemp->name : "Parse"; + if( lemp->arg && lemp->arg[0] ){ + i = lemonStrlen(lemp->arg); + while( i>=1 && ISSPACE(lemp->arg[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; + fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_PARAM ,%s\n",name,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_FETCH %s=yypParser->%s;\n", + name,lemp->arg,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_STORE yypParser->%s=%s;\n", + name,&lemp->arg[i],&lemp->arg[i]); lineno++; + }else{ + fprintf(out,"#define %sARG_SDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PARAM\n",name); lineno++; + fprintf(out,"#define %sARG_FETCH\n",name); lineno++; + fprintf(out,"#define %sARG_STORE\n",name); lineno++; + } + if( lemp->ctx && lemp->ctx[0] ){ + i = lemonStrlen(lemp->ctx); + while( i>=1 && ISSPACE(lemp->ctx[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->ctx[i-1]) || lemp->ctx[i-1]=='_') ) i--; + fprintf(out,"#define %sCTX_SDECL %s;\n",name,lemp->ctx); lineno++; + fprintf(out,"#define %sCTX_PDECL ,%s\n",name,lemp->ctx); lineno++; + fprintf(out,"#define %sCTX_PARAM ,%s\n",name,&lemp->ctx[i]); lineno++; + fprintf(out,"#define %sCTX_FETCH %s=yypParser->%s;\n", + name,lemp->ctx,&lemp->ctx[i]); lineno++; + fprintf(out,"#define %sCTX_STORE yypParser->%s=%s;\n", + name,&lemp->ctx[i],&lemp->ctx[i]); lineno++; + }else{ + fprintf(out,"#define %sCTX_SDECL\n",name); lineno++; + fprintf(out,"#define %sCTX_PDECL\n",name); lineno++; + fprintf(out,"#define %sCTX_PARAM\n",name); lineno++; + fprintf(out,"#define %sCTX_FETCH\n",name); lineno++; + fprintf(out,"#define %sCTX_STORE\n",name); lineno++; + } + if( mhflag ){ + fprintf(out,"#endif\n"); lineno++; + } + if( lemp->errsym && lemp->errsym->useCnt ){ + fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; + fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + } + if( lemp->has_fallback ){ + fprintf(out,"#define YYFALLBACK 1\n"); lineno++; + } + + /* Compute the action table, but do not output it yet. The action + ** table must be computed before generating the YYNSTATE macro because + ** we need to know how many states can be eliminated. + */ + ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); + if( ax==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + ax[i*2].stp = stp; + ax[i*2].isTkn = 1; + ax[i*2].nAction = stp->nTknAct; + ax[i*2+1].stp = stp; + ax[i*2+1].isTkn = 0; + ax[i*2+1].nAction = stp->nNtAct; + } + mxTknOfst = mnTknOfst = 0; + mxNtOfst = mnNtOfst = 0; + /* In an effort to minimize the action table size, use the heuristic + ** of placing the largest action sets first */ + for(i=0; inxstate*2; i++) ax[i].iOrder = i; + qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); + pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); + for(i=0; inxstate*2 && ax[i].nAction>0; i++){ + stp = ax[i].stp; + if( ax[i].isTkn ){ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->index>=lemp->nterminal ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iTknOfst = acttab_insert(pActtab, 1); + if( stp->iTknOfstiTknOfst; + if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; + }else{ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->indexnterminal ) continue; + if( ap->sp->index==lemp->nsymbol ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iNtOfst = acttab_insert(pActtab, 0); + if( stp->iNtOfstiNtOfst; + if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; + } +#if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ + { int jj, nn; + for(jj=nn=0; jjnAction; jj++){ + if( pActtab->aAction[jj].action<0 ) nn++; + } + printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", + i, stp->statenum, ax[i].isTkn ? "Token" : "Var ", + ax[i].nAction, pActtab->nAction, nn); + } +#endif + } + free(ax); + + /* Mark rules that are actually used for reduce actions after all + ** optimizations have been applied + */ + for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; + for(i=0; inxstate; i++){ + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ + ap->x.rp->doesReduce = 1; + } + } + } + + /* Finish rendering the constants now that the action table has + ** been computed */ + fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; + fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; + fprintf(out,"#define YYNRULE_WITH_ACTION %d\n",lemp->nruleWithAction); + lineno++; + fprintf(out,"#define YYNTOKEN %d\n",lemp->nterminal); lineno++; + fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; + i = lemp->minShiftReduce; + fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",i); lineno++; + i += lemp->nrule; + fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_ERROR_ACTION %d\n", lemp->errAction); lineno++; + fprintf(out,"#define YY_ACCEPT_ACTION %d\n", lemp->accAction); lineno++; + fprintf(out,"#define YY_NO_ACTION %d\n", lemp->noAction); lineno++; + fprintf(out,"#define YY_MIN_REDUCE %d\n", lemp->minReduce); lineno++; + i = lemp->minReduce + lemp->nrule; + fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Now output the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. + */ + + /* Output the yy_action table */ + lemp->nactiontab = n = acttab_action_size(pActtab); + lemp->tablesize += n*szActionType; + fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; + fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; + for(i=j=0; inoAction; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", action); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_lookahead table */ + lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); + lemp->tablesize += n*szCodeType; + fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; + for(i=j=0; insymbol; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", la); + if( j==9 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + /* Add extra entries to the end of the yy_lookahead[] table so that + ** yy_shift_ofst[]+iToken will always be a valid index into the array, + ** even for the largest possible value of yy_shift_ofst[] and iToken. */ + nLookAhead = lemp->nterminal + lemp->nactiontab; + while( interminal); + if( j==9 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + i++; + } + if( j>0 ){ fprintf(out, "\n"); lineno++; } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_shift_ofst[] table */ + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; + fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; + fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; + fprintf(out, "static const %s yy_shift_ofst[] = {\n", + minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); + lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iTknOfst; + if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_reduce_ofst[] table */ + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; + fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; + fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; + fprintf(out, "static const %s yy_reduce_ofst[] = {\n", + minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iNtOfst; + if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the default action table */ + fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; + n = lemp->nxstate; + lemp->tablesize += n*szActionType; + for(i=j=0; isorted[i]; + if( j==0 ) fprintf(out," /* %5d */ ", i); + if( stp->iDfltReduce<0 ){ + fprintf(out, " %4d,", lemp->errAction); + }else{ + fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce); + } + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of fallback tokens. + */ + if( lemp->has_fallback ){ + int mx = lemp->nterminal - 1; + /* 2019-08-28: Generate fallback entries for every token to avoid + ** having to do a range check on the index */ + /* while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } */ + lemp->tablesize += (mx+1)*szCodeType; + for(i=0; i<=mx; i++){ + struct symbol *p = lemp->symbols[i]; + if( p->fallback==0 ){ + fprintf(out, " 0, /* %10s => nothing */\n", p->name); + }else{ + fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, + p->name, p->fallback->name); + } + lineno++; + } + } + tplt_xfer(lemp->name, in, out, &lineno); + + /* Generate a table containing the symbolic name of every symbol + */ + for(i=0; insymbol; i++){ + fprintf(out," /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate a table containing a text string that describes every + ** rule in the rule set of the grammar. This information is used + ** when tracing REDUCE actions. + */ + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( rp->iRule==i ); + fprintf(out," /* %3d */ \"", i); + writeRuleText(out, rp); + fprintf(out,"\",\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes every time a symbol is popped from + ** the stack while processing errors or while destroying the parser. + ** (In other words, generate the %destructor actions) + */ + if( lemp->tokendest ){ + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type!=TERMINAL ) continue; + if( once ){ + fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + } + for(i=0; insymbol && lemp->symbols[i]->type!=TERMINAL; i++); + if( insymbol ){ + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + } + if( lemp->vardest ){ + struct symbol *dflt_sp = 0; + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || + sp->index<=0 || sp->destructor!=0 ) continue; + if( once ){ + fprintf(out, " /* Default NON-TERMINAL Destructor */\n");lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + dflt_sp = sp; + } + if( dflt_sp!=0 ){ + emit_destructor_code(out,dflt_sp,lemp,&lineno); + } + fprintf(out," break;\n"); lineno++; + } + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; + if( sp->destLineno<0 ) continue; /* Already emitted */ + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + + /* Combine duplicate destructors into a single case */ + for(j=i+1; jnsymbol; j++){ + struct symbol *sp2 = lemp->symbols[j]; + if( sp2 && sp2->type!=TERMINAL && sp2->destructor + && sp2->dtnum==sp->dtnum + && strcmp(sp->destructor,sp2->destructor)==0 ){ + fprintf(out," case %d: /* %s */\n", + sp2->index, sp2->name); lineno++; + sp2->destLineno = -1; /* Avoid emitting this destructor again */ + } + } + + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes whenever the parser stack overflows */ + tplt_print(out,lemp,lemp->overflow,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the tables of rule information. yyRuleInfoLhs[] and + ** yyRuleInfoNRhs[]. + ** + ** Note: This code depends on the fact that rules are number + ** sequentially beginning with 0. + */ + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + fprintf(out," %4d, /* (%d) ", rp->lhs->index, i); + rule_print(out, rp); + fprintf(out," */\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + fprintf(out," %3d, /* (%d) ", -rp->nrhs, i); + rule_print(out, rp); + fprintf(out," */\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which execution during each REDUCE action */ + i = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + i += translate_code(lemp, rp); + } + if( i ){ + fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; + } + /* First output rules other than the default: rule */ + for(rp=lemp->rule; rp; rp=rp->next){ + struct rule *rp2; /* Other rules with the same action */ + if( rp->codeEmitted ) continue; + if( rp->noCode ){ + /* No C code actions, so this will be part of the "default:" rule */ + continue; + } + fprintf(out," case %d: /* ", rp->iRule); + writeRuleText(out, rp); + fprintf(out, " */\n"); lineno++; + for(rp2=rp->next; rp2; rp2=rp2->next){ + if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix + && rp2->codeSuffix==rp->codeSuffix ){ + fprintf(out," case %d: /* ", rp2->iRule); + writeRuleText(out, rp2); + fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; + rp2->codeEmitted = 1; + } + } + emit_code(out,rp,lemp,&lineno); + fprintf(out," break;\n"); lineno++; + rp->codeEmitted = 1; + } + /* Finally, output the default: rule. We choose as the default: all + ** empty actions. */ + fprintf(out," default:\n"); lineno++; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->codeEmitted ) continue; + assert( rp->noCode ); + fprintf(out," /* (%d) ", rp->iRule); + writeRuleText(out, rp); + if( rp->neverReduce ){ + fprintf(out, " (NEVER REDUCES) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + }else if( rp->doesReduce ){ + fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; + }else{ + fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + } + } + fprintf(out," break;\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes if a parse fails */ + tplt_print(out,lemp,lemp->failure,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when a syntax error occurs */ + tplt_print(out,lemp,lemp->error,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when the parser accepts its input */ + tplt_print(out,lemp,lemp->accept,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Append any addition code the user desires */ + tplt_print(out,lemp,lemp->extracode,&lineno); + + acttab_free(pActtab); + fclose(in); + fclose(out); + if( sql ) fclose(sql); + return; +} + +/* Generate a header file for the parser */ +void ReportHeader(struct lemon *lemp) +{ + FILE *out, *in; + const char *prefix; + char line[LINESIZE]; + char pattern[LINESIZE]; + int i; + + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + in = file_open(lemp,".h","rb"); + if( in ){ + int nextChar; + for(i=1; interminal && fgets(line,LINESIZE,in); i++){ + lemon_sprintf(pattern,"#define %s%-30s %3d\n", + prefix,lemp->symbols[i]->name,i); + if( strcmp(line,pattern) ) break; + } + nextChar = fgetc(in); + fclose(in); + if( i==lemp->nterminal && nextChar==EOF ){ + /* No change in the file. Don't rewrite it. */ + return; + } + } + out = file_open(lemp,".h","wb"); + if( out ){ + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); + } + fclose(out); + } + return; +} + +/* Reduce the size of the action tables, if possible, by making use +** of defaults. +** +** In this version, we take the most frequent REDUCE action and make +** it the default. Except, there is no default if the wildcard token +** is a possible look-ahead. +*/ +void CompressTables(struct lemon *lemp) +{ + struct state *stp; + struct action *ap, *ap2, *nextap; + struct rule *rp, *rp2, *rbest; + int nbest, n; + int i; + int usesWildcard; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + nbest = 0; + rbest = 0; + usesWildcard = 0; + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ + usesWildcard = 1; + } + if( ap->type!=REDUCE ) continue; + rp = ap->x.rp; + if( rp->lhsStart ) continue; + if( rp==rbest ) continue; + n = 1; + for(ap2=ap->next; ap2; ap2=ap2->next){ + if( ap2->type!=REDUCE ) continue; + rp2 = ap2->x.rp; + if( rp2==rbest ) continue; + if( rp2==rp ) n++; + } + if( n>nbest ){ + nbest = n; + rbest = rp; + } + } + + /* Do not make a default if the number of rules to default + ** is not at least 1 or if the wildcard token is a possible + ** lookahead. + */ + if( nbest<1 || usesWildcard ) continue; + + + /* Combine matching REDUCE actions into a single default */ + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) break; + } + assert( ap ); + ap->sp = Symbol_new("{default}"); + for(ap=ap->next; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; + } + stp->ap = Action_sort(stp->ap); + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT ) break; + if( ap->type==REDUCE && ap->x.rp!=rbest ) break; + } + if( ap==0 ){ + stp->autoReduce = 1; + stp->pDfltReduce = rbest; + } + } + + /* Make a second pass over all states and actions. Convert + ** every action that is a SHIFT to an autoReduce state into + ** a SHIFTREDUCE action. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=ap->next){ + struct state *pNextState; + if( ap->type!=SHIFT ) continue; + pNextState = ap->x.stp; + if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ + ap->type = SHIFTREDUCE; + ap->x.rp = pNextState->pDfltReduce; + } + } + } + + /* If a SHIFTREDUCE action specifies a rule that has a single RHS term + ** (meaning that the SHIFTREDUCE will land back in the state where it + ** started) and if there is no C-code associated with the reduce action, + ** then we can go ahead and convert the action to be the same as the + ** action for the RHS of the rule. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=nextap){ + nextap = ap->next; + if( ap->type!=SHIFTREDUCE ) continue; + rp = ap->x.rp; + if( rp->noCode==0 ) continue; + if( rp->nrhs!=1 ) continue; +#if 1 + /* Only apply this optimization to non-terminals. It would be OK to + ** apply it to terminal symbols too, but that makes the parser tables + ** larger. */ + if( ap->sp->indexnterminal ) continue; +#endif + /* If we reach this point, it means the optimization can be applied */ + nextap = ap; + for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} + assert( ap2!=0 ); + ap->spOpt = ap2->sp; + ap->type = ap2->type; + ap->x = ap2->x; + } + } +} + + +/* +** Compare two states for sorting purposes. The smaller state is the +** one with the most non-terminal actions. If they have the same number +** of non-terminal actions, then the smaller is the one with the most +** token actions. +*/ +static int stateResortCompare(const void *a, const void *b){ + const struct state *pA = *(const struct state**)a; + const struct state *pB = *(const struct state**)b; + int n; + + n = pB->nNtAct - pA->nNtAct; + if( n==0 ){ + n = pB->nTknAct - pA->nTknAct; + if( n==0 ){ + n = pB->statenum - pA->statenum; + } + } + assert( n!=0 ); + return n; +} + + +/* +** Renumber and resort states so that states with fewer choices +** occur at the end. Except, keep state 0 as the first state. +*/ +void ResortStates(struct lemon *lemp) +{ + int i; + struct state *stp; + struct action *ap; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + stp->nTknAct = stp->nNtAct = 0; + stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */ + stp->iTknOfst = NO_OFFSET; + stp->iNtOfst = NO_OFFSET; + for(ap=stp->ap; ap; ap=ap->next){ + int iAction = compute_action(lemp,ap); + if( iAction>=0 ){ + if( ap->sp->indexnterminal ){ + stp->nTknAct++; + }else if( ap->sp->indexnsymbol ){ + stp->nNtAct++; + }else{ + assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); + stp->iDfltReduce = iAction; + } + } + } + } + qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), + stateResortCompare); + for(i=0; instate; i++){ + lemp->sorted[i]->statenum = i; + } + lemp->nxstate = lemp->nstate; + while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ + lemp->nxstate--; + } +} + + +/***************** From the file "set.c" ************************************/ +/* +** Set manipulation routines for the LEMON parser generator. +*/ + +static int size = 0; + +/* Set the set size */ +void SetSize(int n) +{ + size = n+1; +} + +/* Allocate a new set */ +char *SetNew(void){ + char *s; + s = (char*)calloc( size, 1); + if( s==0 ){ + memory_error(); + } + return s; +} + +/* Deallocate a set */ +void SetFree(char *s) +{ + free(s); +} + +/* Add a new element to the set. Return TRUE if the element was added +** and FALSE if it was already there. */ +int SetAdd(char *s, int e) +{ + int rv; + assert( e>=0 && esize = 1024; + x1a->count = 0; + x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); + if( x1a->tbl==0 ){ + free(x1a); + x1a = 0; + }else{ + int i; + x1a->ht = (x1node**)&(x1a->tbl[1024]); + for(i=0; i<1024; i++) x1a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Strsafe_insert(const char *data) +{ + x1node *np; + unsigned h; + unsigned ph; + + if( x1a==0 ) return 0; + ph = strhash(data); + h = ph & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x1a->count>=x1a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x1 array; + array.size = arrSize = x1a->size*2; + array.count = x1a->count; + array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x1node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x1node *oldnp, *newnp; + oldnp = &(x1a->tbl[i]); + h = strhash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + /* free(x1a->tbl); // This program was originally for 16-bit machines. + ** Don't worry about freeing memory on modern platforms. */ + *x1a = array; + } + /* Insert the new data */ + h = ph & (x1a->size-1); + np = &(x1a->tbl[x1a->count++]); + np->data = data; + if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); + np->next = x1a->ht[h]; + x1a->ht[h] = np; + np->from = &(x1a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +const char *Strsafe_find(const char *key) +{ + unsigned h; + x1node *np; + + if( x1a==0 ) return 0; + h = strhash(key) & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return a pointer to the (terminal or nonterminal) symbol "x". +** Create a new symbol if this is the first time "x" has been seen. +*/ +struct symbol *Symbol_new(const char *x) +{ + struct symbol *sp; + + sp = Symbol_find(x); + if( sp==0 ){ + sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); + MemoryCheck(sp); + sp->name = Strsafe(x); + sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; + sp->rule = 0; + sp->fallback = 0; + sp->prec = -1; + sp->assoc = UNK; + sp->firstset = 0; + sp->lambda = LEMON_FALSE; + sp->destructor = 0; + sp->destLineno = 0; + sp->datatype = 0; + sp->useCnt = 0; + Symbol_insert(sp,sp->name); + } + sp->useCnt++; + return sp; +} + +/* Compare two symbols for sorting purposes. Return negative, +** zero, or positive if a is less then, equal to, or greater +** than b. +** +** Symbols that begin with upper case letters (terminals or tokens) +** must sort before symbols that begin with lower case letters +** (non-terminals). And MULTITERMINAL symbols (created using the +** %token_class directive) must sort at the very end. Other than +** that, the order does not matter. +** +** We find experimentally that leaving the symbols in their original +** order (the order they appeared in the grammar file) gives the +** smallest parser tables in SQLite. +*/ +int Symbolcmpp(const void *_a, const void *_b) +{ + const struct symbol *a = *(const struct symbol **) _a; + const struct symbol *b = *(const struct symbol **) _b; + int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; + int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; + return i1==i2 ? a->index - b->index : i1 - i2; +} + +/* There is one instance of the following structure for each +** associative array of type "x2". +*/ +struct s_x2 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x2node *tbl; /* The data stored here */ + struct s_x2node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x2". +*/ +typedef struct s_x2node { + struct symbol *data; /* The data */ + const char *key; /* The key */ + struct s_x2node *next; /* Next entry with the same hash */ + struct s_x2node **from; /* Previous link */ +} x2node; + +/* There is only one instance of the array, which is the following */ +static struct s_x2 *x2a; + +/* Allocate a new associative array */ +void Symbol_init(void){ + if( x2a ) return; + x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); + if( x2a ){ + x2a->size = 128; + x2a->count = 0; + x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); + if( x2a->tbl==0 ){ + free(x2a); + x2a = 0; + }else{ + int i; + x2a->ht = (x2node**)&(x2a->tbl[128]); + for(i=0; i<128; i++) x2a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Symbol_insert(struct symbol *data, const char *key) +{ + x2node *np; + unsigned h; + unsigned ph; + + if( x2a==0 ) return 0; + ph = strhash(key); + h = ph & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x2a->count>=x2a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x2 array; + array.size = arrSize = x2a->size*2; + array.count = x2a->count; + array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x2node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x2node *oldnp, *newnp; + oldnp = &(x2a->tbl[i]); + h = strhash(oldnp->key) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + /* free(x2a->tbl); // This program was originally written for 16-bit + ** machines. Don't worry about freeing this trivial amount of memory + ** on modern platforms. Just leak it. */ + *x2a = array; + } + /* Insert the new data */ + h = ph & (x2a->size-1); + np = &(x2a->tbl[x2a->count++]); + np->key = key; + np->data = data; + if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); + np->next = x2a->ht[h]; + x2a->ht[h] = np; + np->from = &(x2a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct symbol *Symbol_find(const char *key) +{ + unsigned h; + x2node *np; + + if( x2a==0 ) return 0; + h = strhash(key) & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return the n-th data. Return NULL if n is out of range. */ +struct symbol *Symbol_Nth(int n) +{ + struct symbol *data; + if( x2a && n>0 && n<=x2a->count ){ + data = x2a->tbl[n-1].data; + }else{ + data = 0; + } + return data; +} + +/* Return the size of the array */ +int Symbol_count() +{ + return x2a ? x2a->count : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. */ +struct symbol **Symbol_arrayof() +{ + struct symbol **array; + int i,arrSize; + if( x2a==0 ) return 0; + arrSize = x2a->count; + array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Compare two configurations */ +int Configcmp(const char *_a,const char *_b) +{ + const struct config *a = (struct config *) _a; + const struct config *b = (struct config *) _b; + int x; + x = a->rp->index - b->rp->index; + if( x==0 ) x = a->dot - b->dot; + return x; +} + +/* Compare two states */ +PRIVATE int statecmp(struct config *a, struct config *b) +{ + int rc; + for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ + rc = a->rp->index - b->rp->index; + if( rc==0 ) rc = a->dot - b->dot; + } + if( rc==0 ){ + if( a ) rc = 1; + if( b ) rc = -1; + } + return rc; +} + +/* Hash a state */ +PRIVATE unsigned statehash(struct config *a) +{ + unsigned h=0; + while( a ){ + h = h*571 + a->rp->index*37 + a->dot; + a = a->bp; + } + return h; +} + +/* Allocate a new state structure */ +struct state *State_new() +{ + struct state *newstate; + newstate = (struct state *)calloc(1, sizeof(struct state) ); + MemoryCheck(newstate); + return newstate; +} + +/* There is one instance of the following structure for each +** associative array of type "x3". +*/ +struct s_x3 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x3node *tbl; /* The data stored here */ + struct s_x3node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x3". +*/ +typedef struct s_x3node { + struct state *data; /* The data */ + struct config *key; /* The key */ + struct s_x3node *next; /* Next entry with the same hash */ + struct s_x3node **from; /* Previous link */ +} x3node; + +/* There is only one instance of the array, which is the following */ +static struct s_x3 *x3a; + +/* Allocate a new associative array */ +void State_init(void){ + if( x3a ) return; + x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); + if( x3a ){ + x3a->size = 128; + x3a->count = 0; + x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); + if( x3a->tbl==0 ){ + free(x3a); + x3a = 0; + }else{ + int i; + x3a->ht = (x3node**)&(x3a->tbl[128]); + for(i=0; i<128; i++) x3a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int State_insert(struct state *data, struct config *key) +{ + x3node *np; + unsigned h; + unsigned ph; + + if( x3a==0 ) return 0; + ph = statehash(key); + h = ph & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x3a->count>=x3a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x3 array; + array.size = arrSize = x3a->size*2; + array.count = x3a->count; + array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x3node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x3node *oldnp, *newnp; + oldnp = &(x3a->tbl[i]); + h = statehash(oldnp->key) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x3a->tbl); + *x3a = array; + } + /* Insert the new data */ + h = ph & (x3a->size-1); + np = &(x3a->tbl[x3a->count++]); + np->key = key; + np->data = data; + if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); + np->next = x3a->ht[h]; + x3a->ht[h] = np; + np->from = &(x3a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct state *State_find(struct config *key) +{ + unsigned h; + x3node *np; + + if( x3a==0 ) return 0; + h = statehash(key) & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. */ +struct state **State_arrayof(void) +{ + struct state **array; + int i,arrSize; + if( x3a==0 ) return 0; + arrSize = x3a->count; + array = (struct state **)calloc(arrSize, sizeof(struct state *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Hash a configuration */ +PRIVATE unsigned confighash(struct config *a) +{ + unsigned h=0; + h = h*571 + a->rp->index*37 + a->dot; + return h; +} + +/* There is one instance of the following structure for each +** associative array of type "x4". +*/ +struct s_x4 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x4node *tbl; /* The data stored here */ + struct s_x4node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x4". +*/ +typedef struct s_x4node { + struct config *data; /* The data */ + struct s_x4node *next; /* Next entry with the same hash */ + struct s_x4node **from; /* Previous link */ +} x4node; + +/* There is only one instance of the array, which is the following */ +static struct s_x4 *x4a; + +/* Allocate a new associative array */ +void Configtable_init(void){ + if( x4a ) return; + x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); + if( x4a ){ + x4a->size = 64; + x4a->count = 0; + x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); + if( x4a->tbl==0 ){ + free(x4a); + x4a = 0; + }else{ + int i; + x4a->ht = (x4node**)&(x4a->tbl[64]); + for(i=0; i<64; i++) x4a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Configtable_insert(struct config *data) +{ + x4node *np; + unsigned h; + unsigned ph; + + if( x4a==0 ) return 0; + ph = confighash(data); + h = ph & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp((const char *) np->data,(const char *) data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x4a->count>=x4a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x4 array; + array.size = arrSize = x4a->size*2; + array.count = x4a->count; + array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x4node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x4node *oldnp, *newnp; + oldnp = &(x4a->tbl[i]); + h = confighash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + /* free(x4a->tbl); // This code was originall written for 16-bit machines. + ** on modern machines, don't worry about freeing this trival amount of + ** memory. */ + *x4a = array; + } + /* Insert the new data */ + h = ph & (x4a->size-1); + np = &(x4a->tbl[x4a->count++]); + np->data = data; + if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); + np->next = x4a->ht[h]; + x4a->ht[h] = np; + np->from = &(x4a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct config *Configtable_find(struct config *key) +{ + int h; + x4node *np; + + if( x4a==0 ) return 0; + h = confighash(key) & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Remove all data from the table. Pass each data to the function "f" +** as it is removed. ("f" may be null to avoid this step.) */ +void Configtable_clear(int(*f)(struct config *)) +{ + int i; + if( x4a==0 || x4a->count==0 ) return; + if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); + for(i=0; isize; i++) x4a->ht[i] = 0; + x4a->count = 0; + return; +} diff --git a/tools/lemon/lempar.c b/tools/lemon/lempar.c new file mode 100644 index 00000000..fcb72b8a --- /dev/null +++ b/tools/lemon/lempar.c @@ -0,0 +1,1068 @@ +/* +** 2000-05-29 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Driver template for the LEMON parser generator. +** +** The "lemon" program processes an LALR(1) input grammar file, then uses +** this template to construct a parser. The "lemon" program inserts text +** at each "%%" line. Also, any "P-a-r-s-e" identifer prefix (without the +** interstitial "-" characters) contained in this template is changed into +** the value of the %name directive from the grammar. Otherwise, the content +** of this template is copied straight through into the generate parser +** source file. +** +** The following is the concatenation of all %include directives from the +** input grammar file: +*/ +/************ Begin %include sections from the grammar ************************/ +%% +/**************** End of %include directives **********************************/ +/* These constants specify the various numeric values for terminal symbols. +***************** Begin token definitions *************************************/ +%% +/**************** End token definitions ***************************************/ + +/* The next sections is a series of control #defines. +** various aspects of the generated parser. +** YYCODETYPE is the data type used to store the integer codes +** that represent terminal and non-terminal symbols. +** "unsigned char" is used if there are fewer than +** 256 symbols. Larger types otherwise. +** YYNOCODE is a number of type YYCODETYPE that is not used for +** any terminal or nonterminal symbol. +** YYFALLBACK If defined, this indicates that one or more tokens +** (also known as: "terminal symbols") have fall-back +** values which should be used if the original symbol +** would not parse. This permits keywords to sometimes +** be used as identifiers, for example. +** YYACTIONTYPE is the data type used for "action codes" - numbers +** that indicate what to do in response to the next +** token. +** ParseTOKENTYPE is the data type used for minor type for terminal +** symbols. Background: A "minor type" is a semantic +** value associated with a terminal or non-terminal +** symbols. For example, for an "ID" terminal symbol, +** the minor type might be the name of the identifier. +** Each non-terminal can have a different minor type. +** Terminal symbols all have the same minor type, though. +** This macros defines the minor type for terminal +** symbols. +** YYMINORTYPE is the data type used for all minor types. +** This is typically a union of many types, one of +** which is ParseTOKENTYPE. The entry in the union +** for terminal symbols is called "yy0". +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** ParseARG_SDECL A static variable declaration for the %extra_argument +** ParseARG_PDECL A parameter declaration for the %extra_argument +** ParseARG_PARAM Code to pass %extra_argument as a subroutine parameter +** ParseARG_STORE Code to store %extra_argument into yypParser +** ParseARG_FETCH Code to extract %extra_argument from yypParser +** ParseCTX_* As ParseARG_ except for %extra_context +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +** YYNSTATE the combined number of states. +** YYNRULE the number of rules in the grammar +** YYNTOKEN Number of terminal symbols +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op +** YY_MIN_REDUCE Minimum value for reduce actions +** YY_MAX_REDUCE Maximum value for reduce actions +*/ +#ifndef INTERFACE +# define INTERFACE 1 +#endif +/************* Begin control #defines *****************************************/ +%% +/************* End control #defines *******************************************/ +#define YY_NLOOKAHEAD ((int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0]))) + +/* Define the yytestcase() macro to be a no-op if is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. +*/ +#ifndef yytestcase +# define yytestcase(X) +#endif + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N == YY_ERROR_ACTION A syntax error has occurred. +** +** N == YY_ACCEPT_ACTION The parser accepts its input. +** +** N == YY_NO_ACTION No such action. Denotes unused +** slots in the yy_action[] table. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE +** +** The action table is constructed as a single large table named yy_action[]. +** Given state S and lookahead X, the action is computed as either: +** +** (A) N = yy_action[ yy_shift_ofst[S] + X ] +** (B) N = yy_default[S] +** +** The (A) formula is preferred. The B formula is used instead if +** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X. +** +** The formulas above are for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array. +** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. +** +*********** Begin parsing tables **********************************************/ +%% +/********** End of lemon-generated parsing tables *****************************/ + +/* The next table maps tokens (terminal symbols) into fallback tokens. +** If a construct like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. +** +** This feature can be used, for example, to cause some keywords in a language +** to revert to identifiers if they keyword does not apply in the context where +** it appears. +*/ +#ifdef YYFALLBACK +static const YYCODETYPE yyFallback[] = { +%% +}; +#endif /* YYFALLBACK */ + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. +** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. +*/ +struct yyStackEntry { + YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ + YYCODETYPE major; /* The major token value. This is the code + ** number for the token at this stack level */ + YYMINORTYPE minor; /* The user-supplied minor token value. This + ** is the value of the token */ +}; +typedef struct yyStackEntry yyStackEntry; + +/* The state of the parser is completely contained in an instance of +** the following structure */ +struct yyParser { + yyStackEntry *yytos; /* Pointer to top element of the stack */ +#ifdef YYTRACKMAXSTACKDEPTH + int yyhwm; /* High-water mark of the stack */ +#endif +#ifndef YYNOERRORRECOVERY + int yyerrcnt; /* Shifts left before out of the error */ +#endif + ParseARG_SDECL /* A place to hold %extra_argument */ + ParseCTX_SDECL /* A place to hold %extra_context */ +#if YYSTACKDEPTH<=0 + int yystksz; /* Current side of the stack */ + yyStackEntry *yystack; /* The parser's stack */ + yyStackEntry yystk0; /* First stack entry */ +#else + yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ + yyStackEntry *yystackEnd; /* Last entry in the stack */ +#endif +}; +typedef struct yyParser yyParser; + +#include +#ifndef NDEBUG +#include +static FILE *yyTraceFILE = 0; +static char *yyTracePrompt = 0; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
    +**
  • A FILE* to which trace output should be written. +** If NULL, then tracing is turned off. +**
  • A prefix string written at the beginning of every +** line of trace output. If NULL, then tracing is +** turned off. +**
+** +** Outputs: +** None. +*/ +void ParseTrace(FILE *TraceFILE, char *zTracePrompt){ + yyTraceFILE = TraceFILE; + yyTracePrompt = zTracePrompt; + if( yyTraceFILE==0 ) yyTracePrompt = 0; + else if( yyTracePrompt==0 ) yyTraceFILE = 0; +} +#endif /* NDEBUG */ + +#if defined(YYCOVERAGE) || !defined(NDEBUG) +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +static const char *const yyTokenName[] = { +%% +}; +#endif /* defined(YYCOVERAGE) || !defined(NDEBUG) */ + +#ifndef NDEBUG +/* For tracing reduce actions, the names of all rules are required. +*/ +static const char *const yyRuleName[] = { +%% +}; +#endif /* NDEBUG */ + + +#if YYSTACKDEPTH<=0 +/* +** Try to increase the size of the parser stack. Return the number +** of errors. Return 0 on success. +*/ +static int yyGrowStack(yyParser *p){ + int newSize; + int idx; + yyStackEntry *pNew; + + newSize = p->yystksz*2 + 100; + idx = p->yytos ? (int)(p->yytos - p->yystack) : 0; + if( p->yystack==&p->yystk0 ){ + pNew = malloc(newSize*sizeof(pNew[0])); + if( pNew ) pNew[0] = p->yystk0; + }else{ + pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); + } + if( pNew ){ + p->yystack = pNew; + p->yytos = &p->yystack[idx]; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n", + yyTracePrompt, p->yystksz, newSize); + } +#endif + p->yystksz = newSize; + } + return pNew==0; +} +#endif + +/* Datatype of the argument to the memory allocated passed as the +** second argument to ParseAlloc() below. This can be changed by +** putting an appropriate #define in the %include section of the input +** grammar. +*/ +#ifndef YYMALLOCARGTYPE +# define YYMALLOCARGTYPE size_t +#endif + +/* Initialize a new parser that has already been allocated. +*/ +void ParseInit(void *yypRawParser ParseCTX_PDECL){ + yyParser *yypParser = (yyParser*)yypRawParser; + ParseCTX_STORE +#ifdef YYTRACKMAXSTACKDEPTH + yypParser->yyhwm = 0; +#endif +#if YYSTACKDEPTH<=0 + yypParser->yytos = NULL; + yypParser->yystack = NULL; + yypParser->yystksz = 0; + if( yyGrowStack(yypParser) ){ + yypParser->yystack = &yypParser->yystk0; + yypParser->yystksz = 1; + } +#endif +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + yypParser->yytos = yypParser->yystack; + yypParser->yystack[0].stateno = 0; + yypParser->yystack[0].major = 0; +#if YYSTACKDEPTH>0 + yypParser->yystackEnd = &yypParser->yystack[YYSTACKDEPTH-1]; +#endif +} + +#ifndef Parse_ENGINEALWAYSONSTACK +/* +** This function allocates a new parser. +** The only argument is a pointer to a function which works like +** malloc. +** +** Inputs: +** A pointer to the function used to allocate memory. +** +** Outputs: +** A pointer to a parser. This pointer is used in subsequent calls +** to Parse and ParseFree. +*/ +void *ParseAlloc(void *(*mallocProc)(YYMALLOCARGTYPE) ParseCTX_PDECL){ + yyParser *yypParser; + yypParser = (yyParser*)(*mallocProc)( (YYMALLOCARGTYPE)sizeof(yyParser) ); + if( yypParser ){ + ParseCTX_STORE + ParseInit(yypParser ParseCTX_PARAM); + } + return (void*)yypParser; +} +#endif /* Parse_ENGINEALWAYSONSTACK */ + + +/* The following function deletes the "minor type" or semantic value +** associated with a symbol. The symbol can be either a terminal +** or nonterminal. "yymajor" is the symbol code, and "yypminor" is +** a pointer to the value to be deleted. The code used to do the +** deletions is derived from the %destructor and/or %token_destructor +** directives of the input grammar. +*/ +static void yy_destructor( + yyParser *yypParser, /* The parser */ + YYCODETYPE yymajor, /* Type code for object to destroy */ + YYMINORTYPE *yypminor /* The object to be destroyed */ +){ + ParseARG_FETCH + ParseCTX_FETCH + switch( yymajor ){ + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. + */ +/********* Begin destructor definitions ***************************************/ +%% +/********* End destructor definitions *****************************************/ + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. +** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +*/ +static void yy_pop_parser_stack(yyParser *pParser){ + yyStackEntry *yytos; + assert( pParser->yytos!=0 ); + assert( pParser->yytos > pParser->yystack ); + yytos = pParser->yytos--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sPopping %s\n", + yyTracePrompt, + yyTokenName[yytos->major]); + } +#endif + yy_destructor(pParser, yytos->major, &yytos->minor); +} + +/* +** Clear all secondary memory allocations from the parser +*/ +void ParseFinalize(void *p){ + yyParser *pParser = (yyParser*)p; + while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser); +#if YYSTACKDEPTH<=0 + if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack); +#endif +} + +#ifndef Parse_ENGINEALWAYSONSTACK +/* +** Deallocate and destroy a parser. Destructors are called for +** all stack elements before shutting the parser down. +** +** If the YYPARSEFREENEVERNULL macro exists (for example because it +** is defined in a %include section of the input grammar) then it is +** assumed that the input pointer is never NULL. +*/ +void ParseFree( + void *p, /* The parser to be deleted */ + void (*freeProc)(void*) /* Function used to reclaim memory */ +){ +#ifndef YYPARSEFREENEVERNULL + if( p==0 ) return; +#endif + ParseFinalize(p); + (*freeProc)(p); +} +#endif /* Parse_ENGINEALWAYSONSTACK */ + +/* +** Return the peak depth of the stack for a parser. +*/ +#ifdef YYTRACKMAXSTACKDEPTH +int ParseStackPeak(void *p){ + yyParser *pParser = (yyParser*)p; + return pParser->yyhwm; +} +#endif + +/* This array of booleans keeps track of the parser statement +** coverage. The element yycoverage[X][Y] is set when the parser +** is in state X and has a lookahead token Y. In a well-tested +** systems, every element of this matrix should end up being set. +*/ +#if defined(YYCOVERAGE) +static unsigned char yycoverage[YYNSTATE][YYNTOKEN]; +#endif + +/* +** Write into out a description of every state/lookahead combination that +** +** (1) has not been used by the parser, and +** (2) is not a syntax error. +** +** Return the number of missed state/lookahead combinations. +*/ +#if defined(YYCOVERAGE) +int ParseCoverage(FILE *out){ + int stateno, iLookAhead, i; + int nMissed = 0; + for(stateno=0; statenoYY_MAX_SHIFT ) return stateno; + assert( stateno <= YY_SHIFT_COUNT ); +#if defined(YYCOVERAGE) + yycoverage[stateno][iLookAhead] = 1; +#endif + do{ + i = yy_shift_ofst[stateno]; + assert( i>=0 ); + assert( i<=YY_ACTTAB_COUNT ); + assert( i+YYNTOKEN<=(int)YY_NLOOKAHEAD ); + assert( iLookAhead!=YYNOCODE ); + assert( iLookAhead < YYNTOKEN ); + i += iLookAhead; + assert( i<(int)YY_NLOOKAHEAD ); + if( yy_lookahead[i]!=iLookAhead ){ +#ifdef YYFALLBACK + YYCODETYPE iFallback; /* Fallback token */ + assert( iLookAhead %s\n", + yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]); + } +#endif + assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } +#endif +#ifdef YYWILDCARD + { + int j = i - iLookAhead + YYWILDCARD; + assert( j<(int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0])) ); + if( yy_lookahead[j]==YYWILDCARD && iLookAhead>0 ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n", + yyTracePrompt, yyTokenName[iLookAhead], + yyTokenName[YYWILDCARD]); + } +#endif /* NDEBUG */ + return yy_action[j]; + } + } +#endif /* YYWILDCARD */ + return yy_default[stateno]; + }else{ + assert( i>=0 && i<(int)(sizeof(yy_action)/sizeof(yy_action[0])) ); + return yy_action[i]; + } + }while(1); +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead token iLookAhead. +*/ +static YYACTIONTYPE yy_find_reduce_action( + YYACTIONTYPE stateno, /* Current state number */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; +#ifdef YYERRORSYMBOL + if( stateno>YY_REDUCE_COUNT ){ + return yy_default[stateno]; + } +#else + assert( stateno<=YY_REDUCE_COUNT ); +#endif + i = yy_reduce_ofst[stateno]; + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; +#ifdef YYERRORSYMBOL + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + return yy_default[stateno]; + } +#else + assert( i>=0 && iyytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +/******** Begin %stack_overflow code ******************************************/ +%% +/******** End %stack_overflow code ********************************************/ + ParseARG_STORE /* Suppress warning about unused %extra_argument var */ + ParseCTX_STORE +} + +/* +** Print tracing information for a SHIFT action +*/ +#ifndef NDEBUG +static void yyTraceShift(yyParser *yypParser, int yyNewState, const char *zTag){ + if( yyTraceFILE ){ + if( yyNewStateyytos->major], + yyNewState); + }else{ + fprintf(yyTraceFILE,"%s%s '%s', pending reduce %d\n", + yyTracePrompt, zTag, yyTokenName[yypParser->yytos->major], + yyNewState - YY_MIN_REDUCE); + } + } +} +#else +# define yyTraceShift(X,Y,Z) +#endif + +/* +** Perform a shift action. +*/ +static void yy_shift( + yyParser *yypParser, /* The parser to be shifted */ + YYACTIONTYPE yyNewState, /* The new state to shift in */ + YYCODETYPE yyMajor, /* The major token to shift in */ + ParseTOKENTYPE yyMinor /* The minor token to shift in */ +){ + yyStackEntry *yytos; + yypParser->yytos++; +#ifdef YYTRACKMAXSTACKDEPTH + if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>yypParser->yystackEnd ){ + yypParser->yytos--; + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){ + if( yyGrowStack(yypParser) ){ + yypParser->yytos--; + yyStackOverflow(yypParser); + return; + } + } +#endif + if( yyNewState > YY_MAX_SHIFT ){ + yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + } + yytos = yypParser->yytos; + yytos->stateno = yyNewState; + yytos->major = yyMajor; + yytos->minor.yy0 = yyMinor; + yyTraceShift(yypParser, yyNewState, "Shift"); +} + +/* For rule J, yyRuleInfoLhs[J] contains the symbol on the left-hand side +** of that rule */ +static const YYCODETYPE yyRuleInfoLhs[] = { +%% +}; + +/* For rule J, yyRuleInfoNRhs[J] contains the negative of the number +** of symbols on the right-hand side of that rule. */ +static const signed char yyRuleInfoNRhs[] = { +%% +}; + +static void yy_accept(yyParser*); /* Forward Declaration */ + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. +** +** The yyLookahead and yyLookaheadToken parameters provide reduce actions +** access to the lookahead token (if any). The yyLookahead will be YYNOCODE +** if the lookahead token has already been consumed. As this procedure is +** only called from one place, optimizing compilers will in-line it, which +** means that the extra parameters have no performance impact. +*/ +static YYACTIONTYPE yy_reduce( + yyParser *yypParser, /* The parser */ + unsigned int yyruleno, /* Number of the rule by which to reduce */ + int yyLookahead, /* Lookahead token, or YYNOCODE if none */ + ParseTOKENTYPE yyLookaheadToken /* Value of the lookahead token */ + ParseCTX_PDECL /* %extra_context */ +){ + int yygoto; /* The next state */ + YYACTIONTYPE yyact; /* The next action */ + yyStackEntry *yymsp; /* The top of the parser's stack */ + int yysize; /* Amount to pop the stack */ + ParseARG_FETCH + (void)yyLookahead; + (void)yyLookaheadToken; + yymsp = yypParser->yytos; + + switch( yyruleno ){ + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... } // User supplied code + ** #line + ** break; + */ +/********** Begin reduce actions **********************************************/ +%% +/********** End reduce actions ************************************************/ + }; + assert( yyrulenoYY_MAX_SHIFT && yyact<=YY_MAX_SHIFTREDUCE) ); + + /* It is not possible for a REDUCE to be followed by an error */ + assert( yyact!=YY_ERROR_ACTION ); + + yymsp += yysize+1; + yypParser->yytos = yymsp; + yymsp->stateno = (YYACTIONTYPE)yyact; + yymsp->major = (YYCODETYPE)yygoto; + yyTraceShift(yypParser, yyact, "... then shift"); + return yyact; +} + +/* +** The following code executes when the parse fails +*/ +#ifndef YYNOERRORRECOVERY +static void yy_parse_failed( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH + ParseCTX_FETCH +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + } +#endif + while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser fails */ +/************ Begin %parse_failure code ***************************************/ +%% +/************ End %parse_failure code *****************************************/ + ParseARG_STORE /* Suppress warning about unused %extra_argument variable */ + ParseCTX_STORE +} +#endif /* YYNOERRORRECOVERY */ + +/* +** The following code executes when a syntax error first occurs. +*/ +static void yy_syntax_error( + yyParser *yypParser, /* The parser */ + int yymajor, /* The major type of the error token */ + ParseTOKENTYPE yyminor /* The minor type of the error token */ +){ + ParseARG_FETCH + ParseCTX_FETCH +#define TOKEN yyminor +/************ Begin %syntax_error code ****************************************/ +%% +/************ End %syntax_error code ******************************************/ + ParseARG_STORE /* Suppress warning about unused %extra_argument variable */ + ParseCTX_STORE +} + +/* +** The following is executed when the parser accepts +*/ +static void yy_accept( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH + ParseCTX_FETCH +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); + } +#endif +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ +/*********** Begin %parse_accept code *****************************************/ +%% +/*********** End %parse_accept code *******************************************/ + ParseARG_STORE /* Suppress warning about unused %extra_argument variable */ + ParseCTX_STORE +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "ParseAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
    +**
  • A pointer to the parser (an opaque structure.) +**
  • The major token number. +**
  • The minor token number. +**
  • An option argument of a grammar-specified type. +**
+** +** Outputs: +** None. +*/ +void Parse( + void *yyp, /* The parser */ + int yymajor, /* The major token code number */ + ParseTOKENTYPE yyminor /* The value for the token */ + ParseARG_PDECL /* Optional %extra_argument parameter */ +){ + YYMINORTYPE yyminorunion; + YYACTIONTYPE yyact; /* The parser action. */ +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + int yyendofinput; /* True if we are at the end of input */ +#endif +#ifdef YYERRORSYMBOL + int yyerrorhit = 0; /* True if yymajor has invoked an error */ +#endif + yyParser *yypParser = (yyParser*)yyp; /* The parser */ + ParseCTX_FETCH + ParseARG_STORE + + assert( yypParser->yytos!=0 ); +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + yyendofinput = (yymajor==0); +#endif + + yyact = yypParser->yytos->stateno; +#ifndef NDEBUG + if( yyTraceFILE ){ + if( yyact < YY_MIN_REDUCE ){ + fprintf(yyTraceFILE,"%sInput '%s' in state %d\n", + yyTracePrompt,yyTokenName[yymajor],yyact); + }else{ + fprintf(yyTraceFILE,"%sInput '%s' with pending reduce %d\n", + yyTracePrompt,yyTokenName[yymajor],yyact-YY_MIN_REDUCE); + } + } +#endif + + while(1){ /* Exit by "break" */ + assert( yypParser->yytos>=yypParser->yystack ); + assert( yyact==yypParser->yytos->stateno ); + yyact = yy_find_shift_action((YYCODETYPE)yymajor,yyact); + if( yyact >= YY_MIN_REDUCE ){ + unsigned int yyruleno = yyact - YY_MIN_REDUCE; /* Reduce by this rule */ +#ifndef NDEBUG + assert( yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ); + if( yyTraceFILE ){ + int yysize = yyRuleInfoNRhs[yyruleno]; + if( yysize ){ + fprintf(yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n", + yyTracePrompt, + yyruleno, yyRuleName[yyruleno], + yyrulenoyytos[yysize].stateno); + }else{ + fprintf(yyTraceFILE, "%sReduce %d [%s]%s.\n", + yyTracePrompt, yyruleno, yyRuleName[yyruleno], + yyrulenoyytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == + (int)(yypParser->yytos - yypParser->yystack)); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>=yypParser->yystackEnd ){ + yyStackOverflow(yypParser); + break; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){ + if( yyGrowStack(yypParser) ){ + yyStackOverflow(yypParser); + break; + } + } +#endif + } + yyact = yy_reduce(yypParser,yyruleno,yymajor,yyminor ParseCTX_PARAM); + }else if( yyact <= YY_MAX_SHIFTREDUCE ){ + yy_shift(yypParser,yyact,(YYCODETYPE)yymajor,yyminor); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt--; +#endif + break; + }else if( yyact==YY_ACCEPT_ACTION ){ + yypParser->yytos--; + yy_accept(yypParser); + return; + }else{ + assert( yyact == YY_ERROR_ACTION ); + yyminorunion.yy0 = yyminor; +#ifdef YYERRORSYMBOL + int yymx; +#endif +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); + } +#endif +#ifdef YYERRORSYMBOL + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if( yypParser->yyerrcnt<0 ){ + yy_syntax_error(yypParser,yymajor,yyminor); + } + yymx = yypParser->yytos->major; + if( yymx==YYERRORSYMBOL || yyerrorhit ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sDiscard input token %s\n", + yyTracePrompt,yyTokenName[yymajor]); + } +#endif + yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion); + yymajor = YYNOCODE; + }else{ + while( yypParser->yytos > yypParser->yystack ){ + yyact = yy_find_reduce_action(yypParser->yytos->stateno, + YYERRORSYMBOL); + if( yyact<=YY_MAX_SHIFTREDUCE ) break; + yy_pop_parser_stack(yypParser); + } + if( yypParser->yytos <= yypParser->yystack || yymajor==0 ){ + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + yymajor = YYNOCODE; + }else if( yymx!=YYERRORSYMBOL ){ + yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor); + } + } + yypParser->yyerrcnt = 3; + yyerrorhit = 1; + if( yymajor==YYNOCODE ) break; + yyact = yypParser->yytos->stateno; +#elif defined(YYNOERRORRECOVERY) + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + yy_syntax_error(yypParser,yymajor, yyminor); + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + break; +#else /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. + */ + if( yypParser->yyerrcnt<=0 ){ + yy_syntax_error(yypParser,yymajor, yyminor); + } + yypParser->yyerrcnt = 3; + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + if( yyendofinput ){ + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + } + break; +#endif + } + } +#ifndef NDEBUG + if( yyTraceFILE ){ + yyStackEntry *i; + char cDiv = '['; + fprintf(yyTraceFILE,"%sReturn. Stack=",yyTracePrompt); + for(i=&yypParser->yystack[1]; i<=yypParser->yytos; i++){ + fprintf(yyTraceFILE,"%c%s", cDiv, yyTokenName[i->major]); + cDiv = ' '; + } + fprintf(yyTraceFILE,"]\n"); + } +#endif + return; +} + +/* +** Return the fallback token corresponding to canonical token iToken, or +** 0 if iToken has no fallback. +*/ +int ParseFallback(int iToken){ +#ifdef YYFALLBACK + assert( iToken<(int)(sizeof(yyFallback)/sizeof(yyFallback[0])) ); + return yyFallback[iToken]; +#else + (void)iToken; + return 0; +#endif +} -- cgit v1.2.3