#!/usr/bin/perl # src/interfaces/ecpg/preproc/parse.pl # parser generator for ecpg version 2 # call with backend parser as stdin # # Copyright (c) 2007-2023, PostgreSQL Global Development Group # # Written by Mike Aubury # Michael Meskes # Andy Colson # # Placed under the same license as PostgreSQL. # use strict; use warnings; use Getopt::Long; my $srcdir = '.'; my $outfile = ''; my $parser = ''; GetOptions( 'srcdir=s' => \$srcdir, 'output=s' => \$outfile, 'parser=s' => \$parser,) or die "wrong arguments"; # open parser / output file early, to raise errors early open(my $parserfh, '<', $parser) or die "could not open parser file $parser"; open(my $outfh, '>', $outfile) or die "could not open output file $outfile"; my $copymode = 0; my $brace_indent = 0; my $yaccmode = 0; my $in_rule = 0; my $header_included = 0; my $has_feature_not_supported = 0; my $has_if_command = 0; my $tokenmode = 0; my (%buff, $infield, $comment, %tokens, %addons); my ($stmt_mode, @fields); my $line = ''; my $non_term_id; # some token have to be replaced by other symbols # either in the rule my %replace_token = ( 'BCONST' => 'ecpg_bconst', 'FCONST' => 'ecpg_fconst', 'Sconst' => 'ecpg_sconst', 'XCONST' => 'ecpg_xconst', 'IDENT' => 'ecpg_ident', 'PARAM' => 'ecpg_param',); # or in the block my %replace_string = ( 'FORMAT_LA' => 'format', 'NOT_LA' => 'not', 'NULLS_LA' => 'nulls', 'WITH_LA' => 'with', 'WITHOUT_LA' => 'without', 'TYPECAST' => '::', 'DOT_DOT' => '..', 'COLON_EQUALS' => ':=', 'EQUALS_GREATER' => '=>', 'LESS_EQUALS' => '<=', 'GREATER_EQUALS' => '>=', 'NOT_EQUALS' => '<>',); # specific replace_types for specific non-terminals - never include the ':' # ECPG-only replace_types are defined in ecpg-replace_types my %replace_types = ( 'PrepareStmt' => '', 'ExecuteStmt' => '', 'opt_array_bounds' => '', # "ignore" means: do not create type and rules for this non-term-id 'parse_toplevel' => 'ignore', 'stmtmulti' => 'ignore', 'CreateAsStmt' => 'ignore', 'DeallocateStmt' => 'ignore', 'ColId' => 'ignore', 'type_function_name' => 'ignore', 'ColLabel' => 'ignore', 'Sconst' => 'ignore', 'opt_distinct_clause' => 'ignore', 'PLpgSQL_Expr' => 'ignore', 'PLAssignStmt' => 'ignore', 'plassign_target' => 'ignore', 'plassign_equals' => 'ignore',); # these replace_line commands excise certain keywords from the core keyword # lists. Be sure to account for these in ColLabel and related productions. my %replace_line = ( 'unreserved_keywordCONNECTION' => 'ignore', 'unreserved_keywordCURRENT_P' => 'ignore', 'unreserved_keywordDAY_P' => 'ignore', 'unreserved_keywordHOUR_P' => 'ignore', 'unreserved_keywordINPUT_P' => 'ignore', 'unreserved_keywordMINUTE_P' => 'ignore', 'unreserved_keywordMONTH_P' => 'ignore', 'unreserved_keywordSECOND_P' => 'ignore', 'unreserved_keywordYEAR_P' => 'ignore', 'col_name_keywordCHAR_P' => 'ignore', 'col_name_keywordINT_P' => 'ignore', 'col_name_keywordVALUES' => 'ignore', 'reserved_keywordTO' => 'ignore', 'reserved_keywordUNION' => 'ignore', # some other production rules have to be ignored or replaced 'fetch_argsFORWARDopt_from_incursor_name' => 'ignore', 'fetch_argsBACKWARDopt_from_incursor_name' => 'ignore', "opt_array_boundsopt_array_bounds'['Iconst']'" => 'ignore', 'VariableShowStmtSHOWvar_name' => 'SHOW var_name ecpg_into', 'VariableShowStmtSHOWTIMEZONE' => 'SHOW TIME ZONE ecpg_into', 'VariableShowStmtSHOWTRANSACTIONISOLATIONLEVEL' => 'SHOW TRANSACTION ISOLATION LEVEL ecpg_into', 'VariableShowStmtSHOWSESSIONAUTHORIZATION' => 'SHOW SESSION AUTHORIZATION ecpg_into', 'returning_clauseRETURNINGtarget_list' => 'RETURNING target_list opt_ecpg_into', 'ExecuteStmtEXECUTEnameexecute_param_clause' => 'EXECUTE prepared_name execute_param_clause execute_rest', 'ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEnameexecute_param_clauseopt_with_data' => 'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause opt_with_data execute_rest', 'ExecuteStmtCREATEOptTempTABLEIF_PNOTEXISTScreate_as_targetASEXECUTEnameexecute_param_clauseopt_with_data' => 'CREATE OptTemp TABLE IF_P NOT EXISTS create_as_target AS EXECUTE prepared_name execute_param_clause opt_with_data execute_rest', 'PrepareStmtPREPAREnameprep_type_clauseASPreparableStmt' => 'PREPARE prepared_name prep_type_clause AS PreparableStmt', 'var_nameColId' => 'ECPGColId'); preload_addons(); main(); dump_buffer('header'); dump_buffer('tokens'); dump_buffer('types'); dump_buffer('ecpgtype'); dump_buffer('orig_tokens'); print $outfh '%%', "\n"; print $outfh 'prog: statements;', "\n"; dump_buffer('rules'); include_file('trailer', 'ecpg.trailer'); dump_buffer('trailer'); close($parserfh); sub main { line: while (<$parserfh>) { chomp; # comment out the line below to make the result file match (blank line wise) # the prior version. #next if ($_ eq ''); # Dump the action for a rule - # stmt_mode indicates if we are processing the 'stmt:' # rule (mode==0 means normal, mode==1 means stmt:) # flds are the fields to use. These may start with a '$' - in # which case they are the result of a previous non-terminal # # if they don't start with a '$' then they are token name # # len is the number of fields in flds... # leadin is the padding to apply at the beginning (just use for formatting) if (/^%%/) { $tokenmode = 2; $copymode = 1; $yaccmode++; $infield = 0; } if ($yaccmode == 1) { # Check for rules that throw FEATURE_NOT_SUPPORTED $has_feature_not_supported = 1 if /ERRCODE_FEATURE_NOT_SUPPORTED/; $has_if_command = 1 if /^\s*if/; } my $prec = 0; # Make sure any braces are split s/{/ { /g; s/}/ } /g; # Any comments are split s|\/\*| /* |g; s|\*\/| */ |g; # Now split the line into individual fields my @arr = split(' '); if (!@arr) { # empty line: in tokenmode 1, emit an empty line, else ignore if ($tokenmode == 1) { add_to_buffer('orig_tokens', ''); } next line; } if ($arr[0] eq '%token' && $tokenmode == 0) { $tokenmode = 1; include_file('tokens', 'ecpg.tokens'); } elsif ($arr[0] eq '%type' && $header_included == 0) { include_file('header', 'ecpg.header'); include_file('ecpgtype', 'ecpg.type'); $header_included = 1; } if ($tokenmode == 1) { my $str = ''; my $prior = ''; for my $a (@arr) { if ($a eq '/*') { $comment++; next; } if ($a eq '*/') { $comment--; next; } if ($comment) { next; } if (substr($a, 0, 1) eq '<') { next; # its a type } $tokens{$a} = 1; $str = $str . ' ' . $a; if ($a eq 'IDENT' && $prior eq '%nonassoc') { # add more tokens to the list $str = $str . "\n%nonassoc CSTRING"; } $prior = $a; } add_to_buffer('orig_tokens', $str); next line; } # Don't worry about anything if we're not in the right section of gram.y if ($yaccmode != 1) { next line; } # Go through each field in turn for ( my $fieldIndexer = 0; $fieldIndexer < scalar(@arr); $fieldIndexer++) { if ($arr[$fieldIndexer] eq '*/' && $comment) { $comment = 0; next; } elsif ($comment) { next; } elsif ($arr[$fieldIndexer] eq '/*') { # start of a multiline comment $comment = 1; next; } elsif ($arr[$fieldIndexer] eq '//') { next line; } elsif ($arr[$fieldIndexer] eq '}') { $brace_indent--; next; } elsif ($arr[$fieldIndexer] eq '{') { $brace_indent++; next; } if ($brace_indent > 0) { next; } if ($arr[$fieldIndexer] eq ';') { if ($copymode) { if ($infield) { dump_line($stmt_mode, \@fields); } add_to_buffer('rules', ";\n\n"); } else { $copymode = 1; } @fields = (); $infield = 0; $line = ''; $in_rule = 0; next; } if ($arr[$fieldIndexer] eq '|') { if ($copymode) { if ($infield) { $infield = $infield + dump_line($stmt_mode, \@fields); } if ($infield > 1) { $line = '| '; } } @fields = (); next; } if (exists $replace_token{ $arr[$fieldIndexer] }) { $arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] }; } # Are we looking at a declaration of a non-terminal ? if (($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:/) || ( $fieldIndexer + 1 < scalar(@arr) && $arr[ $fieldIndexer + 1 ] eq ':')) { $non_term_id = $arr[$fieldIndexer]; $non_term_id =~ tr/://d; if (not defined $replace_types{$non_term_id}) { $replace_types{$non_term_id} = ''; $copymode = 1; } elsif ($replace_types{$non_term_id} eq 'ignore') { $copymode = 0; $line = ''; next line; } $line = $line . ' ' . $arr[$fieldIndexer]; # Do we have the : attached already ? # If yes, we'll have already printed the ':' if (!($arr[$fieldIndexer] =~ '[A-Za-z0-9]+:')) { # Consume the ':' which is next... $line = $line . ':'; $fieldIndexer++; } # Special mode? if ($non_term_id eq 'stmt') { $stmt_mode = 1; } else { $stmt_mode = 0; } my $tstr = '%type ' . $replace_types{$non_term_id} . ' ' . $non_term_id; add_to_buffer('types', $tstr); if ($copymode) { add_to_buffer('rules', $line); } $line = ''; @fields = (); $infield = 1; die "unterminated rule at grammar line $.\n" if $in_rule; $in_rule = 1; next; } elsif ($copymode) { $line = $line . ' ' . $arr[$fieldIndexer]; } if ($arr[$fieldIndexer] eq '%prec') { $prec = 1; next; } if ( $copymode && !$prec && !$comment && $fieldIndexer < scalar(@arr) && length($arr[$fieldIndexer]) && $infield) { if ($arr[$fieldIndexer] ne 'Op' && (( defined $tokens{ $arr[$fieldIndexer] } && $tokens{ $arr[$fieldIndexer] } > 0) || $arr[$fieldIndexer] =~ /'.+'/) || $stmt_mode == 1) { my $S; if (exists $replace_string{ $arr[$fieldIndexer] }) { $S = $replace_string{ $arr[$fieldIndexer] }; } else { $S = $arr[$fieldIndexer]; } $S =~ s/_P//g; $S =~ tr/'//d; if ($stmt_mode == 1) { push(@fields, $S); } else { push(@fields, lc($S)); } } else { push(@fields, '$' . (scalar(@fields) + 1)); } } } } die "unterminated rule at end of grammar\n" if $in_rule; return; } # append a file onto a buffer. # Arguments: buffer_name, filename (without path) sub include_file { my ($buffer, $filename) = @_; my $full = "$srcdir/$filename"; open(my $fh, '<', $full) or die; while (<$fh>) { chomp; add_to_buffer($buffer, $_); } close($fh); return; } sub include_addon { my ($buffer, $block, $fields, $stmt_mode) = @_; my $rec = $addons{$block}; return 0 unless $rec; my $rectype = (defined $rec->{type}) ? $rec->{type} : ''; if ($rectype eq 'rule') { dump_fields($stmt_mode, $fields, ' { '); } elsif ($rectype eq 'addon') { add_to_buffer('rules', ' { '); } #add_to_buffer( $stream, $_ ); #We have an array to add to the buffer, we'll add it ourself instead of #calling add_to_buffer, which does not know about arrays push(@{ $buff{$buffer} }, @{ $rec->{lines} }); if ($rectype eq 'addon') { dump_fields($stmt_mode, $fields, ''); } # if we added something (ie there are lines in our array), return 1 return 1 if (scalar(@{ $rec->{lines} }) > 0); return 0; } # include_addon does this same thing, but does not call this # sub... so if you change this, you need to fix include_addon too # Pass: buffer_name, string_to_append sub add_to_buffer { push(@{ $buff{ $_[0] } }, "$_[1]\n"); return; } sub dump_buffer { my ($buffer) = @_; print $outfh '/* ', $buffer, ' */', "\n"; my $ref = $buff{$buffer}; print $outfh @$ref; return; } sub dump_fields { my ($mode, $flds, $ln) = @_; my $len = scalar(@$flds); if ($mode == 0) { #Normal add_to_buffer('rules', $ln); if ($has_feature_not_supported and not $has_if_command) { # The backend unconditionally reports # FEATURE_NOT_SUPPORTED in this rule, so let's emit # a warning on the ecpg side. add_to_buffer('rules', 'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");' ); } $has_feature_not_supported = 0; $has_if_command = 0; if ($len == 0) { # We have no fields ? add_to_buffer('rules', ' $$=EMPTY; }'); } else { # Go through each field and try to 'aggregate' the tokens # into a single 'mm_strdup' where possible my @flds_new; my $str; for (my $z = 0; $z < $len; $z++) { if (substr($flds->[$z], 0, 1) eq '$') { push(@flds_new, $flds->[$z]); next; } $str = $flds->[$z]; while (1) { if ($z >= $len - 1 || substr($flds->[ $z + 1 ], 0, 1) eq '$') { # We're at the end... push(@flds_new, "mm_strdup(\"$str\")"); last; } $z++; $str = $str . ' ' . $flds->[$z]; } } # So - how many fields did we end up with ? $len = scalar(@flds_new); if ($len == 1) { # Straight assignment $str = ' $$ = ' . $flds_new[0] . ';'; add_to_buffer('rules', $str); } else { # Need to concatenate the results to form # our final string $str = ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');'; add_to_buffer('rules', $str); } add_to_buffer('rules', '}'); } } else { # we're in the stmt: rule if ($len) { # or just the statement ... add_to_buffer('rules', ' { output_statement($1, 0, ECPGst_normal); }'); } else { add_to_buffer('rules', ' { $$ = NULL; }'); } } return; } sub dump_line { my ($stmt_mode, $fields) = @_; my $block = $non_term_id . $line; $block =~ tr/ |//d; my $rep = $replace_line{$block}; if ($rep) { if ($rep eq 'ignore') { return 0; } if (index($line, '|') != -1) { $line = '| ' . $rep; } else { $line = $rep; } $block = $non_term_id . $line; $block =~ tr/ |//d; } add_to_buffer('rules', $line); my $i = include_addon('rules', $block, $fields, $stmt_mode); if ($i == 0) { dump_fields($stmt_mode, $fields, ' { '); } return 1; } =top load addons into cache %addons = { stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ] }, stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ] } } =cut sub preload_addons { my $filename = $srcdir . "/ecpg.addons"; open(my $fh, '<', $filename) or die; # there may be multiple lines starting ECPG: and then multiple lines of code. # the code need to be add to all prior ECPG records. my (@needsRules, @code, $record); # there may be comments before the first ECPG line, skip them my $skip = 1; while (<$fh>) { if (/^ECPG:\s(\S+)\s?(\w+)?/) { $skip = 0; if (@code) { for my $x (@needsRules) { push(@{ $x->{lines} }, @code); } @code = (); @needsRules = (); } $record = {}; $record->{type} = $2; $record->{lines} = []; if (exists $addons{$1}) { die "Ga! there are dups!\n"; } $addons{$1} = $record; push(@needsRules, $record); } else { next if $skip; push(@code, $_); } } close($fh); if (@code) { for my $x (@needsRules) { push(@{ $x->{lines} }, @code); } } return; }