summaryrefslogtreecommitdiffstats
path: root/src/bin/pgbench/exprscan.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/bin/pgbench/exprscan.l')
-rw-r--r--src/bin/pgbench/exprscan.l463
1 files changed, 463 insertions, 0 deletions
diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l
new file mode 100644
index 0000000..75432ce
--- /dev/null
+++ b/src/bin/pgbench/exprscan.l
@@ -0,0 +1,463 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * exprscan.l
+ * lexical scanner for pgbench backslash commands
+ *
+ * This lexer supports two operating modes:
+ *
+ * In INITIAL state, just parse off whitespace-separated words (this mode
+ * is basically equivalent to strtok(), which is what we used to use).
+ *
+ * In EXPR state, lex for the simple expression syntax of exprparse.y.
+ *
+ * In either mode, stop upon hitting newline or end of string.
+ *
+ * Note that this lexer operates within the framework created by psqlscan.l
+ * and shares its PsqlScanState (see fe_utils/psqlscan_int.h).
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pgbench/exprscan.l
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "fe_utils/psqlscan_int.h"
+
+/*
+ * Context information for reporting errors in expressions.  These are
+ * filled in by expr_scanner_init() and read by expr_yyerror_more(), which
+ * hands them to syntax_error().  Being file-level statics, they describe
+ * only the most recently initialized expression scan.
+ */
+static const char *expr_source = NULL;
+static int expr_lineno = 0;
+static int expr_start_offset = 0;
+static const char *expr_command = NULL;
+
+/*
+ * Indicates whether last yylex() call read a newline.  It is cleared at the
+ * top of every yylex() call and set only by the {newline} rules, just
+ * before they return 0; expr_yyerror_more() uses it to decide whether the
+ * rest of the line still has to be lexed.
+ */
+static bool last_was_newline = false;
+
+/*
+ * Work around a bug in flex 2.5.35: it emits a couple of functions that
+ * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
+ * this would cause warnings. Providing our own declarations should be
+ * harmless even when the bug gets fixed.
+ */
+extern int expr_yyget_column(yyscan_t yyscanner);
+extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
+
+/* LCOV_EXCL_START */
+
+%}
+
+/* Except for the prefix, these options should match psqlscan.l */
+%option reentrant
+%option bison-bridge
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="expr_yy"
+
+/*
+ * Character classes.  Bytes \200-\377 (those with the high bit set) are
+ * classed with the letters, presumably so that non-ASCII characters can
+ * appear in names.
+ */
+alpha [a-zA-Z\200-\377_]
+digit [0-9]
+alnum [A-Za-z\200-\377_0-9]
+/* {space} + {nonspace} + {newline} should cover all characters */
+space [ \t\r\f\v]
+nonspace [^ \t\r\f\v\n]
+newline [\n]
+
+/*
+ * Line continuation marker: a backslash directly followed by a newline,
+ * with an optional carriage return in between.
+ */
+continuation \\\r?{newline}
+
+/*
+ * Case insensitive keywords, spelled as one two-letter class per character.
+ * {isnull} and {notnull} are single-word forms, separate from the
+ * {is}, {not} and {null} patterns.
+ */
+and [Aa][Nn][Dd]
+or [Oo][Rr]
+not [Nn][Oo][Tt]
+case [Cc][Aa][Ss][Ee]
+when [Ww][Hh][Ee][Nn]
+then [Tt][Hh][Ee][Nn]
+else [Ee][Ll][Ss][Ee]
+end [Ee][Nn][Dd]
+true [Tt][Rr][Uu][Ee]
+false [Ff][Aa][Ll][Ss][Ee]
+null [Nn][Uu][Ll][Ll]
+is [Ii][Ss]
+isnull [Ii][Ss][Nn][Uu][Ll][Ll]
+notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
+
+/*
+ * Exclusive states: while the lexer is in EXPR, the unqualified
+ * word-splitting rules of the INITIAL state are not active.
+ */
+%x EXPR
+
+%%
+
+%{
+ /*
+ * Declare some local variables inside yylex(), for convenience.
+ * Everything in this block runs at the start of each yylex() call.
+ */
+ PsqlScanState cur_state = yyextra;
+
+ /*
+ * Force flex into the state indicated by start_state. This has a
+ * couple of purposes: it lets some of the functions below set a new
+ * starting state without ugly direct access to flex variables, and it
+ * allows us to transition from one flex lexer to another so that we
+ * can lex different parts of the source string using separate lexers.
+ */
+ BEGIN(cur_state->start_state);
+
+ /* Reset was-newline flag; only the {newline} rules set it to true again */
+ last_was_newline = false;
+%}
+
+ /* INITIAL state */
+
+{nonspace}+ {
+ /* Found a word, emit and return it */
+ psqlscan_emit(cur_state, yytext, yyleng);
+ /* nonzero tells expr_lex_one_word() that a word was found */
+ return 1;
+ }
+
+ /*
+ * We need this rule to avoid returning "word\" instead of recognizing
+ * a continuation marker just after a word:
+ */
+{nonspace}+{continuation} {
+ /* Found "word\\\r?\n", emit and return just "word" */
+ /*
+ * yyleng - 2 indexes the second-to-last character matched:
+ * the backslash itself, or the \r when the marker is
+ * backslash-CR-LF. Either way wordlen ends up as the index
+ * of the backslash, which is also the length of the word.
+ */
+ int wordlen = yyleng - 2;
+ if (yytext[wordlen] == '\r')
+ wordlen--;
+ Assert(yytext[wordlen] == '\\');
+ psqlscan_emit(cur_state, yytext, wordlen);
+ return 1;
+ }
+
+{space}+ { /* ignore */ }
+
+{continuation} { /* ignore */ }
+
+{newline} {
+ /* report end of command */
+ /* zero is also what yylex() returns at end of input */
+ last_was_newline = true;
+ return 0;
+ }
+
+ /* EXPR state */
+
+<EXPR>{
+
+"+" { return '+'; }
+"-" { return '-'; }
+"*" { return '*'; }
+"/" { return '/'; }
+"%" { return '%'; } /* C version, also in Pg SQL */
+"=" { return '='; }
+"<>" { return NE_OP; } /* same token as "!=" below */
+"!=" { return NE_OP; } /* C version, also in Pg SQL */
+"<=" { return LE_OP; }
+">=" { return GE_OP; }
+"<<" { return LS_OP; }
+">>" { return RS_OP; }
+"<" { return '<'; }
+">" { return '>'; }
+"|" { return '|'; }
+"&" { return '&'; }
+"#" { return '#'; }
+"~" { return '~'; }
+
+"(" { return '('; }
+")" { return ')'; }
+"," { return ','; }
+
+{and} { return AND_OP; }
+{or} { return OR_OP; }
+{not} { return NOT_OP; }
+{is} { return IS_OP; }
+{isnull} { return ISNULL_OP; }
+{notnull} { return NOTNULL_OP; }
+
+{case} { return CASE_KW; }
+{when} { return WHEN_KW; }
+{then} { return THEN_KW; }
+{else} { return ELSE_KW; }
+{end} { return END_KW; }
+
+:{alnum}+ {
+ /* skip the leading ':' and give the parser its own copy of the name */
+ yylval->str = pg_strdup(yytext + 1);
+ return VARIABLE;
+ }
+
+{null} { return NULL_CONST; }
+{true} {
+ yylval->bval = true;
+ return BOOLEAN_CONST;
+ }
+{false} {
+ yylval->bval = false;
+ return BOOLEAN_CONST;
+ }
+"9223372036854775808" {
+	/*
+	 * Special handling for PG_INT64_MIN, which can't
+	 * accurately be represented here, as the minus sign is
+	 * lexed separately and INT64_MIN can't be represented as
+	 * a positive integer.
+	 *
+	 * This rule must stay ahead of {digit}+, which matches the
+	 * same text: flex resolves equal-length matches in favor of
+	 * the rule listed first.
+	 */
+	return MAXINT_PLUS_ONE_CONST;
+	}
+{digit}+ {
+	/*
+	 * Listed before the first double rule, which also accepts a
+	 * bare digit string, so that plain integers win the tie.
+	 *
+	 * yytext is copied for the error report because
+	 * expr_yyerror_more() keeps lexing to end of line, which
+	 * changes yytext.  Use pg_strdup(), as the other rules in
+	 * this file do: plain strdup() could quietly hand back NULL
+	 * and so drop the offending text from the message.
+	 */
+	if (!strtoint64(yytext, true, &yylval->ival))
+		expr_yyerror_more(yyscanner, "bigint constant overflow",
+						  pg_strdup(yytext));
+	return INTEGER_CONST;
+	}
+{digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
+	/* digits, then optional fraction and optional exponent */
+	if (!strtodouble(yytext, true, &yylval->dval))
+		expr_yyerror_more(yyscanner, "double constant overflow",
+						  pg_strdup(yytext));
+	return DOUBLE_CONST;
+	}
+\.{digit}+([eE][-+]?{digit}+)? {
+	/* fraction with no leading digits, then optional exponent */
+	if (!strtodouble(yytext, true, &yylval->dval))
+		expr_yyerror_more(yyscanner, "double constant overflow",
+						  pg_strdup(yytext));
+	return DOUBLE_CONST;
+	}
+{alpha}{alnum}* {
+ /*
+ * Any other identifier is reported as a FUNCTION token.
+ * The keyword rules match the same text at the same
+ * length, so they win only because they are listed
+ * before this rule.
+ */
+ yylval->str = pg_strdup(yytext);
+ return FUNCTION;
+ }
+
+{space}+ { /* ignore */ }
+
+{continuation} { /* ignore */ }
+
+{newline} {
+ /* report end of command */
+ last_was_newline = true;
+ return 0;
+ }
+
+. {
+ /*
+ * must strdup yytext so that expr_yyerror_more doesn't
+ * change it while finding end of line
+ */
+ expr_yyerror_more(yyscanner, "unexpected character",
+ pg_strdup(yytext));
+ /* NOTREACHED, syntax_error calls exit() */
+ return 0;
+ }
+
+}
+
+<<EOF>> {
+ if (cur_state->buffer_stack == NULL)
+ return 0; /* end of input reached */
+
+ /*
+ * We were expanding a variable, so pop the inclusion
+ * stack and keep lexing
+ * (there is deliberately no return on this path).
+ */
+ psqlscan_pop_buffer_stack(cur_state);
+ psqlscan_select_top_buffer(cur_state);
+ }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Report a syntax error found while lexing or parsing an expression.
+ *
+ * "message" is the main error text and "more" is optional extra detail
+ * (expr_yyerror() passes NULL for it).  The whole source line, from the
+ * offset recorded by expr_scanner_init() up to the end of the line, is
+ * extracted and handed to syntax_error() together with the saved error
+ * context and the position of the error relative to the start of the line.
+ */
+void
+expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
+{
+	PsqlScanState scan_state = yyget_extra(yyscanner);
+	YYSTYPE		ignored_lval;
+	char	   *line_text;
+	int			error_pos;
+	int			line_end;
+
+	/* Note where the error was noticed, before we lex any further */
+	error_pos = expr_scanner_offset(scan_state) - 1;
+
+	/*
+	 * The rest of the line may not have been lexed yet.  Unless the last
+	 * yylex() call already hit the newline, keep pulling tokens until the
+	 * lexer reports end of line or end of input.  (An extra yylex() call at
+	 * EOF is harmless.)
+	 */
+	if (!last_was_newline)
+	{
+		for (;;)
+		{
+			if (yylex(&ignored_lval, yyscanner) == 0)
+				break;
+		}
+	}
+
+	/* Copy out the line, without its trailing newline if any */
+	line_end = expr_scanner_offset(scan_state);
+	line_text = expr_scanner_get_substring(scan_state,
+										   expr_start_offset,
+										   line_end,
+										   true);
+
+	syntax_error(expr_source, expr_lineno, line_text, expr_command,
+				 message, more, error_pos - expr_start_offset);
+}
+
+/*
+ * Report an expression syntax error that has no extra detail to show:
+ * simply expr_yyerror_more() with a NULL "more" argument.
+ */
+void
+expr_yyerror(yyscan_t yyscanner, const char *message)
+{
+ expr_yyerror_more(yyscanner, message, NULL);
+}
+
+/*
+ * Lex one whitespace-delimited word of a backslash command.
+ *
+ * The word's text is stored in word_buf, which is reset first.  On success
+ * the result is true and *offset receives the word's starting offset in the
+ * scan string; at end of command the result is false and *offset is -1.
+ */
+bool
+expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
+{
+	YYSTYPE		unused_lval;
+	int			found;
+
+	/* Must be scanning already */
+	Assert(state->scanbufhandle != NULL);
+
+	/* Send lexer output to the caller's buffer, starting from empty */
+	state->output_buf = word_buf;
+	resetPQExpBuffer(word_buf);
+
+	/* Read from the top of the buffer stack when there is one */
+	if (state->buffer_stack == NULL)
+		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+	else
+		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+
+	/* The word-splitting rules live in the INITIAL state */
+	state->start_state = INITIAL;
+
+	found = yylex(&unused_lval, state->scanner);
+
+	/*
+	 * Derive the word's start from where the lexer stopped and how much text
+	 * it emitted.  This could be done more efficiently, but seems fine.
+	 *
+	 * NOTE(review): when the word is followed by a continuation marker, the
+	 * token consumed is longer than the word emitted, so this subtraction
+	 * looks like it lands past the true start -- confirm.
+	 */
+	if (found == 0)
+		*offset = -1;
+	else
+		*offset = expr_scanner_offset(state) - word_buf->len;
+
+	/*
+	 * The caller may go back to the regular SQL lexer next, so put the
+	 * appropriate initial state back in place.
+	 */
+	psql_scan_reselect_sql_lexer(state);
+
+	return (bool) found;
+}
+
+/*
+ * Set things up so that expr_yyparse() can lex an expression.
+ *
+ * source, lineno, start_offset and command are remembered solely for use in
+ * error reports.  The return value is the yyscan_t to pass to
+ * expr_yyparse(); it is simply state->scanner, though callers need not
+ * rely on that.
+ */
+yyscan_t
+expr_scanner_init(PsqlScanState state,
+				  const char *source, int lineno, int start_offset,
+				  const char *command)
+{
+	/* Must be scanning already */
+	Assert(state->scanbufhandle != NULL);
+
+	/* Remember the context for later error reports */
+	expr_command = command;
+	expr_start_offset = start_offset;
+	expr_lineno = lineno;
+	expr_source = source;
+
+	/* Expression lexing emits tokens, not text, so no output buffer */
+	state->output_buf = NULL;
+
+	/* Read from the top of the buffer stack when there is one */
+	if (state->buffer_stack == NULL)
+		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+	else
+		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+
+	/* Have the next yylex() call start in the expression state */
+	state->start_state = EXPR;
+
+	return state->scanner;
+}
+
+/*
+ * Finish lexing an expression: hand the scan state back to the SQL lexer by
+ * reselecting its appropriate initial state.
+ */
+void
+expr_scanner_finish(yyscan_t yyscanner)
+{
+	psql_scan_reselect_sql_lexer(yyget_extra(yyscanner));
+}
+
+/*
+ * Return the offset, from the start of the scan string, of the end of the
+ * current lexer token.
+ *
+ * This depends on flex storing a NUL in the scan buffer at the end of the
+ * current token (yytext), so that the buffer's apparent string length is
+ * the amount consumed so far.  It might not work quite right while parsing
+ * a sub-buffer, but pgbench never invokes that functionality.
+ */
+int
+expr_scanner_offset(PsqlScanState state)
+{
+	size_t		consumed = strlen(state->scanbuf);
+
+	return (int) consumed;
+}
+
+/*
+ * Return a freshly allocated, NUL-terminated copy of the lexer input string
+ * covering start_offset up to, but not including, end_offset.  With chomp
+ * set, trailing newline and carriage-return characters are left out of the
+ * copy.
+ */
+char *
+expr_scanner_get_substring(PsqlScanState state,
+						   int start_offset, int end_offset,
+						   bool chomp)
+{
+	const char *src = state->scanbuf + start_offset;
+	int			len = end_offset - start_offset;
+	char	   *copy;
+
+	Assert(len >= 0);
+	Assert(end_offset <= strlen(state->scanbuf));
+
+	/* Back up over any trailing line terminators, if asked to */
+	if (chomp)
+	{
+		for (; len > 0; len--)
+		{
+			char		last = src[len - 1];
+
+			if (last != '\n' && last != '\r')
+				break;
+		}
+	}
+
+	copy = (char *) pg_malloc(len + 1);
+	memcpy(copy, src, len);
+	copy[len] = '\0';
+
+	return copy;
+}
+
+/*
+ * Return the 1-based line number containing the given string offset, found
+ * by counting the newlines that precede it in the scan buffer.  The offset
+ * must not be past the end of where we've lexed to.
+ */
+int
+expr_scanner_get_lineno(PsqlScanState state, int offset)
+{
+	const char *cur;
+	int			newlines = 0;
+
+	for (cur = state->scanbuf; *cur != '\0' && offset > 0; cur++, offset--)
+	{
+		if (*cur == '\n')
+			newlines++;
+	}
+
+	return newlines + 1;
+}