summaryrefslogtreecommitdiffstats
path: root/src/bin/psql/psqlscanslash.l
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/bin/psql/psqlscanslash.l
parentInitial commit. (diff)
downloadpostgresql-14-upstream.tar.xz
postgresql-14-upstream.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/bin/psql/psqlscanslash.l829
1 files changed, 829 insertions, 0 deletions
diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l
new file mode 100644
index 0000000..51aa33e
--- /dev/null
+++ b/src/bin/psql/psqlscanslash.l
@@ -0,0 +1,829 @@
+%top{
+/*-------------------------------------------------------------------------
+ *
+ * psqlscanslash.l
+ * lexical scanner for psql backslash commands
+ *
+ * XXX Avoid creating backtracking cases --- see the backend lexer for info.
+ *
+ * See fe_utils/psqlscan_int.h for additional commentary.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/bin/psql/psqlscanslash.l
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "psqlscanslash.h"
+#include "common/logging.h"
+#include "fe_utils/conditional.h"
+
+#include "libpq-fe.h"
+}
+
+%{
+#include "fe_utils/psqlscan_int.h"
+
+/*
+ * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
+ * doesn't presently make use of that argument, so just declare it as int.
+ */
+typedef int YYSTYPE;
+
+/*
+ * Set the type of yyextra; we use it as a pointer back to the containing
+ * PsqlScanState.
+ */
+#define YY_EXTRA_TYPE PsqlScanState
+
+/*
+ * These variables do not need to be saved across calls. Yeah, it's a bit
+ * of a hack, but putting them into PsqlScanStateData would be klugy too.
+ */
+static enum slash_option_type option_type;
+static char *option_quote;
+static int unquoted_option_chars;
+static int backtick_start_offset;
+
+
+/* Return values from yylex() */
+#define LEXRES_EOL 0 /* end of input */
+#define LEXRES_OK 1 /* OK completion of backslash argument */
+
+
+static void evaluate_backtick(PsqlScanState state);
+
+#define ECHO psqlscan_emit(cur_state, yytext, yyleng)
+
+/*
+ * Work around a bug in flex 2.5.35: it emits a couple of functions that
+ * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
+ * this would cause warnings. Providing our own declarations should be
+ * harmless even when the bug gets fixed.
+ */
+extern int slash_yyget_column(yyscan_t yyscanner);
+extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
+
+/* LCOV_EXCL_START */
+
+%}
+
+/* Except for the prefix, these options should match psqlscan.l */
+%option reentrant
+%option bison-bridge
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="slash_yy"
+
+/*
+ * OK, here is a short description of lex/flex rules behavior.
+ * The longest pattern which matches an input string is always chosen.
+ * For equal-length patterns, the first occurring in the rules list is chosen.
+ * INITIAL is the starting state, to which all non-conditional rules apply.
+ * Exclusive states change parsing rules while the state is active. When in
+ * an exclusive state, only those rules defined for that state apply.
+ */
+
+/* Exclusive states for lexing backslash commands */
+%x xslashcmd
+%x xslashargstart
+%x xslasharg
+%x xslashquote
+%x xslashbackquote
+%x xslashdquote
+%x xslashwholeline
+%x xslashend
+
+/*
+ * Assorted character class definitions that should match psqlscan.l.
+ */
+space [ \t\n\r\f]
+quote '
+xeoctesc [\\][0-7]{1,3}
+xehexesc [\\]x[0-9A-Fa-f]{1,2}
+xqdouble {quote}{quote}
+dquote \"
+variable_char [A-Za-z\200-\377_0-9]
+
+other .
+
+%%
+
+%{
+ /* Declare some local variables inside yylex(), for convenience */
+ PsqlScanState cur_state = yyextra;
+ PQExpBuffer output_buf = cur_state->output_buf;
+
+ /*
+ * Force flex into the state indicated by start_state. This has a
+ * couple of purposes: it lets some of the functions below set a new
+ * starting state without ugly direct access to flex variables, and it
+ * allows us to transition from one flex lexer to another so that we
+ * can lex different parts of the source string using separate lexers.
+ */
+ BEGIN(cur_state->start_state);
+%}
+
+ /*
+ * We don't really expect to be invoked in the INITIAL state in this
+ * lexer; but if we are, just spit data to the output_buf until EOF.
+ */
+
+{other}|\n { ECHO; }
+
+ /*
+ * Exclusive lexer states to handle backslash command lexing
+ */
+
+<xslashcmd>{
+ /* command name ends at whitespace or backslash; eat all else */
+
+{space}|"\\" {
+ yyless(0);
+ cur_state->start_state = YY_START;
+ return LEXRES_OK;
+ }
+
+{other} { ECHO; }
+
+}
+
+<xslashargstart>{
+ /*
+ * Discard any whitespace before argument, then go to xslasharg state.
+ * An exception is that "|" is only special at start of argument, so we
+ * check for it here.
+ */
+
+{space}+ { }
+
+"|" {
+ if (option_type == OT_FILEPIPE)
+ {
+ /* treat like whole-string case */
+ ECHO;
+ BEGIN(xslashwholeline);
+ }
+ else
+ {
+ /* vertical bar is not special otherwise */
+ yyless(0);
+ BEGIN(xslasharg);
+ }
+ }
+
+{other} {
+ yyless(0);
+ BEGIN(xslasharg);
+ }
+
+}
+
+<xslasharg>{
+ /*
+ * Default processing of text in a slash command's argument.
+ *
+ * Note: unquoted_option_chars counts the number of characters at the
+ * end of the argument that were not subject to any form of quoting.
+ * psql_scan_slash_option needs this to strip trailing semicolons safely.
+ */
+
+{space}|"\\" {
+ /*
+ * Unquoted space is end of arg; do not eat. Likewise
+ * backslash is end of command or next command, do not eat
+ *
+ * XXX this means we can't conveniently accept options
+ * that include unquoted backslashes; therefore, option
+ * processing that encourages use of backslashes is rather
+ * broken.
+ */
+ yyless(0);
+ cur_state->start_state = YY_START;
+ return LEXRES_OK;
+ }
+
+{quote} {
+ *option_quote = '\'';
+ unquoted_option_chars = 0;
+ BEGIN(xslashquote);
+ }
+
+"`" {
+ backtick_start_offset = output_buf->len;
+ *option_quote = '`';
+ unquoted_option_chars = 0;
+ BEGIN(xslashbackquote);
+ }
+
+{dquote} {
+ ECHO;
+ *option_quote = '"';
+ unquoted_option_chars = 0;
+ BEGIN(xslashdquote);
+ }
+
+:{variable_char}+ {
+ /* Possible psql variable substitution */
+ if (cur_state->callbacks->get_variable == NULL)
+ ECHO;
+ else
+ {
+ char *varname;
+ char *value;
+
+ varname = psqlscan_extract_substring(cur_state,
+ yytext + 1,
+ yyleng - 1);
+ value = cur_state->callbacks->get_variable(varname,
+ PQUOTE_PLAIN,
+ cur_state->cb_passthrough);
+ free(varname);
+
+ /*
+ * The variable value is just emitted without any
+ * further examination. This is consistent with the
+ * pre-8.0 code behavior, if not with the way that
+ * variables are handled outside backslash commands.
+ * Note that we needn't guard against recursion here.
+ */
+ if (value)
+ {
+ appendPQExpBufferStr(output_buf, value);
+ free(value);
+ }
+ else
+ ECHO;
+
+ *option_quote = ':';
+ }
+ unquoted_option_chars = 0;
+ }
+
+:'{variable_char}+' {
+ psqlscan_escape_variable(cur_state, yytext, yyleng,
+ PQUOTE_SQL_LITERAL);
+ *option_quote = ':';
+ unquoted_option_chars = 0;
+ }
+
+
+:\"{variable_char}+\" {
+ psqlscan_escape_variable(cur_state, yytext, yyleng,
+ PQUOTE_SQL_IDENT);
+ *option_quote = ':';
+ unquoted_option_chars = 0;
+ }
+
+:\{\?{variable_char}+\} {
+ psqlscan_test_variable(cur_state, yytext, yyleng);
+ }
+
+:'{variable_char}* {
+ /* Throw back everything but the colon */
+ yyless(1);
+ unquoted_option_chars++;
+ ECHO;
+ }
+
+:\"{variable_char}* {
+ /* Throw back everything but the colon */
+ yyless(1);
+ unquoted_option_chars++;
+ ECHO;
+ }
+
+:\{\?{variable_char}* {
+ /* Throw back everything but the colon */
+ yyless(1);
+ unquoted_option_chars++;
+ ECHO;
+ }
+
+:\{ {
+ /* Throw back everything but the colon */
+ yyless(1);
+ unquoted_option_chars++;
+ ECHO;
+ }
+
+{other} {
+ unquoted_option_chars++;
+ ECHO;
+ }
+
+}
+
+<xslashquote>{
+ /*
+ * single-quoted text: copy literally except for '' and backslash
+ * sequences
+ */
+
+{quote} { BEGIN(xslasharg); }
+
+{xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
+
+"\\n" { appendPQExpBufferChar(output_buf, '\n'); }
+"\\t" { appendPQExpBufferChar(output_buf, '\t'); }
+"\\b" { appendPQExpBufferChar(output_buf, '\b'); }
+"\\r" { appendPQExpBufferChar(output_buf, '\r'); }
+"\\f" { appendPQExpBufferChar(output_buf, '\f'); }
+
+{xeoctesc} {
+ /* octal case */
+ appendPQExpBufferChar(output_buf,
+ (char) strtol(yytext + 1, NULL, 8));
+ }
+
+{xehexesc} {
+ /* hex case */
+ appendPQExpBufferChar(output_buf,
+ (char) strtol(yytext + 2, NULL, 16));
+ }
+
+"\\". { psqlscan_emit(cur_state, yytext + 1, 1); }
+
+{other}|\n { ECHO; }
+
+}
+
+<xslashbackquote>{
+ /*
+ * backticked text: copy everything until next backquote (expanding
+ * variable references, but doing nought else), then evaluate.
+ */
+
+"`" {
+ /* In an inactive \if branch, don't evaluate the command */
+ if (cur_state->cb_passthrough == NULL ||
+ conditional_active((ConditionalStack) cur_state->cb_passthrough))
+ evaluate_backtick(cur_state);
+ BEGIN(xslasharg);
+ }
+
+:{variable_char}+ {
+ /* Possible psql variable substitution */
+ if (cur_state->callbacks->get_variable == NULL)
+ ECHO;
+ else
+ {
+ char *varname;
+ char *value;
+
+ varname = psqlscan_extract_substring(cur_state,
+ yytext + 1,
+ yyleng - 1);
+ value = cur_state->callbacks->get_variable(varname,
+ PQUOTE_PLAIN,
+ cur_state->cb_passthrough);
+ free(varname);
+
+ if (value)
+ {
+ appendPQExpBufferStr(output_buf, value);
+ free(value);
+ }
+ else
+ ECHO;
+ }
+ }
+
+:'{variable_char}+' {
+ psqlscan_escape_variable(cur_state, yytext, yyleng,
+ PQUOTE_SHELL_ARG);
+ }
+
+:'{variable_char}* {
+ /* Throw back everything but the colon */
+ yyless(1);
+ ECHO;
+ }
+
+{other}|\n { ECHO; }
+
+}
+
+<xslashdquote>{
+ /* double-quoted text: copy verbatim, including the double quotes */
+
+{dquote} {
+ ECHO;
+ BEGIN(xslasharg);
+ }
+
+{other}|\n { ECHO; }
+
+}
+
+<xslashwholeline>{
+ /* copy everything until end of input line */
+ /* but suppress leading whitespace */
+
+{space}+ {
+ if (output_buf->len > 0)
+ ECHO;
+ }
+
+{other} { ECHO; }
+
+}
+
+<xslashend>{
+ /* at end of command, eat a double backslash, but not anything else */
+
+"\\\\" {
+ cur_state->start_state = YY_START;
+ return LEXRES_OK;
+ }
+
+{other}|\n {
+ yyless(0);
+ cur_state->start_state = YY_START;
+ return LEXRES_OK;
+ }
+
+}
+
+<<EOF>> {
+ if (cur_state->buffer_stack == NULL)
+ {
+ cur_state->start_state = YY_START;
+ return LEXRES_EOL; /* end of input reached */
+ }
+
+ /*
+ * We were expanding a variable, so pop the inclusion
+ * stack and keep lexing
+ */
+ psqlscan_pop_buffer_stack(cur_state);
+ psqlscan_select_top_buffer(cur_state);
+ }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Scan the command name of a psql backslash command. This should be called
+ * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
+ * has been consumed through the leading backslash.
+ *
+ * The return value is a malloc'd copy of the command name, as parsed off
+ * from the input.
+ */
+char *
+psql_scan_slash_command(PsqlScanState state)
+{
+ PQExpBufferData mybuf;
+
+ /* Must be scanning already */
+ Assert(state->scanbufhandle != NULL);
+
+ /* Build a local buffer that we'll return the data of */
+ initPQExpBuffer(&mybuf);
+
+ /* Set current output target */
+ state->output_buf = &mybuf;
+
+ /* Set input source */
+ if (state->buffer_stack != NULL)
+ yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+ else
+ yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+
+ /*
+ * Set lexer start state. Note that this is sufficient to switch
+ * state->scanner over to using the tables in this lexer file.
+ */
+ state->start_state = xslashcmd;
+
+ /* And lex. */
+ yylex(NULL, state->scanner);
+
+ /* There are no possible errors in this lex state... */
+
+ /*
+ * In case the caller returns to using the regular SQL lexer, reselect the
+ * appropriate initial state.
+ */
+ psql_scan_reselect_sql_lexer(state);
+
+ return mybuf.data;
+}
+
+/*
+ * Parse off the next argument for a backslash command, and return it as a
+ * malloc'd string. If there are no more arguments, returns NULL.
+ *
+ * type tells what processing, if any, to perform on the option string;
+ * for example, if it's a SQL identifier, we want to downcase any unquoted
+ * letters.
+ *
+ * if quote is not NULL, *quote is set to 0 if no quoting was found, else
+ * the last quote symbol used in the argument.
+ *
+ * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
+ * be taken as part of the option string will be stripped.
+ *
+ * NOTE: the only possible syntax errors for backslash options are unmatched
+ * quotes, which are detected when we run out of input. Therefore, on a
+ * syntax error we just throw away the string and return NULL; there is no
+ * need to worry about flushing remaining input.
+ */
+char *
+psql_scan_slash_option(PsqlScanState state,
+ enum slash_option_type type,
+ char *quote,
+ bool semicolon)
+{
+ PQExpBufferData mybuf;
+ int lexresult PG_USED_FOR_ASSERTS_ONLY;
+ int final_state;
+ char local_quote;
+
+ /* Must be scanning already */
+ Assert(state->scanbufhandle != NULL);
+
+ if (quote == NULL)
+ quote = &local_quote;
+ *quote = 0;
+
+ /* Build a local buffer that we'll return the data of */
+ initPQExpBuffer(&mybuf);
+
+ /* Set up static variables that will be used by yylex */
+ option_type = type;
+ option_quote = quote;
+ unquoted_option_chars = 0;
+
+ /* Set current output target */
+ state->output_buf = &mybuf;
+
+ /* Set input source */
+ if (state->buffer_stack != NULL)
+ yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+ else
+ yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+
+ /* Set lexer start state */
+ if (type == OT_WHOLE_LINE)
+ state->start_state = xslashwholeline;
+ else
+ state->start_state = xslashargstart;
+
+ /* And lex. */
+ lexresult = yylex(NULL, state->scanner);
+
+ /* Save final state for a moment... */
+ final_state = state->start_state;
+
+ /*
+ * In case the caller returns to using the regular SQL lexer, reselect the
+ * appropriate initial state.
+ */
+ psql_scan_reselect_sql_lexer(state);
+
+ /*
+ * Check the lex result: we should have gotten back either LEXRES_OK
+ * or LEXRES_EOL (the latter indicating end of string). If we were inside
+ * a quoted string, as indicated by final_state, EOL is an error.
+ */
+ Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
+
+ switch (final_state)
+ {
+ case xslashargstart:
+ /* empty arg */
+ break;
+ case xslasharg:
+ /* Strip any unquoted trailing semi-colons if requested */
+ if (semicolon)
+ {
+ while (unquoted_option_chars-- > 0 &&
+ mybuf.len > 0 &&
+ mybuf.data[mybuf.len - 1] == ';')
+ {
+ mybuf.data[--mybuf.len] = '\0';
+ }
+ }
+
+ /*
+ * If SQL identifier processing was requested, then we strip out
+ * excess double quotes and optionally downcase unquoted letters.
+ */
+ if (type == OT_SQLID || type == OT_SQLIDHACK)
+ {
+ dequote_downcase_identifier(mybuf.data,
+ (type != OT_SQLIDHACK),
+ state->encoding);
+ /* update mybuf.len for possible shortening */
+ mybuf.len = strlen(mybuf.data);
+ }
+ break;
+ case xslashquote:
+ case xslashbackquote:
+ case xslashdquote:
+ /* must have hit EOL inside quotes */
+ pg_log_error("unterminated quoted string");
+ termPQExpBuffer(&mybuf);
+ return NULL;
+ case xslashwholeline:
+ /* always okay */
+ break;
+ default:
+ /* can't get here */
+ fprintf(stderr, "invalid YY_START\n");
+ exit(1);
+ }
+
+ /*
+ * An unquoted empty argument isn't possible unless we are at end of
+ * command. Return NULL instead.
+ */
+ if (mybuf.len == 0 && *quote == 0)
+ {
+ termPQExpBuffer(&mybuf);
+ return NULL;
+ }
+
+ /* Else return the completed string. */
+ return mybuf.data;
+}
+
+/*
+ * Eat up any unused \\ to complete a backslash command.
+ */
+void
+psql_scan_slash_command_end(PsqlScanState state)
+{
+ /* Must be scanning already */
+ Assert(state->scanbufhandle != NULL);
+
+ /* Set current output target */
+ state->output_buf = NULL; /* we won't output anything */
+
+ /* Set input source */
+ if (state->buffer_stack != NULL)
+ yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+ else
+ yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+
+ /* Set lexer start state */
+ state->start_state = xslashend;
+
+ /* And lex. */
+ yylex(NULL, state->scanner);
+
+ /* There are no possible errors in this lex state... */
+
+ /*
+ * We expect the caller to return to using the regular SQL lexer, so
+ * reselect the appropriate initial state.
+ */
+ psql_scan_reselect_sql_lexer(state);
+}
+
+/*
+ * Fetch current paren nesting depth
+ */
+int
+psql_scan_get_paren_depth(PsqlScanState state)
+{
+ return state->paren_depth;
+}
+
+/*
+ * Set paren nesting depth
+ */
+void
+psql_scan_set_paren_depth(PsqlScanState state, int depth)
+{
+ Assert(depth >= 0);
+ state->paren_depth = depth;
+}
+
+/*
+ * De-quote and optionally downcase a SQL identifier.
+ *
+ * The string at *str is modified in-place; it can become shorter,
+ * but not longer.
+ *
+ * If downcase is true then non-quoted letters are folded to lower case.
+ * Ideally this behavior will match the backend's downcase_identifier();
+ * but note that it could differ if LC_CTYPE is different in the frontend.
+ *
+ * Note that a string like FOO"BAR"BAZ will be converted to fooBARbaz;
+ * this is somewhat inconsistent with the SQL spec, which would have us
+ * parse it as several identifiers. But for psql's purposes, we want a
+ * string like "foo"."bar" to be treated as one option, so there's little
+ * choice; this routine doesn't get to change the token boundaries.
+ */
+void
+dequote_downcase_identifier(char *str, bool downcase, int encoding)
+{
+ bool inquotes = false;
+ char *cp = str;
+
+ while (*cp)
+ {
+ if (*cp == '"')
+ {
+ if (inquotes && cp[1] == '"')
+ {
+ /* Keep the first quote, remove the second */
+ cp++;
+ }
+ else
+ inquotes = !inquotes;
+ /* Collapse out quote at *cp */
+ memmove(cp, cp + 1, strlen(cp));
+ /* do not advance cp */
+ }
+ else
+ {
+ if (downcase && !inquotes)
+ *cp = pg_tolower((unsigned char) *cp);
+ cp += PQmblenBounded(cp, encoding);
+ }
+ }
+}
+
+/*
+ * Evaluate a backticked substring of a slash command's argument.
+ *
+ * The portion of output_buf starting at backtick_start_offset is evaluated
+ * as a shell command and then replaced by the command's output.
+ */
+static void
+evaluate_backtick(PsqlScanState state)
+{
+ PQExpBuffer output_buf = state->output_buf;
+ char *cmd = output_buf->data + backtick_start_offset;
+ PQExpBufferData cmd_output;
+ FILE *fd;
+ bool error = false;
+ char buf[512];
+ size_t result;
+
+ initPQExpBuffer(&cmd_output);
+
+ fd = popen(cmd, "r");
+ if (!fd)
+ {
+ pg_log_error("%s: %m", cmd);
+ error = true;
+ }
+
+ if (!error)
+ {
+ do
+ {
+ result = fread(buf, 1, sizeof(buf), fd);
+ if (ferror(fd))
+ {
+ pg_log_error("%s: %m", cmd);
+ error = true;
+ break;
+ }
+ appendBinaryPQExpBuffer(&cmd_output, buf, result);
+ } while (!feof(fd));
+ }
+
+ if (fd && pclose(fd) == -1)
+ {
+ pg_log_error("%s: %m", cmd);
+ error = true;
+ }
+
+ if (PQExpBufferDataBroken(cmd_output))
+ {
+ pg_log_error("%s: out of memory", cmd);
+ error = true;
+ }
+
+ /* Now done with cmd, delete it from output_buf */
+ output_buf->len = backtick_start_offset;
+ output_buf->data[output_buf->len] = '\0';
+
+ /* If no error, transfer result to output_buf */
+ if (!error)
+ {
+ /* strip any trailing newline (but only one) */
+ if (cmd_output.len > 0 &&
+ cmd_output.data[cmd_output.len - 1] == '\n')
+ cmd_output.len--;
+ appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
+ }
+
+ termPQExpBuffer(&cmd_output);
+}