From 46651ce6fe013220ed397add242004d764fc0153 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:15:05 +0200 Subject: Adding upstream version 14.5. Signed-off-by: Daniel Baumann --- src/bin/psql/psqlscanslash.l | 829 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 829 insertions(+) create mode 100644 src/bin/psql/psqlscanslash.l (limited to 'src/bin/psql/psqlscanslash.l') diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l new file mode 100644 index 0000000..51aa33e --- /dev/null +++ b/src/bin/psql/psqlscanslash.l @@ -0,0 +1,829 @@ +%top{ +/*------------------------------------------------------------------------- + * + * psqlscanslash.l + * lexical scanner for psql backslash commands + * + * XXX Avoid creating backtracking cases --- see the backend lexer for info. + * + * See fe_utils/psqlscan_int.h for additional commentary. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/bin/psql/psqlscanslash.l + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include "psqlscanslash.h" +#include "common/logging.h" +#include "fe_utils/conditional.h" + +#include "libpq-fe.h" +} + +%{ +#include "fe_utils/psqlscan_int.h" + +/* + * We must have a typedef YYSTYPE for yylex's first argument, but this lexer + * doesn't presently make use of that argument, so just declare it as int. + */ +typedef int YYSTYPE; + +/* + * Set the type of yyextra; we use it as a pointer back to the containing + * PsqlScanState. + */ +#define YY_EXTRA_TYPE PsqlScanState + +/* + * These variables do not need to be saved across calls. Yeah, it's a bit + * of a hack, but putting them into PsqlScanStateData would be klugy too. + */ +static enum slash_option_type option_type; +static char *option_quote; +static int unquoted_option_chars; +static int backtick_start_offset; + + +/* Return values from yylex() */ +#define LEXRES_EOL 0 /* end of input */ +#define LEXRES_OK 1 /* OK completion of backslash argument */ + + +static void evaluate_backtick(PsqlScanState state); + +#define ECHO psqlscan_emit(cur_state, yytext, yyleng) + +/* + * Work around a bug in flex 2.5.35: it emits a couple of functions that + * it forgets to emit declarations for. Since we use -Wmissing-prototypes, + * this would cause warnings. Providing our own declarations should be + * harmless even when the bug gets fixed. + */ +extern int slash_yyget_column(yyscan_t yyscanner); +extern void slash_yyset_column(int column_no, yyscan_t yyscanner); + +/* LCOV_EXCL_START */ + +%} + +/* Except for the prefix, these options should match psqlscan.l */ +%option reentrant +%option bison-bridge +%option 8bit +%option never-interactive +%option nodefault +%option noinput +%option nounput +%option noyywrap +%option warn +%option prefix="slash_yy" + +/* + * OK, here is a short description of lex/flex rules behavior. + * The longest pattern which matches an input string is always chosen. + * For equal-length patterns, the first occurring in the rules list is chosen. + * INITIAL is the starting state, to which all non-conditional rules apply. + * Exclusive states change parsing rules while the state is active. When in + * an exclusive state, only those rules defined for that state apply. + */ + +/* Exclusive states for lexing backslash commands */ +%x xslashcmd +%x xslashargstart +%x xslasharg +%x xslashquote +%x xslashbackquote +%x xslashdquote +%x xslashwholeline +%x xslashend + +/* + * Assorted character class definitions that should match psqlscan.l. + */ +space [ \t\n\r\f] +quote ' +xeoctesc [\\][0-7]{1,3} +xehexesc [\\]x[0-9A-Fa-f]{1,2} +xqdouble {quote}{quote} +dquote \" +variable_char [A-Za-z\200-\377_0-9] + +other . + +%% + +%{ + /* Declare some local variables inside yylex(), for convenience */ + PsqlScanState cur_state = yyextra; + PQExpBuffer output_buf = cur_state->output_buf; + + /* + * Force flex into the state indicated by start_state. This has a + * couple of purposes: it lets some of the functions below set a new + * starting state without ugly direct access to flex variables, and it + * allows us to transition from one flex lexer to another so that we + * can lex different parts of the source string using separate lexers. + */ + BEGIN(cur_state->start_state); +%} + + /* + * We don't really expect to be invoked in the INITIAL state in this + * lexer; but if we are, just spit data to the output_buf until EOF. + */ + +{other}|\n { ECHO; } + + /* + * Exclusive lexer states to handle backslash command lexing + */ + +{ + /* command name ends at whitespace or backslash; eat all else */ + +{space}|"\\" { + yyless(0); + cur_state->start_state = YY_START; + return LEXRES_OK; + } + +{other} { ECHO; } + +} + +{ + /* + * Discard any whitespace before argument, then go to xslasharg state. + * An exception is that "|" is only special at start of argument, so we + * check for it here. + */ + +{space}+ { } + +"|" { + if (option_type == OT_FILEPIPE) + { + /* treat like whole-string case */ + ECHO; + BEGIN(xslashwholeline); + } + else + { + /* vertical bar is not special otherwise */ + yyless(0); + BEGIN(xslasharg); + } + } + +{other} { + yyless(0); + BEGIN(xslasharg); + } + +} + +{ + /* + * Default processing of text in a slash command's argument. + * + * Note: unquoted_option_chars counts the number of characters at the + * end of the argument that were not subject to any form of quoting. + * psql_scan_slash_option needs this to strip trailing semicolons safely. + */ + +{space}|"\\" { + /* + * Unquoted space is end of arg; do not eat. Likewise + * backslash is end of command or next command, do not eat + * + * XXX this means we can't conveniently accept options + * that include unquoted backslashes; therefore, option + * processing that encourages use of backslashes is rather + * broken. + */ + yyless(0); + cur_state->start_state = YY_START; + return LEXRES_OK; + } + +{quote} { + *option_quote = '\''; + unquoted_option_chars = 0; + BEGIN(xslashquote); + } + +"`" { + backtick_start_offset = output_buf->len; + *option_quote = '`'; + unquoted_option_chars = 0; + BEGIN(xslashbackquote); + } + +{dquote} { + ECHO; + *option_quote = '"'; + unquoted_option_chars = 0; + BEGIN(xslashdquote); + } + +:{variable_char}+ { + /* Possible psql variable substitution */ + if (cur_state->callbacks->get_variable == NULL) + ECHO; + else + { + char *varname; + char *value; + + varname = psqlscan_extract_substring(cur_state, + yytext + 1, + yyleng - 1); + value = cur_state->callbacks->get_variable(varname, + PQUOTE_PLAIN, + cur_state->cb_passthrough); + free(varname); + + /* + * The variable value is just emitted without any + * further examination. This is consistent with the + * pre-8.0 code behavior, if not with the way that + * variables are handled outside backslash commands. + * Note that we needn't guard against recursion here. + */ + if (value) + { + appendPQExpBufferStr(output_buf, value); + free(value); + } + else + ECHO; + + *option_quote = ':'; + } + unquoted_option_chars = 0; + } + +:'{variable_char}+' { + psqlscan_escape_variable(cur_state, yytext, yyleng, + PQUOTE_SQL_LITERAL); + *option_quote = ':'; + unquoted_option_chars = 0; + } + + +:\"{variable_char}+\" { + psqlscan_escape_variable(cur_state, yytext, yyleng, + PQUOTE_SQL_IDENT); + *option_quote = ':'; + unquoted_option_chars = 0; + } + +:\{\?{variable_char}+\} { + psqlscan_test_variable(cur_state, yytext, yyleng); + } + +:'{variable_char}* { + /* Throw back everything but the colon */ + yyless(1); + unquoted_option_chars++; + ECHO; + } + +:\"{variable_char}* { + /* Throw back everything but the colon */ + yyless(1); + unquoted_option_chars++; + ECHO; + } + +:\{\?{variable_char}* { + /* Throw back everything but the colon */ + yyless(1); + unquoted_option_chars++; + ECHO; + } + +:\{ { + /* Throw back everything but the colon */ + yyless(1); + unquoted_option_chars++; + ECHO; + } + +{other} { + unquoted_option_chars++; + ECHO; + } + +} + +{ + /* + * single-quoted text: copy literally except for '' and backslash + * sequences + */ + +{quote} { BEGIN(xslasharg); } + +{xqdouble} { appendPQExpBufferChar(output_buf, '\''); } + +"\\n" { appendPQExpBufferChar(output_buf, '\n'); } +"\\t" { appendPQExpBufferChar(output_buf, '\t'); } +"\\b" { appendPQExpBufferChar(output_buf, '\b'); } +"\\r" { appendPQExpBufferChar(output_buf, '\r'); } +"\\f" { appendPQExpBufferChar(output_buf, '\f'); } + +{xeoctesc} { + /* octal case */ + appendPQExpBufferChar(output_buf, + (char) strtol(yytext + 1, NULL, 8)); + } + +{xehexesc} { + /* hex case */ + appendPQExpBufferChar(output_buf, + (char) strtol(yytext + 2, NULL, 16)); + } + +"\\". { psqlscan_emit(cur_state, yytext + 1, 1); } + +{other}|\n { ECHO; } + +} + +{ + /* + * backticked text: copy everything until next backquote (expanding + * variable references, but doing nought else), then evaluate. + */ + +"`" { + /* In an inactive \if branch, don't evaluate the command */ + if (cur_state->cb_passthrough == NULL || + conditional_active((ConditionalStack) cur_state->cb_passthrough)) + evaluate_backtick(cur_state); + BEGIN(xslasharg); + } + +:{variable_char}+ { + /* Possible psql variable substitution */ + if (cur_state->callbacks->get_variable == NULL) + ECHO; + else + { + char *varname; + char *value; + + varname = psqlscan_extract_substring(cur_state, + yytext + 1, + yyleng - 1); + value = cur_state->callbacks->get_variable(varname, + PQUOTE_PLAIN, + cur_state->cb_passthrough); + free(varname); + + if (value) + { + appendPQExpBufferStr(output_buf, value); + free(value); + } + else + ECHO; + } + } + +:'{variable_char}+' { + psqlscan_escape_variable(cur_state, yytext, yyleng, + PQUOTE_SHELL_ARG); + } + +:'{variable_char}* { + /* Throw back everything but the colon */ + yyless(1); + ECHO; + } + +{other}|\n { ECHO; } + +} + +{ + /* double-quoted text: copy verbatim, including the double quotes */ + +{dquote} { + ECHO; + BEGIN(xslasharg); + } + +{other}|\n { ECHO; } + +} + +{ + /* copy everything until end of input line */ + /* but suppress leading whitespace */ + +{space}+ { + if (output_buf->len > 0) + ECHO; + } + +{other} { ECHO; } + +} + +{ + /* at end of command, eat a double backslash, but not anything else */ + +"\\\\" { + cur_state->start_state = YY_START; + return LEXRES_OK; + } + +{other}|\n { + yyless(0); + cur_state->start_state = YY_START; + return LEXRES_OK; + } + +} + +<> { + if (cur_state->buffer_stack == NULL) + { + cur_state->start_state = YY_START; + return LEXRES_EOL; /* end of input reached */ + } + + /* + * We were expanding a variable, so pop the inclusion + * stack and keep lexing + */ + psqlscan_pop_buffer_stack(cur_state); + psqlscan_select_top_buffer(cur_state); + } + +%% + +/* LCOV_EXCL_STOP */ + +/* + * Scan the command name of a psql backslash command. This should be called + * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input + * has been consumed through the leading backslash. + * + * The return value is a malloc'd copy of the command name, as parsed off + * from the input. + */ +char * +psql_scan_slash_command(PsqlScanState state) +{ + PQExpBufferData mybuf; + + /* Must be scanning already */ + Assert(state->scanbufhandle != NULL); + + /* Build a local buffer that we'll return the data of */ + initPQExpBuffer(&mybuf); + + /* Set current output target */ + state->output_buf = &mybuf; + + /* Set input source */ + if (state->buffer_stack != NULL) + yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); + else + yy_switch_to_buffer(state->scanbufhandle, state->scanner); + + /* + * Set lexer start state. Note that this is sufficient to switch + * state->scanner over to using the tables in this lexer file. + */ + state->start_state = xslashcmd; + + /* And lex. */ + yylex(NULL, state->scanner); + + /* There are no possible errors in this lex state... */ + + /* + * In case the caller returns to using the regular SQL lexer, reselect the + * appropriate initial state. + */ + psql_scan_reselect_sql_lexer(state); + + return mybuf.data; +} + +/* + * Parse off the next argument for a backslash command, and return it as a + * malloc'd string. If there are no more arguments, returns NULL. + * + * type tells what processing, if any, to perform on the option string; + * for example, if it's a SQL identifier, we want to downcase any unquoted + * letters. + * + * if quote is not NULL, *quote is set to 0 if no quoting was found, else + * the last quote symbol used in the argument. + * + * if semicolon is true, unquoted trailing semicolon(s) that would otherwise + * be taken as part of the option string will be stripped. + * + * NOTE: the only possible syntax errors for backslash options are unmatched + * quotes, which are detected when we run out of input. Therefore, on a + * syntax error we just throw away the string and return NULL; there is no + * need to worry about flushing remaining input. + */ +char * +psql_scan_slash_option(PsqlScanState state, + enum slash_option_type type, + char *quote, + bool semicolon) +{ + PQExpBufferData mybuf; + int lexresult PG_USED_FOR_ASSERTS_ONLY; + int final_state; + char local_quote; + + /* Must be scanning already */ + Assert(state->scanbufhandle != NULL); + + if (quote == NULL) + quote = &local_quote; + *quote = 0; + + /* Build a local buffer that we'll return the data of */ + initPQExpBuffer(&mybuf); + + /* Set up static variables that will be used by yylex */ + option_type = type; + option_quote = quote; + unquoted_option_chars = 0; + + /* Set current output target */ + state->output_buf = &mybuf; + + /* Set input source */ + if (state->buffer_stack != NULL) + yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); + else + yy_switch_to_buffer(state->scanbufhandle, state->scanner); + + /* Set lexer start state */ + if (type == OT_WHOLE_LINE) + state->start_state = xslashwholeline; + else + state->start_state = xslashargstart; + + /* And lex. */ + lexresult = yylex(NULL, state->scanner); + + /* Save final state for a moment... */ + final_state = state->start_state; + + /* + * In case the caller returns to using the regular SQL lexer, reselect the + * appropriate initial state. + */ + psql_scan_reselect_sql_lexer(state); + + /* + * Check the lex result: we should have gotten back either LEXRES_OK + * or LEXRES_EOL (the latter indicating end of string). If we were inside + * a quoted string, as indicated by final_state, EOL is an error. + */ + Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK); + + switch (final_state) + { + case xslashargstart: + /* empty arg */ + break; + case xslasharg: + /* Strip any unquoted trailing semi-colons if requested */ + if (semicolon) + { + while (unquoted_option_chars-- > 0 && + mybuf.len > 0 && + mybuf.data[mybuf.len - 1] == ';') + { + mybuf.data[--mybuf.len] = '\0'; + } + } + + /* + * If SQL identifier processing was requested, then we strip out + * excess double quotes and optionally downcase unquoted letters. + */ + if (type == OT_SQLID || type == OT_SQLIDHACK) + { + dequote_downcase_identifier(mybuf.data, + (type != OT_SQLIDHACK), + state->encoding); + /* update mybuf.len for possible shortening */ + mybuf.len = strlen(mybuf.data); + } + break; + case xslashquote: + case xslashbackquote: + case xslashdquote: + /* must have hit EOL inside quotes */ + pg_log_error("unterminated quoted string"); + termPQExpBuffer(&mybuf); + return NULL; + case xslashwholeline: + /* always okay */ + break; + default: + /* can't get here */ + fprintf(stderr, "invalid YY_START\n"); + exit(1); + } + + /* + * An unquoted empty argument isn't possible unless we are at end of + * command. Return NULL instead. + */ + if (mybuf.len == 0 && *quote == 0) + { + termPQExpBuffer(&mybuf); + return NULL; + } + + /* Else return the completed string. */ + return mybuf.data; +} + +/* + * Eat up any unused \\ to complete a backslash command. + */ +void +psql_scan_slash_command_end(PsqlScanState state) +{ + /* Must be scanning already */ + Assert(state->scanbufhandle != NULL); + + /* Set current output target */ + state->output_buf = NULL; /* we won't output anything */ + + /* Set input source */ + if (state->buffer_stack != NULL) + yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); + else + yy_switch_to_buffer(state->scanbufhandle, state->scanner); + + /* Set lexer start state */ + state->start_state = xslashend; + + /* And lex. */ + yylex(NULL, state->scanner); + + /* There are no possible errors in this lex state... */ + + /* + * We expect the caller to return to using the regular SQL lexer, so + * reselect the appropriate initial state. + */ + psql_scan_reselect_sql_lexer(state); +} + +/* + * Fetch current paren nesting depth + */ +int +psql_scan_get_paren_depth(PsqlScanState state) +{ + return state->paren_depth; +} + +/* + * Set paren nesting depth + */ +void +psql_scan_set_paren_depth(PsqlScanState state, int depth) +{ + Assert(depth >= 0); + state->paren_depth = depth; +} + +/* + * De-quote and optionally downcase a SQL identifier. + * + * The string at *str is modified in-place; it can become shorter, + * but not longer. + * + * If downcase is true then non-quoted letters are folded to lower case. + * Ideally this behavior will match the backend's downcase_identifier(); + * but note that it could differ if LC_CTYPE is different in the frontend. + * + * Note that a string like FOO"BAR"BAZ will be converted to fooBARbaz; + * this is somewhat inconsistent with the SQL spec, which would have us + * parse it as several identifiers. But for psql's purposes, we want a + * string like "foo"."bar" to be treated as one option, so there's little + * choice; this routine doesn't get to change the token boundaries. + */ +void +dequote_downcase_identifier(char *str, bool downcase, int encoding) +{ + bool inquotes = false; + char *cp = str; + + while (*cp) + { + if (*cp == '"') + { + if (inquotes && cp[1] == '"') + { + /* Keep the first quote, remove the second */ + cp++; + } + else + inquotes = !inquotes; + /* Collapse out quote at *cp */ + memmove(cp, cp + 1, strlen(cp)); + /* do not advance cp */ + } + else + { + if (downcase && !inquotes) + *cp = pg_tolower((unsigned char) *cp); + cp += PQmblenBounded(cp, encoding); + } + } +} + +/* + * Evaluate a backticked substring of a slash command's argument. + * + * The portion of output_buf starting at backtick_start_offset is evaluated + * as a shell command and then replaced by the command's output. + */ +static void +evaluate_backtick(PsqlScanState state) +{ + PQExpBuffer output_buf = state->output_buf; + char *cmd = output_buf->data + backtick_start_offset; + PQExpBufferData cmd_output; + FILE *fd; + bool error = false; + char buf[512]; + size_t result; + + initPQExpBuffer(&cmd_output); + + fd = popen(cmd, "r"); + if (!fd) + { + pg_log_error("%s: %m", cmd); + error = true; + } + + if (!error) + { + do + { + result = fread(buf, 1, sizeof(buf), fd); + if (ferror(fd)) + { + pg_log_error("%s: %m", cmd); + error = true; + break; + } + appendBinaryPQExpBuffer(&cmd_output, buf, result); + } while (!feof(fd)); + } + + if (fd && pclose(fd) == -1) + { + pg_log_error("%s: %m", cmd); + error = true; + } + + if (PQExpBufferDataBroken(cmd_output)) + { + pg_log_error("%s: out of memory", cmd); + error = true; + } + + /* Now done with cmd, delete it from output_buf */ + output_buf->len = backtick_start_offset; + output_buf->data[output_buf->len] = '\0'; + + /* If no error, transfer result to output_buf */ + if (!error) + { + /* strip any trailing newline (but only one) */ + if (cmd_output.len > 0 && + cmd_output.data[cmd_output.len - 1] == '\n') + cmd_output.len--; + appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len); + } + + termPQExpBuffer(&cmd_output); +} -- cgit v1.2.3