diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/include/fe_utils/psqlscan_int.h | |
parent | Initial commit. (diff) | |
download | postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip |
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/fe_utils/psqlscan_int.h')
-rw-r--r-- | src/include/fe_utils/psqlscan_int.h | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h new file mode 100644 index 0000000..8ada977 --- /dev/null +++ b/src/include/fe_utils/psqlscan_int.h @@ -0,0 +1,157 @@ +/*------------------------------------------------------------------------- + * + * psqlscan_int.h + * lexical scanner internal declarations + * + * This file declares the PsqlScanStateData structure used by psqlscan.l + * and shared by other lexers compatible with it, such as psqlscanslash.l. + * + * One difficult aspect of this code is that we need to work in multibyte + * encodings that are not ASCII-safe. A "safe" encoding is one in which each + * byte of a multibyte character has the high bit set (it's >= 0x80). Since + * all our lexing rules treat all high-bit-set characters alike, we don't + * really need to care whether such a byte is part of a sequence or not. + * In an "unsafe" encoding, we still expect the first byte of a multibyte + * sequence to be >= 0x80, but later bytes might not be. If we scan such + * a sequence as-is, the lexing rules could easily be fooled into matching + * such bytes to ordinary ASCII characters. Our solution for this is to + * substitute 0xFF for each non-first byte within the data presented to flex. + * The flex rules will then pass the FF's through unmolested. The + * psqlscan_emit() subroutine is responsible for looking back to the original + * string and replacing FF's with the corresponding original bytes. + * + * Another interesting thing we do here is scan different parts of the same + * input with physically separate flex lexers (ie, lexers written in separate + * .l files). We can get away with this because the only part of the + * persistent state of a flex lexer that depends on its parsing rule tables + * is the start state number, which is easy enough to manage --- usually, + * in fact, we just need to set it to INITIAL when changing lexers. But to + * make that work at all, we must use re-entrant lexers, so that all the + * relevant state is in the yyscan_t attached to the PsqlScanState; + * if we were using lexers with separate static state we would soon end up + * with dangling buffer pointers in one or the other. Also note that this + * is unlikely to work very nicely if the lexers aren't all built with the + * same flex version, or if they don't use the same flex options. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/fe_utils/psqlscan_int.h + * + *------------------------------------------------------------------------- + */ +#ifndef PSQLSCAN_INT_H +#define PSQLSCAN_INT_H + +#include "fe_utils/psqlscan.h" + +/* + * These are just to allow this file to be compilable standalone for header + * validity checking; in actual use, this file should always be included + * from the body of a flex file, where these symbols are already defined. + */ +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif + +/* + * We use a stack of flex buffers to handle substitution of psql variables. + * Each stacked buffer contains the as-yet-unread text from one psql variable. + * When we pop the stack all the way, we resume reading from the outer buffer + * identified by scanbufhandle. + */ +typedef struct StackElem +{ + YY_BUFFER_STATE buf; /* flex input control structure */ + char *bufstring; /* data actually being scanned by flex */ + char *origstring; /* copy of original data, if needed */ + char *varname; /* name of variable providing data, or NULL */ + struct StackElem *next; +} StackElem; + +/* + * All working state of the lexer must be stored in PsqlScanStateData + * between calls. This allows us to have multiple open lexer operations, + * which is needed for nested include files. The lexer itself is not + * recursive, but it must be re-entrant. + */ +typedef struct PsqlScanStateData +{ + yyscan_t scanner; /* Flex's state for this PsqlScanState */ + + PQExpBuffer output_buf; /* current output buffer */ + + StackElem *buffer_stack; /* stack of variable expansion buffers */ + + /* + * These variables always refer to the outer buffer, never to any stacked + * variable-expansion buffer. + */ + YY_BUFFER_STATE scanbufhandle; + char *scanbuf; /* start of outer-level input buffer */ + const char *scanline; /* current input line at outer level */ + + /* safe_encoding, curline, refline are used by emit() to replace FFs */ + int encoding; /* encoding being used now */ + bool safe_encoding; /* is current encoding "safe"? */ + bool std_strings; /* are string literals standard? */ + const char *curline; /* actual flex input string for cur buf */ + const char *refline; /* original data for cur buffer */ + + /* + * All this state lives across successive input lines, until explicitly + * reset by psql_scan_reset. start_state is adopted by yylex() on entry, + * and updated with its finishing state on exit. + */ + int start_state; /* yylex's starting/finishing state */ + int state_before_str_stop; /* start cond. before end quote */ + int paren_depth; /* depth of nesting in parentheses */ + int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ + + /* + * State to track boundaries of BEGIN ... END blocks in function + * definitions, so that semicolons do not send query too early. + */ + int identifier_count; /* identifiers since start of statement */ + char identifiers[4]; /* records the first few identifiers */ + int begin_depth; /* depth of begin/end pairs */ + + /* + * Callback functions provided by the program making use of the lexer, + * plus a void* callback passthrough argument. + */ + const PsqlScanCallbacks *callbacks; + void *cb_passthrough; +} PsqlScanStateData; + + +/* + * Functions exported by psqlscan.l, but only meant for use within + * compatible lexers. + */ +extern void psqlscan_push_new_buffer(PsqlScanState state, + const char *newstr, const char *varname); +extern void psqlscan_pop_buffer_stack(PsqlScanState state); +extern void psqlscan_select_top_buffer(PsqlScanState state); +extern bool psqlscan_var_is_current_source(PsqlScanState state, + const char *varname); +extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, + const char *txt, int len, + char **txtcopy); +extern void psqlscan_emit(PsqlScanState state, const char *txt, int len); +extern char *psqlscan_extract_substring(PsqlScanState state, + const char *txt, int len); +extern void psqlscan_escape_variable(PsqlScanState state, + const char *txt, int len, + PsqlScanQuoteType quote); +extern void psqlscan_test_variable(PsqlScanState state, + const char *txt, int len); + +#endif /* PSQLSCAN_INT_H */ |