diff options
Diffstat (limited to 'src/fe_utils/string_utils.c')
-rw-r--r-- | src/fe_utils/string_utils.c | 1163 |
1 files changed, 1163 insertions, 0 deletions
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c new file mode 100644 index 0000000..5f34741 --- /dev/null +++ b/src/fe_utils/string_utils.c @@ -0,0 +1,1163 @@ +/*------------------------------------------------------------------------- + * + * String-processing utility routines for frontend code + * + * Assorted utility functions that are useful in constructing SQL queries + * and interpreting backend output. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/fe_utils/string_utils.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include <ctype.h> + +#include "common/keywords.h" +#include "fe_utils/string_utils.h" + +static PQExpBuffer defaultGetLocalPQExpBuffer(void); + +/* Globals exported by this file */ +int quote_all_identifiers = 0; +PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer; + + +/* + * Returns a temporary PQExpBuffer, valid until the next call to the function. + * This is used by fmtId and fmtQualifiedId. + * + * Non-reentrant and non-thread-safe but reduces memory leakage. You can + * replace this with a custom version by setting the getLocalPQExpBuffer + * function pointer. + */ +static PQExpBuffer +defaultGetLocalPQExpBuffer(void) +{ + static PQExpBuffer id_return = NULL; + + if (id_return) /* first time through? */ + { + /* same buffer, just wipe contents */ + resetPQExpBuffer(id_return); + } + else + { + /* new buffer */ + id_return = createPQExpBuffer(); + } + + return id_return; +} + +/* + * Quotes input string if it's not a legitimate SQL identifier as-is. + * + * Note that the returned string must be used before calling fmtId again, + * since we re-use the same return buffer each time. + */ +const char * +fmtId(const char *rawid) +{ + PQExpBuffer id_return = getLocalPQExpBuffer(); + + const char *cp; + bool need_quotes = false; + + /* + * These checks need to match the identifier production in scan.l. Don't + * use islower() etc. + */ + if (quote_all_identifiers) + need_quotes = true; + /* slightly different rules for first character */ + else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_')) + need_quotes = true; + else + { + /* otherwise check the entire string */ + for (cp = rawid; *cp; cp++) + { + if (!((*cp >= 'a' && *cp <= 'z') + || (*cp >= '0' && *cp <= '9') + || (*cp == '_'))) + { + need_quotes = true; + break; + } + } + } + + if (!need_quotes) + { + /* + * Check for keyword. We quote keywords except for unreserved ones. + * (In some cases we could avoid quoting a col_name or type_func_name + * keyword, but it seems much harder than it's worth to tell that.) + * + * Note: ScanKeywordLookup() does case-insensitive comparison, but + * that's fine, since we already know we have all-lower-case. + */ + int kwnum = ScanKeywordLookup(rawid, &ScanKeywords); + + if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD) + need_quotes = true; + } + + if (!need_quotes) + { + /* no quoting needed */ + appendPQExpBufferStr(id_return, rawid); + } + else + { + appendPQExpBufferChar(id_return, '"'); + for (cp = rawid; *cp; cp++) + { + /* + * Did we find a double-quote in the string? Then make this a + * double double-quote per SQL99. Before, we put in a + * backslash/double-quote pair. - thomas 2000-08-05 + */ + if (*cp == '"') + appendPQExpBufferChar(id_return, '"'); + appendPQExpBufferChar(id_return, *cp); + } + appendPQExpBufferChar(id_return, '"'); + } + + return id_return->data; +} + +/* + * fmtQualifiedId - construct a schema-qualified name, with quoting as needed. + * + * Like fmtId, use the result before calling again. + * + * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot + * use that buffer until we're finished with calling fmtId(). + */ +const char * +fmtQualifiedId(const char *schema, const char *id) +{ + PQExpBuffer id_return; + PQExpBuffer lcl_pqexp = createPQExpBuffer(); + + /* Some callers might fail to provide a schema name */ + if (schema && *schema) + { + appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema)); + } + appendPQExpBufferStr(lcl_pqexp, fmtId(id)); + + id_return = getLocalPQExpBuffer(); + + appendPQExpBufferStr(id_return, lcl_pqexp->data); + destroyPQExpBuffer(lcl_pqexp); + + return id_return->data; +} + + +/* + * Format a Postgres version number (in the PG_VERSION_NUM integer format + * returned by PQserverVersion()) as a string. This exists mainly to + * encapsulate knowledge about two-part vs. three-part version numbers. + * + * For reentrancy, caller must supply the buffer the string is put in. + * Recommended size of the buffer is 32 bytes. + * + * Returns address of 'buf', as a notational convenience. + */ +char * +formatPGVersionNumber(int version_number, bool include_minor, + char *buf, size_t buflen) +{ + if (version_number >= 100000) + { + /* New two-part style */ + if (include_minor) + snprintf(buf, buflen, "%d.%d", version_number / 10000, + version_number % 10000); + else + snprintf(buf, buflen, "%d", version_number / 10000); + } + else + { + /* Old three-part style */ + if (include_minor) + snprintf(buf, buflen, "%d.%d.%d", version_number / 10000, + (version_number / 100) % 100, + version_number % 100); + else + snprintf(buf, buflen, "%d.%d", version_number / 10000, + (version_number / 100) % 100); + } + return buf; +} + + +/* + * Convert a string value to an SQL string literal and append it to + * the given buffer. We assume the specified client_encoding and + * standard_conforming_strings settings. + * + * This is essentially equivalent to libpq's PQescapeStringInternal, + * except for the output buffer structure. We need it in situations + * where we do not have a PGconn available. Where we do, + * appendStringLiteralConn is a better choice. + */ +void +appendStringLiteral(PQExpBuffer buf, const char *str, + int encoding, bool std_strings) +{ + size_t length = strlen(str); + const char *source = str; + char *target; + + if (!enlargePQExpBuffer(buf, 2 * length + 2)) + return; + + target = buf->data + buf->len; + *target++ = '\''; + + while (*source != '\0') + { + char c = *source; + int len; + int i; + + /* Fast path for plain ASCII */ + if (!IS_HIGHBIT_SET(c)) + { + /* Apply quoting if needed */ + if (SQL_STR_DOUBLE(c, !std_strings)) + *target++ = c; + /* Copy the character */ + *target++ = c; + source++; + continue; + } + + /* Slow path for possible multibyte characters */ + len = PQmblen(source, encoding); + + /* Copy the character */ + for (i = 0; i < len; i++) + { + if (*source == '\0') + break; + *target++ = *source++; + } + + /* + * If we hit premature end of string (ie, incomplete multibyte + * character), try to pad out to the correct length with spaces. We + * may not be able to pad completely, but we will always be able to + * insert at least one pad space (since we'd not have quoted a + * multibyte character). This should be enough to make a string that + * the server will error out on. + */ + if (i < len) + { + char *stop = buf->data + buf->maxlen - 2; + + for (; i < len; i++) + { + if (target >= stop) + break; + *target++ = ' '; + } + break; + } + } + + /* Write the terminating quote and NUL character. */ + *target++ = '\''; + *target = '\0'; + + buf->len = target - buf->data; +} + + +/* + * Convert a string value to an SQL string literal and append it to + * the given buffer. Encoding and string syntax rules are as indicated + * by current settings of the PGconn. + */ +void +appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn) +{ + size_t length = strlen(str); + + /* + * XXX This is a kluge to silence escape_string_warning in our utility + * programs. It should go away someday. + */ + if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100) + { + /* ensure we are not adjacent to an identifier */ + if (buf->len > 0 && buf->data[buf->len - 1] != ' ') + appendPQExpBufferChar(buf, ' '); + appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX); + appendStringLiteral(buf, str, PQclientEncoding(conn), false); + return; + } + /* XXX end kluge */ + + if (!enlargePQExpBuffer(buf, 2 * length + 2)) + return; + appendPQExpBufferChar(buf, '\''); + buf->len += PQescapeStringConn(conn, buf->data + buf->len, + str, length, NULL); + appendPQExpBufferChar(buf, '\''); +} + + +/* + * Convert a string value to a dollar quoted literal and append it to + * the given buffer. If the dqprefix parameter is not NULL then the + * dollar quote delimiter will begin with that (after the opening $). + * + * No escaping is done at all on str, in compliance with the rules + * for parsing dollar quoted strings. Also, we need not worry about + * encoding issues. + */ +void +appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix) +{ + static const char suffixes[] = "_XXXXXXX"; + int nextchar = 0; + PQExpBuffer delimBuf = createPQExpBuffer(); + + /* start with $ + dqprefix if not NULL */ + appendPQExpBufferChar(delimBuf, '$'); + if (dqprefix) + appendPQExpBufferStr(delimBuf, dqprefix); + + /* + * Make sure we choose a delimiter which (without the trailing $) is not + * present in the string being quoted. We don't check with the trailing $ + * because a string ending in $foo must not be quoted with $foo$. + */ + while (strstr(str, delimBuf->data) != NULL) + { + appendPQExpBufferChar(delimBuf, suffixes[nextchar++]); + nextchar %= sizeof(suffixes) - 1; + } + + /* add trailing $ */ + appendPQExpBufferChar(delimBuf, '$'); + + /* quote it and we are all done */ + appendPQExpBufferStr(buf, delimBuf->data); + appendPQExpBufferStr(buf, str); + appendPQExpBufferStr(buf, delimBuf->data); + + destroyPQExpBuffer(delimBuf); +} + + +/* + * Convert a bytea value (presented as raw bytes) to an SQL string literal + * and append it to the given buffer. We assume the specified + * standard_conforming_strings setting. + * + * This is needed in situations where we do not have a PGconn available. + * Where we do, PQescapeByteaConn is a better choice. + */ +void +appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length, + bool std_strings) +{ + const unsigned char *source = str; + char *target; + + static const char hextbl[] = "0123456789abcdef"; + + /* + * This implementation is hard-wired to produce hex-format output. We do + * not know the server version the output will be loaded into, so making + * an intelligent format choice is impossible. It might be better to + * always use the old escaped format. + */ + if (!enlargePQExpBuffer(buf, 2 * length + 5)) + return; + + target = buf->data + buf->len; + *target++ = '\''; + if (!std_strings) + *target++ = '\\'; + *target++ = '\\'; + *target++ = 'x'; + + while (length-- > 0) + { + unsigned char c = *source++; + + *target++ = hextbl[(c >> 4) & 0xF]; + *target++ = hextbl[c & 0xF]; + } + + /* Write the terminating quote and NUL character. */ + *target++ = '\''; + *target = '\0'; + + buf->len = target - buf->data; +} + + +/* + * Append the given string to the shell command being built in the buffer, + * with shell-style quoting as needed to create exactly one argument. + * + * Forbid LF or CR characters, which have scant practical use beyond designing + * security breaches. The Windows command shell is unusable as a conduit for + * arguments containing LF or CR characters. A future major release should + * reject those characters in CREATE ROLE and CREATE DATABASE, because use + * there eventually leads to errors here. + * + * appendShellString() simply prints an error and dies if LF or CR appears. + * appendShellStringNoError() omits those characters from the result, and + * returns false if there were any. + */ +void +appendShellString(PQExpBuffer buf, const char *str) +{ + if (!appendShellStringNoError(buf, str)) + { + fprintf(stderr, + _("shell command argument contains a newline or carriage return: \"%s\"\n"), + str); + exit(EXIT_FAILURE); + } +} + +bool +appendShellStringNoError(PQExpBuffer buf, const char *str) +{ +#ifdef WIN32 + int backslash_run_length = 0; +#endif + bool ok = true; + const char *p; + + /* + * Don't bother with adding quotes if the string is nonempty and clearly + * contains only safe characters. + */ + if (*str != '\0' && + strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str)) + { + appendPQExpBufferStr(buf, str); + return ok; + } + +#ifndef WIN32 + appendPQExpBufferChar(buf, '\''); + for (p = str; *p; p++) + { + if (*p == '\n' || *p == '\r') + { + ok = false; + continue; + } + + if (*p == '\'') + appendPQExpBufferStr(buf, "'\"'\"'"); + else + appendPQExpBufferChar(buf, *p); + } + appendPQExpBufferChar(buf, '\''); +#else /* WIN32 */ + + /* + * A Windows system() argument experiences two layers of interpretation. + * First, cmd.exe interprets the string. Its behavior is undocumented, + * but a caret escapes any byte except LF or CR that would otherwise have + * special meaning. Handling of a caret before LF or CR differs between + * "cmd.exe /c" and other modes, and it is unusable here. + * + * Second, the new process parses its command line to construct argv (see + * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats + * backslash-double quote sequences specially. + */ + appendPQExpBufferStr(buf, "^\""); + for (p = str; *p; p++) + { + if (*p == '\n' || *p == '\r') + { + ok = false; + continue; + } + + /* Change N backslashes before a double quote to 2N+1 backslashes. */ + if (*p == '"') + { + while (backslash_run_length) + { + appendPQExpBufferStr(buf, "^\\"); + backslash_run_length--; + } + appendPQExpBufferStr(buf, "^\\"); + } + else if (*p == '\\') + backslash_run_length++; + else + backslash_run_length = 0; + + /* + * Decline to caret-escape the most mundane characters, to ease + * debugging and lest we approach the command length limit. + */ + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + appendPQExpBufferChar(buf, '^'); + appendPQExpBufferChar(buf, *p); + } + + /* + * Change N backslashes at end of argument to 2N backslashes, because they + * precede the double quote that terminates the argument. + */ + while (backslash_run_length) + { + appendPQExpBufferStr(buf, "^\\"); + backslash_run_length--; + } + appendPQExpBufferStr(buf, "^\""); +#endif /* WIN32 */ + + return ok; +} + + +/* + * Append the given string to the buffer, with suitable quoting for passing + * the string as a value in a keyword/value pair in a libpq connection string. + */ +void +appendConnStrVal(PQExpBuffer buf, const char *str) +{ + const char *s; + bool needquotes; + + /* + * If the string is one or more plain ASCII characters, no need to quote + * it. This is quite conservative, but better safe than sorry. + */ + needquotes = true; + for (s = str; *s; s++) + { + if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || + (*s >= '0' && *s <= '9') || *s == '_' || *s == '.')) + { + needquotes = true; + break; + } + needquotes = false; + } + + if (needquotes) + { + appendPQExpBufferChar(buf, '\''); + while (*str) + { + /* ' and \ must be escaped by to \' and \\ */ + if (*str == '\'' || *str == '\\') + appendPQExpBufferChar(buf, '\\'); + + appendPQExpBufferChar(buf, *str); + str++; + } + appendPQExpBufferChar(buf, '\''); + } + else + appendPQExpBufferStr(buf, str); +} + + +/* + * Append a psql meta-command that connects to the given database with the + * then-current connection's user, host and port. + */ +void +appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname) +{ + const char *s; + bool complex; + + /* + * If the name is plain ASCII characters, emit a trivial "\connect "foo"". + * For other names, even many not technically requiring it, skip to the + * general case. No database has a zero-length name. + */ + complex = false; + + for (s = dbname; *s; s++) + { + if (*s == '\n' || *s == '\r') + { + fprintf(stderr, + _("database name contains a newline or carriage return: \"%s\"\n"), + dbname); + exit(EXIT_FAILURE); + } + + if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || + (*s >= '0' && *s <= '9') || *s == '_' || *s == '.')) + { + complex = true; + } + } + + appendPQExpBufferStr(buf, "\\connect "); + if (complex) + { + PQExpBufferData connstr; + + initPQExpBuffer(&connstr); + appendPQExpBufferStr(&connstr, "dbname="); + appendConnStrVal(&connstr, dbname); + + appendPQExpBufferStr(buf, "-reuse-previous=on "); + + /* + * As long as the name does not contain a newline, SQL identifier + * quoting satisfies the psql meta-command parser. Prefer not to + * involve psql-interpreted single quotes, which behaved differently + * before PostgreSQL 9.2. + */ + appendPQExpBufferStr(buf, fmtId(connstr.data)); + + termPQExpBuffer(&connstr); + } + else + appendPQExpBufferStr(buf, fmtId(dbname)); + appendPQExpBufferChar(buf, '\n'); +} + + +/* + * Deconstruct the text representation of a 1-dimensional Postgres array + * into individual items. + * + * On success, returns true and sets *itemarray and *nitems to describe + * an array of individual strings. On parse failure, returns false; + * *itemarray may exist or be NULL. + * + * NOTE: free'ing itemarray is sufficient to deallocate the working storage. + */ +bool +parsePGArray(const char *atext, char ***itemarray, int *nitems) +{ + int inputlen; + char **items; + char *strings; + int curitem; + + /* + * We expect input in the form of "{item,item,item}" where any item is + * either raw data, or surrounded by double quotes (in which case embedded + * characters including backslashes and quotes are backslashed). + * + * We build the result as an array of pointers followed by the actual + * string data, all in one malloc block for convenience of deallocation. + * The worst-case storage need is not more than one pointer and one + * character for each input character (consider "{,,,,,,,,,,}"). + */ + *itemarray = NULL; + *nitems = 0; + inputlen = strlen(atext); + if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}') + return false; /* bad input */ + items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char))); + if (items == NULL) + return false; /* out of memory */ + *itemarray = items; + strings = (char *) (items + inputlen); + + atext++; /* advance over initial '{' */ + curitem = 0; + while (*atext != '}') + { + if (*atext == '\0') + return false; /* premature end of string */ + items[curitem] = strings; + while (*atext != '}' && *atext != ',') + { + if (*atext == '\0') + return false; /* premature end of string */ + if (*atext != '"') + *strings++ = *atext++; /* copy unquoted data */ + else + { + /* process quoted substring */ + atext++; + while (*atext != '"') + { + if (*atext == '\0') + return false; /* premature end of string */ + if (*atext == '\\') + { + atext++; + if (*atext == '\0') + return false; /* premature end of string */ + } + *strings++ = *atext++; /* copy quoted data */ + } + atext++; + } + } + *strings++ = '\0'; + if (*atext == ',') + atext++; + curitem++; + } + if (atext[1] != '\0') + return false; /* bogus syntax (embedded '}') */ + *nitems = curitem; + return true; +} + + +/* + * Format a reloptions array and append it to the given buffer. + * + * "prefix" is prepended to the option names; typically it's "" or "toast.". + * + * Returns false if the reloptions array could not be parsed (in which case + * nothing will have been appended to the buffer), or true on success. + * + * Note: this logic should generally match the backend's flatten_reloptions() + * (in adt/ruleutils.c). + */ +bool +appendReloptionsArray(PQExpBuffer buffer, const char *reloptions, + const char *prefix, int encoding, bool std_strings) +{ + char **options; + int noptions; + int i; + + if (!parsePGArray(reloptions, &options, &noptions)) + { + if (options) + free(options); + return false; + } + + for (i = 0; i < noptions; i++) + { + char *option = options[i]; + char *name; + char *separator; + char *value; + + /* + * Each array element should have the form name=value. If the "=" is + * missing for some reason, treat it like an empty value. + */ + name = option; + separator = strchr(option, '='); + if (separator) + { + *separator = '\0'; + value = separator + 1; + } + else + value = ""; + + if (i > 0) + appendPQExpBufferStr(buffer, ", "); + appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name)); + + /* + * In general we need to quote the value; but to avoid unnecessary + * clutter, do not quote if it is an identifier that would not need + * quoting. (We could also allow numbers, but that is a bit trickier + * than it looks --- for example, are leading zeroes significant? We + * don't want to assume very much here about what custom reloptions + * might mean.) + */ + if (strcmp(fmtId(value), value) == 0) + appendPQExpBufferStr(buffer, value); + else + appendStringLiteral(buffer, value, encoding, std_strings); + } + + if (options) + free(options); + + return true; +} + + +/* + * processSQLNamePattern + * + * Scan a wildcard-pattern string and generate appropriate WHERE clauses + * to limit the set of objects returned. The WHERE clauses are appended + * to the already-partially-constructed query in buf. Returns whether + * any clause was added. + * + * conn: connection query will be sent to (consulted for escaping rules). + * buf: output parameter. + * pattern: user-specified pattern option, or NULL if none ("*" is implied). + * have_where: true if caller already emitted "WHERE" (clauses will be ANDed + * onto the existing WHERE clause). + * force_escape: always quote regexp special characters, even outside + * double quotes (else they are quoted only between double quotes). + * schemavar: name of query variable to match against a schema-name pattern. + * Can be NULL if no schema. + * namevar: name of query variable to match against an object-name pattern. + * altnamevar: NULL, or name of an alternative variable to match against name. + * visibilityrule: clause to use if we want to restrict to visible objects + * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL. + * dbnamebuf: output parameter receiving the database name portion of the + * pattern, if any. Can be NULL. + * dotcnt: how many separators were parsed from the pattern, by reference. + * + * Formatting note: the text already present in buf should end with a newline. + * The appended text, if any, will end with one too. + */ +bool +processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern, + bool have_where, bool force_escape, + const char *schemavar, const char *namevar, + const char *altnamevar, const char *visibilityrule, + PQExpBuffer dbnamebuf, int *dotcnt) +{ + PQExpBufferData schemabuf; + PQExpBufferData namebuf; + bool added_clause = false; + int dcnt; + +#define WHEREAND() \ + (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \ + have_where = true, added_clause = true) + + if (dotcnt == NULL) + dotcnt = &dcnt; + *dotcnt = 0; + if (pattern == NULL) + { + /* Default: select all visible objects */ + if (visibilityrule) + { + WHEREAND(); + appendPQExpBuffer(buf, "%s\n", visibilityrule); + } + return added_clause; + } + + initPQExpBuffer(&schemabuf); + initPQExpBuffer(&namebuf); + + /* + * Convert shell-style 'pattern' into the regular expression(s) we want to + * execute. Quoting/escaping into SQL literal format will be done below + * using appendStringLiteralConn(). + */ + patternToSQLRegex(PQclientEncoding(conn), + (schemavar ? dbnamebuf : NULL), + (schemavar ? &schemabuf: NULL), + &namebuf, pattern, force_escape, true, dotcnt); + + /* + * Now decide what we need to emit. We may run under a hostile + * search_path, so qualify EVERY name. Note there will be a leading "^(" + * in the patterns in any case. + * + * We want the regex matches to use the database's default collation where + * collation-sensitive behavior is required (for example, which characters + * match '\w'). That happened by default before PG v12, but if the server + * is >= v12 then we need to force it through explicit COLLATE clauses, + * otherwise the "C" collation attached to "name" catalog columns wins. + */ + if (namevar && namebuf.len > 2) + { + /* We have a name pattern, so constrain the namevar(s) */ + + /* Optimize away a "*" pattern */ + if (strcmp(namebuf.data, "^(.*)$") != 0) + { + WHEREAND(); + if (altnamevar) + { + appendPQExpBuffer(buf, + "(%s OPERATOR(pg_catalog.~) ", namevar); + appendStringLiteralConn(buf, namebuf.data, conn); + if (PQserverVersion(conn) >= 120000) + appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); + appendPQExpBuffer(buf, + "\n OR %s OPERATOR(pg_catalog.~) ", + altnamevar); + appendStringLiteralConn(buf, namebuf.data, conn); + if (PQserverVersion(conn) >= 120000) + appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); + appendPQExpBufferStr(buf, ")\n"); + } + else + { + appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar); + appendStringLiteralConn(buf, namebuf.data, conn); + if (PQserverVersion(conn) >= 120000) + appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); + appendPQExpBufferChar(buf, '\n'); + } + } + } + + if (schemavar && schemabuf.len > 2) + { + /* We have a schema pattern, so constrain the schemavar */ + + /* Optimize away a "*" pattern */ + if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar) + { + WHEREAND(); + appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar); + appendStringLiteralConn(buf, schemabuf.data, conn); + if (PQserverVersion(conn) >= 120000) + appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); + appendPQExpBufferChar(buf, '\n'); + } + } + else + { + /* No schema pattern given, so select only visible objects */ + if (visibilityrule) + { + WHEREAND(); + appendPQExpBuffer(buf, "%s\n", visibilityrule); + } + } + + termPQExpBuffer(&schemabuf); + termPQExpBuffer(&namebuf); + + return added_clause; +#undef WHEREAND +} + +/* + * Transform a possibly qualified shell-style object name pattern into up to + * three SQL-style regular expressions, converting quotes, lower-casing + * unquoted letters, and adjusting shell-style wildcard characters into regexp + * notation. + * + * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern + * contains two or more dbname/schema/name separators, we parse the portions of + * the pattern prior to the first and second separators into dbnamebuf and + * schemabuf, and the rest into namebuf. + * + * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at + * least one separator, we parse the first portion into schemabuf and the rest + * into namebuf. + * + * Otherwise, we parse all the pattern into namebuf. + * + * If the pattern contains more dotted parts than buffers to parse into, the + * extra dots will be treated as literal characters and written into the + * namebuf, though they will be counted. Callers should always check the value + * returned by reference in dotcnt and handle this error case appropriately. + * + * We surround the regexps with "^(...)$" to force them to match whole strings, + * as per SQL practice. We have to have parens in case strings contain "|", + * else the "^" and "$" will be bound into the first and last alternatives + * which is not what we want. Whether this is done for dbnamebuf is controlled + * by the want_literal_dbname parameter. + * + * The regexps we parse into the buffers are appended to the data (if any) + * already present. If we parse fewer fields than the number of buffers we + * were given, the extra buffers are unaltered. + * + * encoding: the character encoding for the given pattern + * dbnamebuf: output parameter receiving the database name portion of the + * pattern, if any. Can be NULL. + * schemabuf: output parameter receiving the schema name portion of the + * pattern, if any. Can be NULL. + * namebuf: output parameter receiving the database name portion of the + * pattern, if any. Can be NULL. + * pattern: user-specified pattern option, or NULL if none ("*" is implied). + * force_escape: always quote regexp special characters, even outside + * double quotes (else they are quoted only between double quotes). + * want_literal_dbname: if true, regexp special characters within the database + * name portion of the pattern will not be escaped, nor will the dbname be + * converted into a regular expression. + * dotcnt: output parameter receiving the number of separators parsed from the + * pattern. + */ +void +patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, + PQExpBuffer namebuf, const char *pattern, bool force_escape, + bool want_literal_dbname, int *dotcnt) +{ + PQExpBufferData buf[3]; + PQExpBufferData left_literal; + PQExpBuffer curbuf; + PQExpBuffer maxbuf; + int i; + bool inquotes; + bool left; + const char *cp; + + Assert(pattern != NULL); + Assert(namebuf != NULL); + + /* callers should never expect "dbname.relname" format */ + Assert(dbnamebuf == NULL || schemabuf != NULL); + Assert(dotcnt != NULL); + + *dotcnt = 0; + inquotes = false; + cp = pattern; + + if (dbnamebuf != NULL) + maxbuf = &buf[2]; + else if (schemabuf != NULL) + maxbuf = &buf[1]; + else + maxbuf = &buf[0]; + + curbuf = &buf[0]; + if (want_literal_dbname) + { + left = true; + initPQExpBuffer(&left_literal); + } + else + left = false; + initPQExpBuffer(curbuf); + appendPQExpBufferStr(curbuf, "^("); + while (*cp) + { + char ch = *cp; + + if (ch == '"') + { + if (inquotes && cp[1] == '"') + { + /* emit one quote, stay in inquotes mode */ + appendPQExpBufferChar(curbuf, '"'); + if (left) + appendPQExpBufferChar(&left_literal, '"'); + cp++; + } + else + inquotes = !inquotes; + cp++; + } + else if (!inquotes && isupper((unsigned char) ch)) + { + appendPQExpBufferChar(curbuf, + pg_tolower((unsigned char) ch)); + if (left) + appendPQExpBufferChar(&left_literal, + pg_tolower((unsigned char) ch)); + cp++; + } + else if (!inquotes && ch == '*') + { + appendPQExpBufferStr(curbuf, ".*"); + if (left) + appendPQExpBufferChar(&left_literal, '*'); + cp++; + } + else if (!inquotes && ch == '?') + { + appendPQExpBufferChar(curbuf, '.'); + if (left) + appendPQExpBufferChar(&left_literal, '?'); + cp++; + } + else if (!inquotes && ch == '.') + { + left = false; + if (dotcnt) + (*dotcnt)++; + if (curbuf < maxbuf) + { + appendPQExpBufferStr(curbuf, ")$"); + curbuf++; + initPQExpBuffer(curbuf); + appendPQExpBufferStr(curbuf, "^("); + cp++; + } + else + appendPQExpBufferChar(curbuf, *cp++); + } + else if (ch == '$') + { + /* + * Dollar is always quoted, whether inside quotes or not. The + * reason is that it's allowed in SQL identifiers, so there's a + * significant use-case for treating it literally, while because + * we anchor the pattern automatically there is no use-case for + * having it possess its regexp meaning. + */ + appendPQExpBufferStr(curbuf, "\\$"); + if (left) + appendPQExpBufferChar(&left_literal, '$'); + cp++; + } + else + { + /* + * Ordinary data character, transfer to pattern + * + * Inside double quotes, or at all times if force_escape is true, + * quote regexp special characters with a backslash to avoid + * regexp errors. Outside quotes, however, let them pass through + * as-is; this lets knowledgeable users build regexp expressions + * that are more powerful than shell-style patterns. + * + * As an exception to that, though, always quote "[]", as that's + * much more likely to be an attempt to write an array type name + * than it is to be the start of a regexp bracket expression. + */ + if ((inquotes || force_escape) && + strchr("|*+?()[]{}.^$\\", ch)) + appendPQExpBufferChar(curbuf, '\\'); + else if (ch == '[' && cp[1] == ']') + appendPQExpBufferChar(curbuf, '\\'); + i = PQmblenBounded(cp, encoding); + while (i--) + { + if (left) + appendPQExpBufferChar(&left_literal, *cp); + appendPQExpBufferChar(curbuf, *cp++); + } + } + } + appendPQExpBufferStr(curbuf, ")$"); + + if (namebuf) + { + appendPQExpBufferStr(namebuf, curbuf->data); + termPQExpBuffer(curbuf); + curbuf--; + } + + if (schemabuf && curbuf >= buf) + { + appendPQExpBufferStr(schemabuf, curbuf->data); + termPQExpBuffer(curbuf); + curbuf--; + } + + if (dbnamebuf && curbuf >= buf) + { + if (want_literal_dbname) + appendPQExpBufferStr(dbnamebuf, left_literal.data); + else + appendPQExpBufferStr(dbnamebuf, curbuf->data); + termPQExpBuffer(curbuf); + } +} |