/*------------------------------------------------------------------------- * * String-processing utility routines for frontend code * * Assorted utility functions that are useful in constructing SQL queries * and interpreting backend output. * * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/fe_utils/string_utils.c * *------------------------------------------------------------------------- */ #include "postgres_fe.h" #include #include "common/keywords.h" #include "fe_utils/string_utils.h" static PQExpBuffer defaultGetLocalPQExpBuffer(void); /* Globals exported by this file */ int quote_all_identifiers = 0; PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer; /* * Returns a temporary PQExpBuffer, valid until the next call to the function. * This is used by fmtId and fmtQualifiedId. * * Non-reentrant and non-thread-safe but reduces memory leakage. You can * replace this with a custom version by setting the getLocalPQExpBuffer * function pointer. */ static PQExpBuffer defaultGetLocalPQExpBuffer(void) { static PQExpBuffer id_return = NULL; if (id_return) /* first time through? */ { /* same buffer, just wipe contents */ resetPQExpBuffer(id_return); } else { /* new buffer */ id_return = createPQExpBuffer(); } return id_return; } /* * Quotes input string if it's not a legitimate SQL identifier as-is. * * Note that the returned string must be used before calling fmtId again, * since we re-use the same return buffer each time. */ const char * fmtId(const char *rawid) { PQExpBuffer id_return = getLocalPQExpBuffer(); const char *cp; bool need_quotes = false; /* * These checks need to match the identifier production in scan.l. Don't * use islower() etc. */ if (quote_all_identifiers) need_quotes = true; /* slightly different rules for first character */ else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_')) need_quotes = true; else { /* otherwise check the entire string */ for (cp = rawid; *cp; cp++) { if (!((*cp >= 'a' && *cp <= 'z') || (*cp >= '0' && *cp <= '9') || (*cp == '_'))) { need_quotes = true; break; } } } if (!need_quotes) { /* * Check for keyword. We quote keywords except for unreserved ones. * (In some cases we could avoid quoting a col_name or type_func_name * keyword, but it seems much harder than it's worth to tell that.) * * Note: ScanKeywordLookup() does case-insensitive comparison, but * that's fine, since we already know we have all-lower-case. */ int kwnum = ScanKeywordLookup(rawid, &ScanKeywords); if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD) need_quotes = true; } if (!need_quotes) { /* no quoting needed */ appendPQExpBufferStr(id_return, rawid); } else { appendPQExpBufferChar(id_return, '"'); for (cp = rawid; *cp; cp++) { /* * Did we find a double-quote in the string? Then make this a * double double-quote per SQL99. Before, we put in a * backslash/double-quote pair. - thomas 2000-08-05 */ if (*cp == '"') appendPQExpBufferChar(id_return, '"'); appendPQExpBufferChar(id_return, *cp); } appendPQExpBufferChar(id_return, '"'); } return id_return->data; } /* * fmtQualifiedId - construct a schema-qualified name, with quoting as needed. * * Like fmtId, use the result before calling again. * * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot * use that buffer until we're finished with calling fmtId(). */ const char * fmtQualifiedId(const char *schema, const char *id) { PQExpBuffer id_return; PQExpBuffer lcl_pqexp = createPQExpBuffer(); /* Some callers might fail to provide a schema name */ if (schema && *schema) { appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema)); } appendPQExpBufferStr(lcl_pqexp, fmtId(id)); id_return = getLocalPQExpBuffer(); appendPQExpBufferStr(id_return, lcl_pqexp->data); destroyPQExpBuffer(lcl_pqexp); return id_return->data; } /* * Format a Postgres version number (in the PG_VERSION_NUM integer format * returned by PQserverVersion()) as a string. This exists mainly to * encapsulate knowledge about two-part vs. three-part version numbers. * * For reentrancy, caller must supply the buffer the string is put in. * Recommended size of the buffer is 32 bytes. * * Returns address of 'buf', as a notational convenience. */ char * formatPGVersionNumber(int version_number, bool include_minor, char *buf, size_t buflen) { if (version_number >= 100000) { /* New two-part style */ if (include_minor) snprintf(buf, buflen, "%d.%d", version_number / 10000, version_number % 10000); else snprintf(buf, buflen, "%d", version_number / 10000); } else { /* Old three-part style */ if (include_minor) snprintf(buf, buflen, "%d.%d.%d", version_number / 10000, (version_number / 100) % 100, version_number % 100); else snprintf(buf, buflen, "%d.%d", version_number / 10000, (version_number / 100) % 100); } return buf; } /* * Convert a string value to an SQL string literal and append it to * the given buffer. We assume the specified client_encoding and * standard_conforming_strings settings. * * This is essentially equivalent to libpq's PQescapeStringInternal, * except for the output buffer structure. We need it in situations * where we do not have a PGconn available. Where we do, * appendStringLiteralConn is a better choice. */ void appendStringLiteral(PQExpBuffer buf, const char *str, int encoding, bool std_strings) { size_t length = strlen(str); const char *source = str; char *target; if (!enlargePQExpBuffer(buf, 2 * length + 2)) return; target = buf->data + buf->len; *target++ = '\''; while (*source != '\0') { char c = *source; int len; int i; /* Fast path for plain ASCII */ if (!IS_HIGHBIT_SET(c)) { /* Apply quoting if needed */ if (SQL_STR_DOUBLE(c, !std_strings)) *target++ = c; /* Copy the character */ *target++ = c; source++; continue; } /* Slow path for possible multibyte characters */ len = PQmblen(source, encoding); /* Copy the character */ for (i = 0; i < len; i++) { if (*source == '\0') break; *target++ = *source++; } /* * If we hit premature end of string (ie, incomplete multibyte * character), try to pad out to the correct length with spaces. We * may not be able to pad completely, but we will always be able to * insert at least one pad space (since we'd not have quoted a * multibyte character). This should be enough to make a string that * the server will error out on. */ if (i < len) { char *stop = buf->data + buf->maxlen - 2; for (; i < len; i++) { if (target >= stop) break; *target++ = ' '; } break; } } /* Write the terminating quote and NUL character. */ *target++ = '\''; *target = '\0'; buf->len = target - buf->data; } /* * Convert a string value to an SQL string literal and append it to * the given buffer. Encoding and string syntax rules are as indicated * by current settings of the PGconn. */ void appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn) { size_t length = strlen(str); /* * XXX This is a kluge to silence escape_string_warning in our utility * programs. It should go away someday. */ if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100) { /* ensure we are not adjacent to an identifier */ if (buf->len > 0 && buf->data[buf->len - 1] != ' ') appendPQExpBufferChar(buf, ' '); appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX); appendStringLiteral(buf, str, PQclientEncoding(conn), false); return; } /* XXX end kluge */ if (!enlargePQExpBuffer(buf, 2 * length + 2)) return; appendPQExpBufferChar(buf, '\''); buf->len += PQescapeStringConn(conn, buf->data + buf->len, str, length, NULL); appendPQExpBufferChar(buf, '\''); } /* * Convert a string value to a dollar quoted literal and append it to * the given buffer. If the dqprefix parameter is not NULL then the * dollar quote delimiter will begin with that (after the opening $). * * No escaping is done at all on str, in compliance with the rules * for parsing dollar quoted strings. Also, we need not worry about * encoding issues. */ void appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix) { static const char suffixes[] = "_XXXXXXX"; int nextchar = 0; PQExpBuffer delimBuf = createPQExpBuffer(); /* start with $ + dqprefix if not NULL */ appendPQExpBufferChar(delimBuf, '$'); if (dqprefix) appendPQExpBufferStr(delimBuf, dqprefix); /* * Make sure we choose a delimiter which (without the trailing $) is not * present in the string being quoted. We don't check with the trailing $ * because a string ending in $foo must not be quoted with $foo$. */ while (strstr(str, delimBuf->data) != NULL) { appendPQExpBufferChar(delimBuf, suffixes[nextchar++]); nextchar %= sizeof(suffixes) - 1; } /* add trailing $ */ appendPQExpBufferChar(delimBuf, '$'); /* quote it and we are all done */ appendPQExpBufferStr(buf, delimBuf->data); appendPQExpBufferStr(buf, str); appendPQExpBufferStr(buf, delimBuf->data); destroyPQExpBuffer(delimBuf); } /* * Convert a bytea value (presented as raw bytes) to an SQL string literal * and append it to the given buffer. We assume the specified * standard_conforming_strings setting. * * This is needed in situations where we do not have a PGconn available. * Where we do, PQescapeByteaConn is a better choice. */ void appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length, bool std_strings) { const unsigned char *source = str; char *target; static const char hextbl[] = "0123456789abcdef"; /* * This implementation is hard-wired to produce hex-format output. We do * not know the server version the output will be loaded into, so making * an intelligent format choice is impossible. It might be better to * always use the old escaped format. */ if (!enlargePQExpBuffer(buf, 2 * length + 5)) return; target = buf->data + buf->len; *target++ = '\''; if (!std_strings) *target++ = '\\'; *target++ = '\\'; *target++ = 'x'; while (length-- > 0) { unsigned char c = *source++; *target++ = hextbl[(c >> 4) & 0xF]; *target++ = hextbl[c & 0xF]; } /* Write the terminating quote and NUL character. */ *target++ = '\''; *target = '\0'; buf->len = target - buf->data; } /* * Append the given string to the shell command being built in the buffer, * with shell-style quoting as needed to create exactly one argument. * * Forbid LF or CR characters, which have scant practical use beyond designing * security breaches. The Windows command shell is unusable as a conduit for * arguments containing LF or CR characters. A future major release should * reject those characters in CREATE ROLE and CREATE DATABASE, because use * there eventually leads to errors here. * * appendShellString() simply prints an error and dies if LF or CR appears. * appendShellStringNoError() omits those characters from the result, and * returns false if there were any. */ void appendShellString(PQExpBuffer buf, const char *str) { if (!appendShellStringNoError(buf, str)) { fprintf(stderr, _("shell command argument contains a newline or carriage return: \"%s\"\n"), str); exit(EXIT_FAILURE); } } bool appendShellStringNoError(PQExpBuffer buf, const char *str) { #ifdef WIN32 int backslash_run_length = 0; #endif bool ok = true; const char *p; /* * Don't bother with adding quotes if the string is nonempty and clearly * contains only safe characters. */ if (*str != '\0' && strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str)) { appendPQExpBufferStr(buf, str); return ok; } #ifndef WIN32 appendPQExpBufferChar(buf, '\''); for (p = str; *p; p++) { if (*p == '\n' || *p == '\r') { ok = false; continue; } if (*p == '\'') appendPQExpBufferStr(buf, "'\"'\"'"); else appendPQExpBufferChar(buf, *p); } appendPQExpBufferChar(buf, '\''); #else /* WIN32 */ /* * A Windows system() argument experiences two layers of interpretation. * First, cmd.exe interprets the string. Its behavior is undocumented, * but a caret escapes any byte except LF or CR that would otherwise have * special meaning. Handling of a caret before LF or CR differs between * "cmd.exe /c" and other modes, and it is unusable here. * * Second, the new process parses its command line to construct argv (see * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats * backslash-double quote sequences specially. */ appendPQExpBufferStr(buf, "^\""); for (p = str; *p; p++) { if (*p == '\n' || *p == '\r') { ok = false; continue; } /* Change N backslashes before a double quote to 2N+1 backslashes. */ if (*p == '"') { while (backslash_run_length) { appendPQExpBufferStr(buf, "^\\"); backslash_run_length--; } appendPQExpBufferStr(buf, "^\\"); } else if (*p == '\\') backslash_run_length++; else backslash_run_length = 0; /* * Decline to caret-escape the most mundane characters, to ease * debugging and lest we approach the command length limit. */ if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9'))) appendPQExpBufferChar(buf, '^'); appendPQExpBufferChar(buf, *p); } /* * Change N backslashes at end of argument to 2N backslashes, because they * precede the double quote that terminates the argument. */ while (backslash_run_length) { appendPQExpBufferStr(buf, "^\\"); backslash_run_length--; } appendPQExpBufferStr(buf, "^\""); #endif /* WIN32 */ return ok; } /* * Append the given string to the buffer, with suitable quoting for passing * the string as a value in a keyword/value pair in a libpq connection string. */ void appendConnStrVal(PQExpBuffer buf, const char *str) { const char *s; bool needquotes; /* * If the string is one or more plain ASCII characters, no need to quote * it. This is quite conservative, but better safe than sorry. */ needquotes = true; for (s = str; *s; s++) { if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || (*s >= '0' && *s <= '9') || *s == '_' || *s == '.')) { needquotes = true; break; } needquotes = false; } if (needquotes) { appendPQExpBufferChar(buf, '\''); while (*str) { /* ' and \ must be escaped by to \' and \\ */ if (*str == '\'' || *str == '\\') appendPQExpBufferChar(buf, '\\'); appendPQExpBufferChar(buf, *str); str++; } appendPQExpBufferChar(buf, '\''); } else appendPQExpBufferStr(buf, str); } /* * Append a psql meta-command that connects to the given database with the * then-current connection's user, host and port. */ void appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname) { const char *s; bool complex; /* * If the name is plain ASCII characters, emit a trivial "\connect "foo"". * For other names, even many not technically requiring it, skip to the * general case. No database has a zero-length name. */ complex = false; for (s = dbname; *s; s++) { if (*s == '\n' || *s == '\r') { fprintf(stderr, _("database name contains a newline or carriage return: \"%s\"\n"), dbname); exit(EXIT_FAILURE); } if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || (*s >= '0' && *s <= '9') || *s == '_' || *s == '.')) { complex = true; } } appendPQExpBufferStr(buf, "\\connect "); if (complex) { PQExpBufferData connstr; initPQExpBuffer(&connstr); appendPQExpBufferStr(&connstr, "dbname="); appendConnStrVal(&connstr, dbname); appendPQExpBufferStr(buf, "-reuse-previous=on "); /* * As long as the name does not contain a newline, SQL identifier * quoting satisfies the psql meta-command parser. Prefer not to * involve psql-interpreted single quotes, which behaved differently * before PostgreSQL 9.2. */ appendPQExpBufferStr(buf, fmtId(connstr.data)); termPQExpBuffer(&connstr); } else appendPQExpBufferStr(buf, fmtId(dbname)); appendPQExpBufferChar(buf, '\n'); } /* * Deconstruct the text representation of a 1-dimensional Postgres array * into individual items. * * On success, returns true and sets *itemarray and *nitems to describe * an array of individual strings. On parse failure, returns false; * *itemarray may exist or be NULL. * * NOTE: free'ing itemarray is sufficient to deallocate the working storage. */ bool parsePGArray(const char *atext, char ***itemarray, int *nitems) { int inputlen; char **items; char *strings; int curitem; /* * We expect input in the form of "{item,item,item}" where any item is * either raw data, or surrounded by double quotes (in which case embedded * characters including backslashes and quotes are backslashed). * * We build the result as an array of pointers followed by the actual * string data, all in one malloc block for convenience of deallocation. * The worst-case storage need is not more than one pointer and one * character for each input character (consider "{,,,,,,,,,,}"). */ *itemarray = NULL; *nitems = 0; inputlen = strlen(atext); if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}') return false; /* bad input */ items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char))); if (items == NULL) return false; /* out of memory */ *itemarray = items; strings = (char *) (items + inputlen); atext++; /* advance over initial '{' */ curitem = 0; while (*atext != '}') { if (*atext == '\0') return false; /* premature end of string */ items[curitem] = strings; while (*atext != '}' && *atext != ',') { if (*atext == '\0') return false; /* premature end of string */ if (*atext != '"') *strings++ = *atext++; /* copy unquoted data */ else { /* process quoted substring */ atext++; while (*atext != '"') { if (*atext == '\0') return false; /* premature end of string */ if (*atext == '\\') { atext++; if (*atext == '\0') return false; /* premature end of string */ } *strings++ = *atext++; /* copy quoted data */ } atext++; } } *strings++ = '\0'; if (*atext == ',') atext++; curitem++; } if (atext[1] != '\0') return false; /* bogus syntax (embedded '}') */ *nitems = curitem; return true; } /* * Format a reloptions array and append it to the given buffer. * * "prefix" is prepended to the option names; typically it's "" or "toast.". * * Returns false if the reloptions array could not be parsed (in which case * nothing will have been appended to the buffer), or true on success. * * Note: this logic should generally match the backend's flatten_reloptions() * (in adt/ruleutils.c). */ bool appendReloptionsArray(PQExpBuffer buffer, const char *reloptions, const char *prefix, int encoding, bool std_strings) { char **options; int noptions; int i; if (!parsePGArray(reloptions, &options, &noptions)) { if (options) free(options); return false; } for (i = 0; i < noptions; i++) { char *option = options[i]; char *name; char *separator; char *value; /* * Each array element should have the form name=value. If the "=" is * missing for some reason, treat it like an empty value. */ name = option; separator = strchr(option, '='); if (separator) { *separator = '\0'; value = separator + 1; } else value = ""; if (i > 0) appendPQExpBufferStr(buffer, ", "); appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name)); /* * In general we need to quote the value; but to avoid unnecessary * clutter, do not quote if it is an identifier that would not need * quoting. (We could also allow numbers, but that is a bit trickier * than it looks --- for example, are leading zeroes significant? We * don't want to assume very much here about what custom reloptions * might mean.) */ if (strcmp(fmtId(value), value) == 0) appendPQExpBufferStr(buffer, value); else appendStringLiteral(buffer, value, encoding, std_strings); } if (options) free(options); return true; } /* * processSQLNamePattern * * Scan a wildcard-pattern string and generate appropriate WHERE clauses * to limit the set of objects returned. The WHERE clauses are appended * to the already-partially-constructed query in buf. Returns whether * any clause was added. * * conn: connection query will be sent to (consulted for escaping rules). * buf: output parameter. * pattern: user-specified pattern option, or NULL if none ("*" is implied). * have_where: true if caller already emitted "WHERE" (clauses will be ANDed * onto the existing WHERE clause). * force_escape: always quote regexp special characters, even outside * double quotes (else they are quoted only between double quotes). * schemavar: name of query variable to match against a schema-name pattern. * Can be NULL if no schema. * namevar: name of query variable to match against an object-name pattern. * altnamevar: NULL, or name of an alternative variable to match against name. * visibilityrule: clause to use if we want to restrict to visible objects * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL. * dbnamebuf: output parameter receiving the database name portion of the * pattern, if any. Can be NULL. * dotcnt: how many separators were parsed from the pattern, by reference. * * Formatting note: the text already present in buf should end with a newline. * The appended text, if any, will end with one too. */ bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern, bool have_where, bool force_escape, const char *schemavar, const char *namevar, const char *altnamevar, const char *visibilityrule, PQExpBuffer dbnamebuf, int *dotcnt) { PQExpBufferData schemabuf; PQExpBufferData namebuf; bool added_clause = false; int dcnt; #define WHEREAND() \ (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \ have_where = true, added_clause = true) if (dotcnt == NULL) dotcnt = &dcnt; *dotcnt = 0; if (pattern == NULL) { /* Default: select all visible objects */ if (visibilityrule) { WHEREAND(); appendPQExpBuffer(buf, "%s\n", visibilityrule); } return added_clause; } initPQExpBuffer(&schemabuf); initPQExpBuffer(&namebuf); /* * Convert shell-style 'pattern' into the regular expression(s) we want to * execute. Quoting/escaping into SQL literal format will be done below * using appendStringLiteralConn(). */ patternToSQLRegex(PQclientEncoding(conn), (schemavar ? dbnamebuf : NULL), (schemavar ? &schemabuf: NULL), &namebuf, pattern, force_escape, true, dotcnt); /* * Now decide what we need to emit. We may run under a hostile * search_path, so qualify EVERY name. Note there will be a leading "^(" * in the patterns in any case. * * We want the regex matches to use the database's default collation where * collation-sensitive behavior is required (for example, which characters * match '\w'). That happened by default before PG v12, but if the server * is >= v12 then we need to force it through explicit COLLATE clauses, * otherwise the "C" collation attached to "name" catalog columns wins. */ if (namevar && namebuf.len > 2) { /* We have a name pattern, so constrain the namevar(s) */ /* Optimize away a "*" pattern */ if (strcmp(namebuf.data, "^(.*)$") != 0) { WHEREAND(); if (altnamevar) { appendPQExpBuffer(buf, "(%s OPERATOR(pg_catalog.~) ", namevar); appendStringLiteralConn(buf, namebuf.data, conn); if (PQserverVersion(conn) >= 120000) appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); appendPQExpBuffer(buf, "\n OR %s OPERATOR(pg_catalog.~) ", altnamevar); appendStringLiteralConn(buf, namebuf.data, conn); if (PQserverVersion(conn) >= 120000) appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); appendPQExpBufferStr(buf, ")\n"); } else { appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar); appendStringLiteralConn(buf, namebuf.data, conn); if (PQserverVersion(conn) >= 120000) appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); appendPQExpBufferChar(buf, '\n'); } } } if (schemavar && schemabuf.len > 2) { /* We have a schema pattern, so constrain the schemavar */ /* Optimize away a "*" pattern */ if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar) { WHEREAND(); appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar); appendStringLiteralConn(buf, schemabuf.data, conn); if (PQserverVersion(conn) >= 120000) appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); appendPQExpBufferChar(buf, '\n'); } } else { /* No schema pattern given, so select only visible objects */ if (visibilityrule) { WHEREAND(); appendPQExpBuffer(buf, "%s\n", visibilityrule); } } termPQExpBuffer(&schemabuf); termPQExpBuffer(&namebuf); return added_clause; #undef WHEREAND } /* * Transform a possibly qualified shell-style object name pattern into up to * three SQL-style regular expressions, converting quotes, lower-casing * unquoted letters, and adjusting shell-style wildcard characters into regexp * notation. * * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern * contains two or more dbname/schema/name separators, we parse the portions of * the pattern prior to the first and second separators into dbnamebuf and * schemabuf, and the rest into namebuf. * * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at * least one separator, we parse the first portion into schemabuf and the rest * into namebuf. * * Otherwise, we parse all the pattern into namebuf. * * If the pattern contains more dotted parts than buffers to parse into, the * extra dots will be treated as literal characters and written into the * namebuf, though they will be counted. Callers should always check the value * returned by reference in dotcnt and handle this error case appropriately. * * We surround the regexps with "^(...)$" to force them to match whole strings, * as per SQL practice. We have to have parens in case strings contain "|", * else the "^" and "$" will be bound into the first and last alternatives * which is not what we want. Whether this is done for dbnamebuf is controlled * by the want_literal_dbname parameter. * * The regexps we parse into the buffers are appended to the data (if any) * already present. If we parse fewer fields than the number of buffers we * were given, the extra buffers are unaltered. * * encoding: the character encoding for the given pattern * dbnamebuf: output parameter receiving the database name portion of the * pattern, if any. Can be NULL. * schemabuf: output parameter receiving the schema name portion of the * pattern, if any. Can be NULL. * namebuf: output parameter receiving the database name portion of the * pattern, if any. Can be NULL. * pattern: user-specified pattern option, or NULL if none ("*" is implied). * force_escape: always quote regexp special characters, even outside * double quotes (else they are quoted only between double quotes). * want_literal_dbname: if true, regexp special characters within the database * name portion of the pattern will not be escaped, nor will the dbname be * converted into a regular expression. * dotcnt: output parameter receiving the number of separators parsed from the * pattern. */ void patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, PQExpBuffer namebuf, const char *pattern, bool force_escape, bool want_literal_dbname, int *dotcnt) { PQExpBufferData buf[3]; PQExpBufferData left_literal; PQExpBuffer curbuf; PQExpBuffer maxbuf; int i; bool inquotes; bool left; const char *cp; Assert(pattern != NULL); Assert(namebuf != NULL); /* callers should never expect "dbname.relname" format */ Assert(dbnamebuf == NULL || schemabuf != NULL); Assert(dotcnt != NULL); *dotcnt = 0; inquotes = false; cp = pattern; if (dbnamebuf != NULL) maxbuf = &buf[2]; else if (schemabuf != NULL) maxbuf = &buf[1]; else maxbuf = &buf[0]; curbuf = &buf[0]; if (want_literal_dbname) { left = true; initPQExpBuffer(&left_literal); } else left = false; initPQExpBuffer(curbuf); appendPQExpBufferStr(curbuf, "^("); while (*cp) { char ch = *cp; if (ch == '"') { if (inquotes && cp[1] == '"') { /* emit one quote, stay in inquotes mode */ appendPQExpBufferChar(curbuf, '"'); if (left) appendPQExpBufferChar(&left_literal, '"'); cp++; } else inquotes = !inquotes; cp++; } else if (!inquotes && isupper((unsigned char) ch)) { appendPQExpBufferChar(curbuf, pg_tolower((unsigned char) ch)); if (left) appendPQExpBufferChar(&left_literal, pg_tolower((unsigned char) ch)); cp++; } else if (!inquotes && ch == '*') { appendPQExpBufferStr(curbuf, ".*"); if (left) appendPQExpBufferChar(&left_literal, '*'); cp++; } else if (!inquotes && ch == '?') { appendPQExpBufferChar(curbuf, '.'); if (left) appendPQExpBufferChar(&left_literal, '?'); cp++; } else if (!inquotes && ch == '.') { left = false; if (dotcnt) (*dotcnt)++; if (curbuf < maxbuf) { appendPQExpBufferStr(curbuf, ")$"); curbuf++; initPQExpBuffer(curbuf); appendPQExpBufferStr(curbuf, "^("); cp++; } else appendPQExpBufferChar(curbuf, *cp++); } else if (ch == '$') { /* * Dollar is always quoted, whether inside quotes or not. The * reason is that it's allowed in SQL identifiers, so there's a * significant use-case for treating it literally, while because * we anchor the pattern automatically there is no use-case for * having it possess its regexp meaning. */ appendPQExpBufferStr(curbuf, "\\$"); if (left) appendPQExpBufferChar(&left_literal, '$'); cp++; } else { /* * Ordinary data character, transfer to pattern * * Inside double quotes, or at all times if force_escape is true, * quote regexp special characters with a backslash to avoid * regexp errors. Outside quotes, however, let them pass through * as-is; this lets knowledgeable users build regexp expressions * that are more powerful than shell-style patterns. * * As an exception to that, though, always quote "[]", as that's * much more likely to be an attempt to write an array type name * than it is to be the start of a regexp bracket expression. */ if ((inquotes || force_escape) && strchr("|*+?()[]{}.^$\\", ch)) appendPQExpBufferChar(curbuf, '\\'); else if (ch == '[' && cp[1] == ']') appendPQExpBufferChar(curbuf, '\\'); i = PQmblenBounded(cp, encoding); while (i--) { if (left) appendPQExpBufferChar(&left_literal, *cp); appendPQExpBufferChar(curbuf, *cp++); } } } appendPQExpBufferStr(curbuf, ")$"); if (namebuf) { appendPQExpBufferStr(namebuf, curbuf->data); termPQExpBuffer(curbuf); curbuf--; } if (schemabuf && curbuf >= buf) { appendPQExpBufferStr(schemabuf, curbuf->data); termPQExpBuffer(curbuf); curbuf--; } if (dbnamebuf && curbuf >= buf) { if (want_literal_dbname) appendPQExpBufferStr(dbnamebuf, left_literal.data); else appendPQExpBufferStr(dbnamebuf, curbuf->data); termPQExpBuffer(curbuf); } }