diff options
Diffstat (limited to '')
-rw-r--r-- | src/bin/psql/stringutils.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c new file mode 100644 index 0000000..c96b2fe --- /dev/null +++ b/src/bin/psql/stringutils.c @@ -0,0 +1,344 @@ +/* + * psql - the PostgreSQL interactive terminal + * + * Copyright (c) 2000-2020, PostgreSQL Global Development Group + * + * src/bin/psql/stringutils.c + */ +#include "postgres_fe.h" + +#include <ctype.h> + +#include "common.h" +#include "stringutils.h" + +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + + +/* + * Replacement for strtok() (a.k.a. poor man's flex) + * + * Splits a string into tokens, returning one token per call, then NULL + * when no more tokens exist in the given string. + * + * The calling convention is similar to that of strtok, but with more + * frammishes. + * + * s - string to parse, if NULL continue parsing the last string + * whitespace - set of whitespace characters that separate tokens + * delim - set of non-whitespace separator characters (or NULL) + * quote - set of characters that can quote a token (NULL if none) + * escape - character that can quote quotes (0 if none) + * e_strings - if true, treat E'...' syntax as a valid token + * del_quotes - if true, strip quotes from the returned token, else return + * it exactly as found in the string + * encoding - the active character-set encoding + * + * Characters in 'delim', if any, will be returned as single-character + * tokens unless part of a quoted token. + * + * Double occurrences of the quoting character are always taken to represent + * a single quote character in the data. If escape isn't 0, then escape + * followed by anything (except \0) is a data character too. + * + * The combination of e_strings and del_quotes both true is not currently + * handled. This could be fixed but it's not needed anywhere at the moment. + * + * Note that the string s is _not_ overwritten in this implementation. + * + * NB: it's okay to vary delim, quote, and escape from one call to the + * next on a single source string, but changing whitespace is a bad idea + * since you might lose data. + */ +char * +strtokx(const char *s, + const char *whitespace, + const char *delim, + const char *quote, + char escape, + bool e_strings, + bool del_quotes, + int encoding) +{ + static char *storage = NULL; /* store the local copy of the users + * string here */ + static char *string = NULL; /* pointer into storage where to continue on + * next call */ + + /* variously abused variables: */ + unsigned int offset; + char *start; + char *p; + + if (s) + { + free(storage); + + /* + * We may need extra space to insert delimiter nulls for adjacent + * tokens. 2X the space is a gross overestimate, but it's unlikely + * that this code will be used on huge strings anyway. + */ + storage = pg_malloc(2 * strlen(s) + 1); + strcpy(storage, s); + string = storage; + } + + if (!storage) + return NULL; + + /* skip leading whitespace */ + offset = strspn(string, whitespace); + start = &string[offset]; + + /* end of string reached? */ + if (*start == '\0') + { + /* technically we don't need to free here, but we're nice */ + free(storage); + storage = NULL; + string = NULL; + return NULL; + } + + /* test if delimiter character */ + if (delim && strchr(delim, *start)) + { + /* + * If not at end of string, we need to insert a null to terminate the + * returned token. We can just overwrite the next character if it + * happens to be in the whitespace set ... otherwise move over the + * rest of the string to make room. (This is why we allocated extra + * space above). + */ + p = start + 1; + if (*p != '\0') + { + if (!strchr(whitespace, *p)) + memmove(p + 1, p, strlen(p) + 1); + *p = '\0'; + string = p + 1; + } + else + { + /* at end of string, so no extra work */ + string = p; + } + + return start; + } + + /* check for E string */ + p = start; + if (e_strings && + (*p == 'E' || *p == 'e') && + p[1] == '\'') + { + quote = "'"; + escape = '\\'; /* if std strings before, not any more */ + p++; + } + + /* test if quoting character */ + if (quote && strchr(quote, *p)) + { + /* okay, we have a quoted token, now scan for the closer */ + char thisquote = *p++; + + for (; *p; p += PQmblenBounded(p, encoding)) + { + if (*p == escape && p[1] != '\0') + p++; /* process escaped anything */ + else if (*p == thisquote && p[1] == thisquote) + p++; /* process doubled quote */ + else if (*p == thisquote) + { + p++; /* skip trailing quote */ + break; + } + } + + /* + * If not at end of string, we need to insert a null to terminate the + * returned token. See notes above. + */ + if (*p != '\0') + { + if (!strchr(whitespace, *p)) + memmove(p + 1, p, strlen(p) + 1); + *p = '\0'; + string = p + 1; + } + else + { + /* at end of string, so no extra work */ + string = p; + } + + /* Clean up the token if caller wants that */ + if (del_quotes) + strip_quotes(start, thisquote, escape, encoding); + + return start; + } + + /* + * Otherwise no quoting character. Scan till next whitespace, delimiter + * or quote. NB: at this point, *start is known not to be '\0', + * whitespace, delim, or quote, so we will consume at least one character. + */ + offset = strcspn(start, whitespace); + + if (delim) + { + unsigned int offset2 = strcspn(start, delim); + + if (offset > offset2) + offset = offset2; + } + + if (quote) + { + unsigned int offset2 = strcspn(start, quote); + + if (offset > offset2) + offset = offset2; + } + + p = start + offset; + + /* + * If not at end of string, we need to insert a null to terminate the + * returned token. See notes above. + */ + if (*p != '\0') + { + if (!strchr(whitespace, *p)) + memmove(p + 1, p, strlen(p) + 1); + *p = '\0'; + string = p + 1; + } + else + { + /* at end of string, so no extra work */ + string = p; + } + + return start; +} + + +/* + * strip_quotes + * + * Remove quotes from the string at *source. Leading and trailing occurrences + * of 'quote' are removed; embedded double occurrences of 'quote' are reduced + * to single occurrences; if 'escape' is not 0 then 'escape' removes special + * significance of next character. + * + * Note that the source string is overwritten in-place. + */ +void +strip_quotes(char *source, char quote, char escape, int encoding) +{ + char *src; + char *dst; + + Assert(source != NULL); + Assert(quote != '\0'); + + src = dst = source; + + if (*src && *src == quote) + src++; /* skip leading quote */ + + while (*src) + { + char c = *src; + int i; + + if (c == quote && src[1] == '\0') + break; /* skip trailing quote */ + else if (c == quote && src[1] == quote) + src++; /* process doubled quote */ + else if (c == escape && src[1] != '\0') + src++; /* process escaped character */ + + i = PQmblenBounded(src, encoding); + while (i--) + *dst++ = *src++; + } + + *dst = '\0'; +} + + +/* + * quote_if_needed + * + * Opposite of strip_quotes(). If "source" denotes itself literally without + * quoting or escaping, returns NULL. Otherwise, returns a malloc'd copy with + * quoting and escaping applied: + * + * source - string to parse + * entails_quote - any of these present? need outer quotes + * quote - doubled within string, affixed to both ends + * escape - doubled within string + * force_quote - if true, quote the output even if it doesn't "need" it + * encoding - the active character-set encoding + * + * Do not use this as a substitute for PQescapeStringConn(). Use it for + * strings to be parsed by strtokx() or psql_scan_slash_option(). + */ +char * +quote_if_needed(const char *source, const char *entails_quote, + char quote, char escape, bool force_quote, + int encoding) +{ + const char *src; + char *ret; + char *dst; + bool need_quotes = force_quote; + + Assert(source != NULL); + Assert(quote != '\0'); + + src = source; + dst = ret = pg_malloc(2 * strlen(src) + 3); /* excess */ + + *dst++ = quote; + + while (*src) + { + char c = *src; + int i; + + if (c == quote) + { + need_quotes = true; + *dst++ = quote; + } + else if (c == escape) + { + need_quotes = true; + *dst++ = escape; + } + else if (strchr(entails_quote, c)) + need_quotes = true; + + i = PQmblenBounded(src, encoding); + while (i--) + *dst++ = *src++; + } + + *dst++ = quote; + *dst = '\0'; + + if (!need_quotes) + { + free(ret); + ret = NULL; + } + + return ret; +} |