diff options
Diffstat (limited to 'src/backend/utils/adt/ascii.c')
-rw-r--r-- | src/backend/utils/adt/ascii.c | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/src/backend/utils/adt/ascii.c b/src/backend/utils/adt/ascii.c new file mode 100644 index 0000000..b6944d8 --- /dev/null +++ b/src/backend/utils/adt/ascii.c @@ -0,0 +1,199 @@ +/*----------------------------------------------------------------------- + * ascii.c + * The PostgreSQL routine for string to ascii conversion. + * + * Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/ascii.c + * + *----------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "mb/pg_wchar.h" +#include "utils/ascii.h" +#include "utils/builtins.h" +#include "varatt.h" + +static void pg_to_ascii(unsigned char *src, unsigned char *src_end, + unsigned char *dest, int enc); +static text *encode_to_ascii(text *data, int enc); + + +/* ---------- + * to_ascii + * ---------- + */ +static void +pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc) +{ + unsigned char *x; + const unsigned char *ascii; + int range; + + /* + * relevant start for an encoding + */ +#define RANGE_128 128 +#define RANGE_160 160 + + if (enc == PG_LATIN1) + { + /* + * ISO-8859-1 <range: 160 -- 255> + */ + ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty"; + range = RANGE_160; + } + else if (enc == PG_LATIN2) + { + /* + * ISO-8859-2 <range: 160 -- 255> + */ + ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt."; + range = RANGE_160; + } + else if (enc == PG_LATIN9) + { + /* + * ISO-8859-15 <range: 160 -- 255> + */ + ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty"; + range = RANGE_160; + } + else if (enc == PG_WIN1250) + { + /* + * Window CP1250 <range: 128 -- 255> + */ + ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt "; + range = RANGE_128; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding conversion from %s to ASCII not supported", + pg_encoding_to_char(enc)))); + return; /* keep compiler quiet */ + } + + /* + * Encode + */ + for (x = src; x < src_end; x++) + { + if (*x < 128) + *dest++ = *x; + else if (*x < range) + *dest++ = ' '; /* bogus 128 to 'range' */ + else + *dest++ = ascii[*x - range]; + } +} + +/* ---------- + * encode text + * + * The text datum is overwritten in-place, therefore this coding method + * cannot support conversions that change the string length! + * ---------- + */ +static text * +encode_to_ascii(text *data, int enc) +{ + pg_to_ascii((unsigned char *) VARDATA(data), /* src */ + (unsigned char *) (data) + VARSIZE(data), /* src end */ + (unsigned char *) VARDATA(data), /* dest */ + enc); /* encoding */ + + return data; +} + +/* ---------- + * convert to ASCII - enc is set as 'name' arg. + * ---------- + */ +Datum +to_ascii_encname(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_P_COPY(0); + char *encname = NameStr(*PG_GETARG_NAME(1)); + int enc = pg_char_to_encoding(encname); + + if (enc < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%s is not a valid encoding name", encname))); + + PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); +} + +/* ---------- + * convert to ASCII - enc is set as int4 + * ---------- + */ +Datum +to_ascii_enc(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_P_COPY(0); + int enc = PG_GETARG_INT32(1); + + if (!PG_VALID_ENCODING(enc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%d is not a valid encoding code", enc))); + + PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); +} + +/* ---------- + * convert to ASCII - current enc is DatabaseEncoding + * ---------- + */ +Datum +to_ascii_default(PG_FUNCTION_ARGS) +{ + text *data = PG_GETARG_TEXT_P_COPY(0); + int enc = GetDatabaseEncoding(); + + PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); +} + +/* ---------- + * Copy a string in an arbitrary backend-safe encoding, converting it to a + * valid ASCII string by replacing non-ASCII bytes with '?'. Otherwise the + * behavior is identical to strlcpy(), except that we don't bother with a + * return value. + * + * This must not trigger ereport(ERROR), as it is called in postmaster. + * ---------- + */ +void +ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz) +{ + if (destsiz == 0) /* corner case: no room for trailing nul */ + return; + + while (--destsiz > 0) + { + /* use unsigned char here to avoid compiler warning */ + unsigned char ch = *src++; + + if (ch == '\0') + break; + /* Keep printable ASCII characters */ + if (32 <= ch && ch <= 127) + *dest = ch; + /* White-space is also OK */ + else if (ch == '\n' || ch == '\r' || ch == '\t') + *dest = ch; + /* Everything else is replaced with '?' */ + else + *dest = '?'; + dest++; + } + + *dest = '\0'; +} |