diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
commit | 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch) | |
tree | fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/backend/utils/adt/oracle_compat.c | |
parent | Initial commit. (diff) | |
download | postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip |
Adding upstream version 16.2. (upstream/16.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/oracle_compat.c')
-rw-r--r-- | src/backend/utils/adt/oracle_compat.c | 1157 |
1 files changed, 1157 insertions, 0 deletions
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c new file mode 100644 index 0000000..3b5b794 --- /dev/null +++ b/src/backend/utils/adt/oracle_compat.c @@ -0,0 +1,1157 @@ +/*------------------------------------------------------------------------- + * oracle_compat.c + * Oracle compatible functions. + * + * Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * Author: Edmund Mergl <E.Mergl@bawue.de> + * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> + * + * + * IDENTIFICATION + * src/backend/utils/adt/oracle_compat.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/int.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/formatting.h" +#include "utils/memutils.h" +#include "varatt.h" + + +static text *dotrim(const char *string, int stringlen, + const char *set, int setlen, + bool doltrim, bool dortrim); +static bytea *dobyteatrim(bytea *string, bytea *set, + bool doltrim, bool dortrim); + + +/******************************************************************** + * + * lower + * + * Syntax: + * + * text lower(text string) + * + * Purpose: + * + * Returns string, with all letters forced to lowercase. + * + ********************************************************************/ + +Datum +lower(PG_FUNCTION_ARGS) +{ + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; + + out_string = str_tolower(VARDATA_ANY(in_string), + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); + result = cstring_to_text(out_string); + pfree(out_string); + + PG_RETURN_TEXT_P(result); +} + + +/******************************************************************** + * + * upper + * + * Syntax: + * + * text upper(text string) + * + * Purpose: + * + * Returns string, with all letters forced to uppercase. 
+ * + ********************************************************************/ + +Datum +upper(PG_FUNCTION_ARGS) +{ + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; + + out_string = str_toupper(VARDATA_ANY(in_string), + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); + result = cstring_to_text(out_string); + pfree(out_string); + + PG_RETURN_TEXT_P(result); +} + + +/******************************************************************** + * + * initcap + * + * Syntax: + * + * text initcap(text string) + * + * Purpose: + * + * Returns string, with first letter of each word in uppercase, all + * other letters in lowercase. A word is defined as a sequence of + * alphanumeric characters, delimited by non-alphanumeric + * characters. + * + ********************************************************************/ + +Datum +initcap(PG_FUNCTION_ARGS) +{ + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; + + out_string = str_initcap(VARDATA_ANY(in_string), + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); + result = cstring_to_text(out_string); + pfree(out_string); + + PG_RETURN_TEXT_P(result); +} + + +/******************************************************************** + * + * lpad + * + * Syntax: + * + * text lpad(text string1, int4 len, text string2) + * + * Purpose: + * + * Returns string1, left-padded to length len with the sequence of + * characters in string2. If len is less than the length of string1, + * instead truncate (on the right) to len. 
+ * + ********************************************************************/ + +Datum +lpad(PG_FUNCTION_ARGS) +{ + text *string1 = PG_GETARG_TEXT_PP(0); + int32 len = PG_GETARG_INT32(1); + text *string2 = PG_GETARG_TEXT_PP(2); + text *ret; + char *ptr1, + *ptr2, + *ptr2start, + *ptr2end, + *ptr_ret; + int m, + s1len, + s2len; + int bytelen; + + /* Negative len is silently taken as zero */ + if (len < 0) + len = 0; + + s1len = VARSIZE_ANY_EXHDR(string1); + if (s1len < 0) + s1len = 0; /* shouldn't happen */ + + s2len = VARSIZE_ANY_EXHDR(string2); + if (s2len < 0) + s2len = 0; /* shouldn't happen */ + + s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); + + if (s1len > len) + s1len = len; /* truncate string1 to len chars */ + + if (s2len <= 0) + len = s1len; /* nothing to pad with, so don't pad */ + + /* compute worst-case output length */ + if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len, + &bytelen)) || + unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || + unlikely(!AllocSizeIsValid(bytelen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + ret = (text *) palloc(bytelen); + + m = len - s1len; + + ptr2 = ptr2start = VARDATA_ANY(string2); + ptr2end = ptr2 + s2len; + ptr_ret = VARDATA(ret); + + while (m--) + { + int mlen = pg_mblen(ptr2); + + memcpy(ptr_ret, ptr2, mlen); + ptr_ret += mlen; + ptr2 += mlen; + if (ptr2 == ptr2end) /* wrap around at end of s2 */ + ptr2 = ptr2start; + } + + ptr1 = VARDATA_ANY(string1); + + while (s1len--) + { + int mlen = pg_mblen(ptr1); + + memcpy(ptr_ret, ptr1, mlen); + ptr_ret += mlen; + ptr1 += mlen; + } + + SET_VARSIZE(ret, ptr_ret - (char *) ret); + + PG_RETURN_TEXT_P(ret); +} + + +/******************************************************************** + * + * rpad + * + * Syntax: + * + * text rpad(text string1, int4 len, text string2) + * + * Purpose: + * + * Returns string1, right-padded to length len with the sequence of + * 
characters in string2. If len is less than the length of string1, + * instead truncate (on the right) to len. + * + ********************************************************************/ + +Datum +rpad(PG_FUNCTION_ARGS) +{ + text *string1 = PG_GETARG_TEXT_PP(0); + int32 len = PG_GETARG_INT32(1); + text *string2 = PG_GETARG_TEXT_PP(2); + text *ret; + char *ptr1, + *ptr2, + *ptr2start, + *ptr2end, + *ptr_ret; + int m, + s1len, + s2len; + int bytelen; + + /* Negative len is silently taken as zero */ + if (len < 0) + len = 0; + + s1len = VARSIZE_ANY_EXHDR(string1); + if (s1len < 0) + s1len = 0; /* shouldn't happen */ + + s2len = VARSIZE_ANY_EXHDR(string2); + if (s2len < 0) + s2len = 0; /* shouldn't happen */ + + s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); + + if (s1len > len) + s1len = len; /* truncate string1 to len chars */ + + if (s2len <= 0) + len = s1len; /* nothing to pad with, so don't pad */ + + /* compute worst-case output length */ + if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len, + &bytelen)) || + unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || + unlikely(!AllocSizeIsValid(bytelen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + ret = (text *) palloc(bytelen); + + m = len - s1len; + + ptr1 = VARDATA_ANY(string1); + ptr_ret = VARDATA(ret); + + while (s1len--) + { + int mlen = pg_mblen(ptr1); + + memcpy(ptr_ret, ptr1, mlen); + ptr_ret += mlen; + ptr1 += mlen; + } + + ptr2 = ptr2start = VARDATA_ANY(string2); + ptr2end = ptr2 + s2len; + + while (m--) + { + int mlen = pg_mblen(ptr2); + + memcpy(ptr_ret, ptr2, mlen); + ptr_ret += mlen; + ptr2 += mlen; + if (ptr2 == ptr2end) /* wrap around at end of s2 */ + ptr2 = ptr2start; + } + + SET_VARSIZE(ret, ptr_ret - (char *) ret); + + PG_RETURN_TEXT_P(ret); +} + + +/******************************************************************** + * + * btrim + * + * Syntax: + * + * text btrim(text string, text set) + * + 
* Purpose: + * + * Returns string with characters removed from the front and back + * up to the first character not in set. + * + ********************************************************************/ + +Datum +btrim(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *set = PG_GETARG_TEXT_PP(1); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), + true, true); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * btrim1 --- btrim with set fixed as ' ' + * + ********************************************************************/ + +Datum +btrim1(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + " ", 1, + true, true); + + PG_RETURN_TEXT_P(ret); +} + +/* + * Common implementation for btrim, ltrim, rtrim + */ +static text * +dotrim(const char *string, int stringlen, + const char *set, int setlen, + bool doltrim, bool dortrim) +{ + int i; + + /* Nothing to do if either string or set is empty */ + if (stringlen > 0 && setlen > 0) + { + if (pg_database_encoding_max_length() > 1) + { + /* + * In the multibyte-encoding case, build arrays of pointers to + * character starts, so that we can avoid inefficient checks in + * the inner loops. 
+ */ + const char **stringchars; + const char **setchars; + int *stringmblen; + int *setmblen; + int stringnchars; + int setnchars; + int resultndx; + int resultnchars; + const char *p; + int len; + int mblen; + const char *str_pos; + int str_len; + + stringchars = (const char **) palloc(stringlen * sizeof(char *)); + stringmblen = (int *) palloc(stringlen * sizeof(int)); + stringnchars = 0; + p = string; + len = stringlen; + while (len > 0) + { + stringchars[stringnchars] = p; + stringmblen[stringnchars] = mblen = pg_mblen(p); + stringnchars++; + p += mblen; + len -= mblen; + } + + setchars = (const char **) palloc(setlen * sizeof(char *)); + setmblen = (int *) palloc(setlen * sizeof(int)); + setnchars = 0; + p = set; + len = setlen; + while (len > 0) + { + setchars[setnchars] = p; + setmblen[setnchars] = mblen = pg_mblen(p); + setnchars++; + p += mblen; + len -= mblen; + } + + resultndx = 0; /* index in stringchars[] */ + resultnchars = stringnchars; + + if (doltrim) + { + while (resultnchars > 0) + { + str_pos = stringchars[resultndx]; + str_len = stringmblen[resultndx]; + for (i = 0; i < setnchars; i++) + { + if (str_len == setmblen[i] && + memcmp(str_pos, setchars[i], str_len) == 0) + break; + } + if (i >= setnchars) + break; /* no match here */ + string += str_len; + stringlen -= str_len; + resultndx++; + resultnchars--; + } + } + + if (dortrim) + { + while (resultnchars > 0) + { + str_pos = stringchars[resultndx + resultnchars - 1]; + str_len = stringmblen[resultndx + resultnchars - 1]; + for (i = 0; i < setnchars; i++) + { + if (str_len == setmblen[i] && + memcmp(str_pos, setchars[i], str_len) == 0) + break; + } + if (i >= setnchars) + break; /* no match here */ + stringlen -= str_len; + resultnchars--; + } + } + + pfree(stringchars); + pfree(stringmblen); + pfree(setchars); + pfree(setmblen); + } + else + { + /* + * In the single-byte-encoding case, we don't need such overhead. 
+ */ + if (doltrim) + { + while (stringlen > 0) + { + char str_ch = *string; + + for (i = 0; i < setlen; i++) + { + if (str_ch == set[i]) + break; + } + if (i >= setlen) + break; /* no match here */ + string++; + stringlen--; + } + } + + if (dortrim) + { + while (stringlen > 0) + { + char str_ch = string[stringlen - 1]; + + for (i = 0; i < setlen; i++) + { + if (str_ch == set[i]) + break; + } + if (i >= setlen) + break; /* no match here */ + stringlen--; + } + } + } + } + + /* Return selected portion of string */ + return cstring_to_text_with_len(string, stringlen); +} + +/* + * Common implementation for bytea versions of btrim, ltrim, rtrim + */ +bytea * +dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim) +{ + bytea *ret; + char *ptr, + *end, + *ptr2, + *ptr2start, + *end2; + int m, + stringlen, + setlen; + + stringlen = VARSIZE_ANY_EXHDR(string); + setlen = VARSIZE_ANY_EXHDR(set); + + if (stringlen <= 0 || setlen <= 0) + return string; + + m = stringlen; + ptr = VARDATA_ANY(string); + end = ptr + stringlen - 1; + ptr2start = VARDATA_ANY(set); + end2 = ptr2start + setlen - 1; + + if (doltrim) + { + while (m > 0) + { + ptr2 = ptr2start; + while (ptr2 <= end2) + { + if (*ptr == *ptr2) + break; + ++ptr2; + } + if (ptr2 > end2) + break; + ptr++; + m--; + } + } + + if (dortrim) + { + while (m > 0) + { + ptr2 = ptr2start; + while (ptr2 <= end2) + { + if (*end == *ptr2) + break; + ++ptr2; + } + if (ptr2 > end2) + break; + end--; + m--; + } + } + + ret = (bytea *) palloc(VARHDRSZ + m); + SET_VARSIZE(ret, VARHDRSZ + m); + memcpy(VARDATA(ret), ptr, m); + return ret; +} + +/******************************************************************** + * + * byteatrim + * + * Syntax: + * + * bytea byteatrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with characters removed from the front and back + * up to the first character not in set. + * + * Cloned from btrim and modified as required. 
+ ********************************************************************/ + +Datum +byteatrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, true, true); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * bytealtrim + * + * Syntax: + * + * bytea bytealtrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with initial characters removed up to the first + * character not in set. + * + ********************************************************************/ + +Datum +bytealtrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, true, false); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * byteartrim + * + * Syntax: + * + * bytea byteartrim(bytea string, bytea set) + * + * Purpose: + * + * Returns string with final characters removed after the last + * character not in set. + * + ********************************************************************/ + +Datum +byteartrim(PG_FUNCTION_ARGS) +{ + bytea *string = PG_GETARG_BYTEA_PP(0); + bytea *set = PG_GETARG_BYTEA_PP(1); + bytea *ret; + + ret = dobyteatrim(string, set, false, true); + + PG_RETURN_BYTEA_P(ret); +} + +/******************************************************************** + * + * ltrim + * + * Syntax: + * + * text ltrim(text string, text set) + * + * Purpose: + * + * Returns string with initial characters removed up to the first + * character not in set. 
+ * + ********************************************************************/ + +Datum +ltrim(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *set = PG_GETARG_TEXT_PP(1); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), + true, false); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * ltrim1 --- ltrim with set fixed as ' ' + * + ********************************************************************/ + +Datum +ltrim1(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + " ", 1, + true, false); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * rtrim + * + * Syntax: + * + * text rtrim(text string, text set) + * + * Purpose: + * + * Returns string with final characters removed after the last + * character not in set. 
+ * + ********************************************************************/ + +Datum +rtrim(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *set = PG_GETARG_TEXT_PP(1); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), + false, true); + + PG_RETURN_TEXT_P(ret); +} + +/******************************************************************** + * + * rtrim1 --- rtrim with set fixed as ' ' + * + ********************************************************************/ + +Datum +rtrim1(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *ret; + + ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), + " ", 1, + false, true); + + PG_RETURN_TEXT_P(ret); +} + + +/******************************************************************** + * + * translate + * + * Syntax: + * + * text translate(text string, text from, text to) + * + * Purpose: + * + * Returns string after replacing all occurrences of characters in from + * with the corresponding character in to. If from is longer than to, + * occurrences of the extra characters in from are deleted. + * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>. 
+ * + ********************************************************************/ + +Datum +translate(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + text *from = PG_GETARG_TEXT_PP(1); + text *to = PG_GETARG_TEXT_PP(2); + text *result; + char *from_ptr, + *to_ptr, + *to_end; + char *source, + *target; + int m, + fromlen, + tolen, + retlen, + i; + int bytelen; + int len; + int source_len; + int from_index; + + m = VARSIZE_ANY_EXHDR(string); + if (m <= 0) + PG_RETURN_TEXT_P(string); + source = VARDATA_ANY(string); + + fromlen = VARSIZE_ANY_EXHDR(from); + from_ptr = VARDATA_ANY(from); + tolen = VARSIZE_ANY_EXHDR(to); + to_ptr = VARDATA_ANY(to); + to_end = to_ptr + tolen; + + /* + * The worst-case expansion is to substitute a max-length character for a + * single-byte character at each position of the string. + */ + if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), m, + &bytelen)) || + unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || + unlikely(!AllocSizeIsValid(bytelen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + result = (text *) palloc(bytelen); + + target = VARDATA(result); + retlen = 0; + + while (m > 0) + { + source_len = pg_mblen(source); + from_index = 0; + + for (i = 0; i < fromlen; i += len) + { + len = pg_mblen(&from_ptr[i]); + if (len == source_len && + memcmp(source, &from_ptr[i], len) == 0) + break; + + from_index++; + } + if (i < fromlen) + { + /* substitute, or delete if no corresponding "to" character */ + char *p = to_ptr; + + for (i = 0; i < from_index; i++) + { + if (p >= to_end) + break; + p += pg_mblen(p); + } + if (p < to_end) + { + len = pg_mblen(p); + memcpy(target, p, len); + target += len; + retlen += len; + } + } + else + { + /* no match, so copy */ + memcpy(target, source, source_len); + target += source_len; + retlen += source_len; + } + + source += source_len; + m -= source_len; + } + + SET_VARSIZE(result, retlen + VARHDRSZ); + + /* + * 
The function result is probably much bigger than needed, if we're using + * a multibyte encoding, but it's not worth reallocating it; the result + * probably won't live long anyway. + */ + + PG_RETURN_TEXT_P(result); +} + +/******************************************************************** + * + * ascii + * + * Syntax: + * + * int ascii(text string) + * + * Purpose: + * + * Returns the decimal representation of the first character from + * string. + * If the string is empty we return 0. + * If the database encoding is UTF8, we return the Unicode codepoint. + * If the database encoding is any other multi-byte encoding, we + * return the value of the first byte if it is an ASCII character + * (range 1 .. 127), or raise an error. + * For all other encodings we return the value of the first byte, + * (range 1..255). + * + ********************************************************************/ + +Datum +ascii(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + int encoding = GetDatabaseEncoding(); + unsigned char *data; + + if (VARSIZE_ANY_EXHDR(string) <= 0) + PG_RETURN_INT32(0); + + data = (unsigned char *) VARDATA_ANY(string); + + if (encoding == PG_UTF8 && *data > 127) + { + /* return the code point for Unicode */ + + int result = 0, + tbytes = 0, + i; + + if (*data >= 0xF0) + { + result = *data & 0x07; + tbytes = 3; + } + else if (*data >= 0xE0) + { + result = *data & 0x0F; + tbytes = 2; + } + else + { + Assert(*data > 0xC0); + result = *data & 0x1f; + tbytes = 1; + } + + Assert(tbytes > 0); + + for (i = 1; i <= tbytes; i++) + { + Assert((data[i] & 0xC0) == 0x80); + result = (result << 6) + (data[i] & 0x3f); + } + + PG_RETURN_INT32(result); + } + else + { + if (pg_encoding_max_length(encoding) > 1 && *data > 127) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large"))); + + + PG_RETURN_INT32((int32) *data); + } +} + +/******************************************************************** + * + * chr + * + * 
Syntax: + * + * text chr(int val) + * + * Purpose: + * + * Returns the character having the binary equivalent to val. + * + * For UTF8 we treat the argument as a Unicode code point. + * For other multi-byte encodings we raise an error for arguments + * outside the strict ASCII range (1..127). + * + * It's important that we don't ever return a value that is not valid + * in the database encoding, so that this doesn't become a way for + * invalid data to enter the database. + * + ********************************************************************/ + +Datum +chr (PG_FUNCTION_ARGS) +{ + int32 arg = PG_GETARG_INT32(0); + uint32 cvalue; + text *result; + int encoding = GetDatabaseEncoding(); + + /* + * Error out on arguments that make no sense or that we can't validly + * represent in the encoding. + */ + if (arg < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("character number must be positive"))); + else if (arg == 0) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("null character not permitted"))); + + cvalue = arg; + + if (encoding == PG_UTF8 && cvalue > 127) + { + /* for Unicode we treat the argument as a code point */ + int bytes; + unsigned char *wch; + + /* + * We only allow valid Unicode code points; per RFC3629 that stops at + * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to + * U+1FFFFF. 
+ */ + if (cvalue > 0x0010ffff) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large for encoding: %u", + cvalue))); + + if (cvalue > 0xffff) + bytes = 4; + else if (cvalue > 0x07ff) + bytes = 3; + else + bytes = 2; + + result = (text *) palloc(VARHDRSZ + bytes); + SET_VARSIZE(result, VARHDRSZ + bytes); + wch = (unsigned char *) VARDATA(result); + + if (bytes == 2) + { + wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F); + wch[1] = 0x80 | (cvalue & 0x3F); + } + else if (bytes == 3) + { + wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F); + wch[1] = 0x80 | ((cvalue >> 6) & 0x3F); + wch[2] = 0x80 | (cvalue & 0x3F); + } + else + { + wch[0] = 0xF0 | ((cvalue >> 18) & 0x07); + wch[1] = 0x80 | ((cvalue >> 12) & 0x3F); + wch[2] = 0x80 | ((cvalue >> 6) & 0x3F); + wch[3] = 0x80 | (cvalue & 0x3F); + } + + /* + * The preceding range check isn't sufficient, because UTF8 excludes + * Unicode "surrogate pair" codes. Make sure what we created is valid + * UTF8. + */ + if (!pg_utf8_islegal(wch, bytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character not valid for encoding: %u", + cvalue))); + } + else + { + bool is_mb; + + is_mb = pg_encoding_max_length(encoding) > 1; + + if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested character too large for encoding: %u", + cvalue))); + + result = (text *) palloc(VARHDRSZ + 1); + SET_VARSIZE(result, VARHDRSZ + 1); + *VARDATA(result) = (char) cvalue; + } + + PG_RETURN_TEXT_P(result); +} + +/******************************************************************** + * + * repeat + * + * Syntax: + * + * text repeat(text string, int val) + * + * Purpose: + * + * Repeat string by val. 
+ * + ********************************************************************/ + +Datum +repeat(PG_FUNCTION_ARGS) +{ + text *string = PG_GETARG_TEXT_PP(0); + int32 count = PG_GETARG_INT32(1); + text *result; + int slen, + tlen; + int i; + char *cp, + *sp; + + if (count < 0) + count = 0; + + slen = VARSIZE_ANY_EXHDR(string); + + if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) || + unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) || + unlikely(!AllocSizeIsValid(tlen))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("requested length too large"))); + + result = (text *) palloc(tlen); + + SET_VARSIZE(result, tlen); + cp = VARDATA(result); + sp = VARDATA_ANY(string); + for (i = 0; i < count; i++) + { + memcpy(cp, sp, slen); + cp += slen; + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_TEXT_P(result); +} |