diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
commit | 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch) | |
tree | fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/backend/utils/adt/varchar.c | |
parent | Initial commit. (diff) | |
download | postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip |
Adding upstream version 16.2.upstream/16.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/varchar.c')
-rw-r--r-- | src/backend/utils/adt/varchar.c | 1231 |
1 files changed, 1231 insertions, 0 deletions
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c new file mode 100644 index 0000000..b92ff4d --- /dev/null +++ b/src/backend/utils/adt/varchar.c @@ -0,0 +1,1231 @@ +/*------------------------------------------------------------------------- + * + * varchar.c + * Functions for the built-in types char(n) and varchar(n). + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/varchar.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/detoast.h" +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" +#include "utils/varlena.h" + +/* common code for bpchartypmodin and varchartypmodin */ +static int32 +anychar_typmodin(ArrayType *ta, const char *typename) +{ + int32 typmod; + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + /* + * we're not too tense about good error message here because grammar + * shouldn't allow wrong number of modifiers for CHAR + */ + if (n != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type modifier"))); + + if (*tl < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("length for type %s must be at least 1", typename))); + if (*tl > MaxAttrSize) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("length for type %s cannot exceed %d", + typename, MaxAttrSize))); + + /* + * For largely historical reasons, the typmod is VARHDRSZ plus the number + * of characters; there is enough client-side code that knows about that + * that we'd better not change it. + */ + typmod = VARHDRSZ + *tl; + + return typmod; +} + +/* common code for bpchartypmodout and varchartypmodout */ +static char * +anychar_typmodout(int32 typmod) +{ + char *res = (char *) palloc(64); + + if (typmod > VARHDRSZ) + snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ)); + else + *res = '\0'; + + return res; +} + + +/* + * CHAR() and VARCHAR() types are part of the SQL standard. CHAR() + * is for blank-padded string whose length is specified in CREATE TABLE. + * VARCHAR is for storing string whose length is at most the length specified + * at CREATE TABLE time. + * + * It's hard to implement these types because we cannot figure out + * the length of the type from the type itself. I changed (hopefully all) the + * fmgr calls that invoke input functions of a data type to supply the + * length also. (eg. in INSERTs, we have the tupleDescriptor which contains + * the length of the attributes and hence the exact length of the char() or + * varchar(). We pass this to bpcharin() or varcharin().) In the case where + * we cannot determine the length, we pass in -1 instead and the input + * converter does not enforce any length check. + * + * We actually implement this as a varlena so that we don't have to pass in + * the length for the comparison functions. (The difference between these + * types and "text" is that we truncate and possibly blank-pad the string + * at insertion time.) + * + * - ay 6/95 + */ + + +/***************************************************************************** + * bpchar - char() * + *****************************************************************************/ + +/* + * bpchar_input -- common guts of bpcharin and bpcharrecv + * + * s is the input text of length len (may not be null-terminated) + * atttypmod is the typmod value to apply + * + * Note that atttypmod is measured in characters, which + * is not necessarily the same as the number of bytes. + * + * If the input string is too long, raise an error, unless the extra + * characters are spaces, in which case they're truncated. (per SQL) + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + */ +static BpChar * +bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext) +{ + BpChar *result; + char *r; + size_t maxlen; + + /* If typmod is -1 (or invalid), use the actual string length */ + if (atttypmod < (int32) VARHDRSZ) + maxlen = len; + else + { + size_t charlen; /* number of CHARACTERS in the input */ + + maxlen = atttypmod - VARHDRSZ; + charlen = pg_mbstrlen_with_len(s, len); + if (charlen > maxlen) + { + /* Verify that extra characters are spaces, and clip them off */ + size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); + size_t j; + + /* + * at this point, len is the actual BYTE length of the input + * string, maxlen is the max number of CHARACTERS allowed for this + * bpchar type, mbmaxlen is the length in BYTES of those chars. + */ + for (j = mbmaxlen; j < len; j++) + { + if (s[j] != ' ') + ereturn(escontext, NULL, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character(%d)", + (int) maxlen))); + } + + /* + * Now we set maxlen to the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len = mbmaxlen; + } + else + { + /* + * Now we set maxlen to the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len + (maxlen - charlen); + } + } + + result = (BpChar *) palloc(maxlen + VARHDRSZ); + SET_VARSIZE(result, maxlen + VARHDRSZ); + r = VARDATA(result); + memcpy(r, s, len); + + /* blank pad the string if necessary */ + if (maxlen > len) + memset(r + len, ' ', maxlen - len); + + return result; +} + +/* + * Convert a C string to CHARACTER internal representation. atttypmod + * is the declared length of the type plus VARHDRSZ. + */ +Datum +bpcharin(PG_FUNCTION_ARGS) +{ + char *s = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + BpChar *result; + + result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context); + PG_RETURN_BPCHAR_P(result); +} + + +/* + * Convert a CHARACTER value to a C string. + * + * Uses the text conversion functions, which is only appropriate if BpChar + * and text are equivalent types. + */ +Datum +bpcharout(PG_FUNCTION_ARGS) +{ + Datum txt = PG_GETARG_DATUM(0); + + PG_RETURN_CSTRING(TextDatumGetCString(txt)); +} + +/* + * bpcharrecv - converts external binary format to bpchar + */ +Datum +bpcharrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + BpChar *result; + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + result = bpchar_input(str, nbytes, atttypmod, NULL); + pfree(str); + PG_RETURN_BPCHAR_P(result); +} + +/* + * bpcharsend - converts bpchar to binary format + */ +Datum +bpcharsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as textsend, so share code */ + return textsend(fcinfo); +} + + +/* + * Converts a CHARACTER type to the specified size. + * + * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. + * isExplicit is true if this is for an explicit cast to char(N). + * + * Truncation rules: for an explicit cast, silently truncate to the given + * length; for an implicit cast, raise error unless extra characters are + * all spaces. (This is sort-of per SQL: the spec would actually have us + * raise a "completion condition" for the explicit cast case, but Postgres + * hasn't got such a concept.) + */ +Datum +bpchar(PG_FUNCTION_ARGS) +{ + BpChar *source = PG_GETARG_BPCHAR_PP(0); + int32 maxlen = PG_GETARG_INT32(1); + bool isExplicit = PG_GETARG_BOOL(2); + BpChar *result; + int32 len; + char *r; + char *s; + int i; + int charlen; /* number of characters in the input string + + * VARHDRSZ */ + + /* No work if typmod is invalid */ + if (maxlen < (int32) VARHDRSZ) + PG_RETURN_BPCHAR_P(source); + + maxlen -= VARHDRSZ; + + len = VARSIZE_ANY_EXHDR(source); + s = VARDATA_ANY(source); + + charlen = pg_mbstrlen_with_len(s, len); + + /* No work if supplied data matches typmod already */ + if (charlen == maxlen) + PG_RETURN_BPCHAR_P(source); + + if (charlen > maxlen) + { + /* Verify that extra characters are spaces, and clip them off */ + size_t maxmblen; + + maxmblen = pg_mbcharcliplen(s, len, maxlen); + + if (!isExplicit) + { + for (i = maxmblen; i < len; i++) + if (s[i] != ' ') + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character(%d)", + maxlen))); + } + + len = maxmblen; + + /* + * At this point, maxlen is the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len; + } + else + { + /* + * At this point, maxlen is the necessary byte length, not the number + * of CHARACTERS! + */ + maxlen = len + (maxlen - charlen); + } + + Assert(maxlen >= len); + + result = palloc(maxlen + VARHDRSZ); + SET_VARSIZE(result, maxlen + VARHDRSZ); + r = VARDATA(result); + + memcpy(r, s, len); + + /* blank pad the string if necessary */ + if (maxlen > len) + memset(r + len, ' ', maxlen - len); + + PG_RETURN_BPCHAR_P(result); +} + + +/* char_bpchar() + * Convert char to bpchar(1). + */ +Datum +char_bpchar(PG_FUNCTION_ARGS) +{ + char c = PG_GETARG_CHAR(0); + BpChar *result; + + result = (BpChar *) palloc(VARHDRSZ + 1); + + SET_VARSIZE(result, VARHDRSZ + 1); + *(VARDATA(result)) = c; + + PG_RETURN_BPCHAR_P(result); +} + + +/* bpchar_name() + * Converts a bpchar() type to a NameData type. + */ +Datum +bpchar_name(PG_FUNCTION_ARGS) +{ + BpChar *s = PG_GETARG_BPCHAR_PP(0); + char *s_data; + Name result; + int len; + + len = VARSIZE_ANY_EXHDR(s); + s_data = VARDATA_ANY(s); + + /* Truncate oversize input */ + if (len >= NAMEDATALEN) + len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1); + + /* Remove trailing blanks */ + while (len > 0) + { + if (s_data[len - 1] != ' ') + break; + len--; + } + + /* We use palloc0 here to ensure result is zero-padded */ + result = (Name) palloc0(NAMEDATALEN); + memcpy(NameStr(*result), s_data, len); + + PG_RETURN_NAME(result); +} + +/* name_bpchar() + * Converts a NameData type to a bpchar type. + * + * Uses the text conversion functions, which is only appropriate if BpChar + * and text are equivalent types. + */ +Datum +name_bpchar(PG_FUNCTION_ARGS) +{ + Name s = PG_GETARG_NAME(0); + BpChar *result; + + result = (BpChar *) cstring_to_text(NameStr(*s)); + PG_RETURN_BPCHAR_P(result); +} + +Datum +bpchartypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anychar_typmodin(ta, "char")); +} + +Datum +bpchartypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anychar_typmodout(typmod)); +} + + +/***************************************************************************** + * varchar - varchar(n) + * + * Note: varchar piggybacks on type text for most operations, and so has no + * C-coded functions except for I/O and typmod checking. + *****************************************************************************/ + +/* + * varchar_input -- common guts of varcharin and varcharrecv + * + * s is the input text of length len (may not be null-terminated) + * atttypmod is the typmod value to apply + * + * Note that atttypmod is measured in characters, which + * is not necessarily the same as the number of bytes. + * + * If the input string is too long, raise an error, unless the extra + * characters are spaces, in which case they're truncated. (per SQL) + * + * If escontext points to an ErrorSaveContext node, that is filled instead + * of throwing an error; the caller must check SOFT_ERROR_OCCURRED() + * to detect errors. + */ +static VarChar * +varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext) +{ + VarChar *result; + size_t maxlen; + + maxlen = atttypmod - VARHDRSZ; + + if (atttypmod >= (int32) VARHDRSZ && len > maxlen) + { + /* Verify that extra characters are spaces, and clip them off */ + size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); + size_t j; + + for (j = mbmaxlen; j < len; j++) + { + if (s[j] != ' ') + ereturn(escontext, NULL, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character varying(%d)", + (int) maxlen))); + } + + len = mbmaxlen; + } + + /* + * We can use cstring_to_text_with_len because VarChar and text are + * binary-compatible types. + */ + result = (VarChar *) cstring_to_text_with_len(s, len); + return result; +} + +/* + * Convert a C string to VARCHAR internal representation. atttypmod + * is the declared length of the type plus VARHDRSZ. + */ +Datum +varcharin(PG_FUNCTION_ARGS) +{ + char *s = PG_GETARG_CSTRING(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + VarChar *result; + + result = varchar_input(s, strlen(s), atttypmod, fcinfo->context); + PG_RETURN_VARCHAR_P(result); +} + + +/* + * Convert a VARCHAR value to a C string. + * + * Uses the text to C string conversion function, which is only appropriate + * if VarChar and text are equivalent types. + */ +Datum +varcharout(PG_FUNCTION_ARGS) +{ + Datum txt = PG_GETARG_DATUM(0); + + PG_RETURN_CSTRING(TextDatumGetCString(txt)); +} + +/* + * varcharrecv - converts external binary format to varchar + */ +Datum +varcharrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); +#ifdef NOT_USED + Oid typelem = PG_GETARG_OID(1); +#endif + int32 atttypmod = PG_GETARG_INT32(2); + VarChar *result; + char *str; + int nbytes; + + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + result = varchar_input(str, nbytes, atttypmod, NULL); + pfree(str); + PG_RETURN_VARCHAR_P(result); +} + +/* + * varcharsend - converts varchar to binary format + */ +Datum +varcharsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as textsend, so share code */ + return textsend(fcinfo); +} + + +/* + * varchar_support() + * + * Planner support function for the varchar() length coercion function. + * + * Currently, the only interesting thing we can do is flatten calls that set + * the new maximum length >= the previous maximum length. We can ignore the + * isExplicit argument, since that only affects truncation cases. + */ +Datum +varchar_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *expr = req->fcall; + Node *typmod; + + Assert(list_length(expr->args) >= 2); + + typmod = (Node *) lsecond(expr->args); + + if (IsA(typmod, Const) && !((Const *) typmod)->constisnull) + { + Node *source = (Node *) linitial(expr->args); + int32 old_typmod = exprTypmod(source); + int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); + int32 old_max = old_typmod - VARHDRSZ; + int32 new_max = new_typmod - VARHDRSZ; + + if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max)) + ret = relabel_to_typmod(source, new_typmod); + } + } + + PG_RETURN_POINTER(ret); +} + +/* + * Converts a VARCHAR type to the specified size. + * + * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. + * isExplicit is true if this is for an explicit cast to varchar(N). + * + * Truncation rules: for an explicit cast, silently truncate to the given + * length; for an implicit cast, raise error unless extra characters are + * all spaces. (This is sort-of per SQL: the spec would actually have us + * raise a "completion condition" for the explicit cast case, but Postgres + * hasn't got such a concept.) + */ +Datum +varchar(PG_FUNCTION_ARGS) +{ + VarChar *source = PG_GETARG_VARCHAR_PP(0); + int32 typmod = PG_GETARG_INT32(1); + bool isExplicit = PG_GETARG_BOOL(2); + int32 len, + maxlen; + size_t maxmblen; + int i; + char *s_data; + + len = VARSIZE_ANY_EXHDR(source); + s_data = VARDATA_ANY(source); + maxlen = typmod - VARHDRSZ; + + /* No work if typmod is invalid or supplied data fits it already */ + if (maxlen < 0 || len <= maxlen) + PG_RETURN_VARCHAR_P(source); + + /* only reach here if string is too long... */ + + /* truncate multibyte string preserving multibyte boundary */ + maxmblen = pg_mbcharcliplen(s_data, len, maxlen); + + if (!isExplicit) + { + for (i = maxmblen; i < len; i++) + if (s_data[i] != ' ') + ereport(ERROR, + (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), + errmsg("value too long for type character varying(%d)", + maxlen))); + } + + PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data, + maxmblen)); +} + +Datum +varchartypmodin(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + + PG_RETURN_INT32(anychar_typmodin(ta, "varchar")); +} + +Datum +varchartypmodout(PG_FUNCTION_ARGS) +{ + int32 typmod = PG_GETARG_INT32(0); + + PG_RETURN_CSTRING(anychar_typmodout(typmod)); +} + + +/***************************************************************************** + * Exported functions + *****************************************************************************/ + +/* "True" length (not counting trailing blanks) of a BpChar */ +static inline int +bcTruelen(BpChar *arg) +{ + return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg)); +} + +int +bpchartruelen(char *s, int len) +{ + int i; + + /* + * Note that we rely on the assumption that ' ' is a singleton unit on + * every supported multibyte server encoding. + */ + for (i = len - 1; i >= 0; i--) + { + if (s[i] != ' ') + break; + } + return i + 1; +} + +Datum +bpcharlen(PG_FUNCTION_ARGS) +{ + BpChar *arg = PG_GETARG_BPCHAR_PP(0); + int len; + + /* get number of bytes, ignoring trailing spaces */ + len = bcTruelen(arg); + + /* in multibyte encoding, convert to number of characters */ + if (pg_database_encoding_max_length() != 1) + len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len); + + PG_RETURN_INT32(len); +} + +Datum +bpcharoctetlen(PG_FUNCTION_ARGS) +{ + Datum arg = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ); +} + + +/***************************************************************************** + * Comparison Functions used for bpchar + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + +Datum +bpchareq(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + bool result; + Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || pg_locale_deterministic(mylocale)) + { + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ + if (len1 != len2) + result = false; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + } + else + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) == 0); + } + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bpcharne(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + bool result; + Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || pg_locale_deterministic(mylocale)) + { + /* + * Since we only care about equality or not-equality, we can avoid all + * the expense of strcoll() here, and just do bitwise comparison. + */ + if (len1 != len2) + result = true; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + } + else + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) != 0); + } + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); +} + +Datum +bpcharlt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp < 0); +} + +Datum +bpcharle(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp <= 0); +} + +Datum +bpchargt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp > 0); +} + +Datum +bpcharge(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(cmp >= 0); +} + +Datum +bpcharcmp(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bpchar_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport */ + varstr_sortsupport(ssup, BPCHAROID, collid); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +Datum +bpchar_larger(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2); +} + +Datum +bpchar_smaller(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int len1, + len2; + int cmp; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); + + PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2); +} + + +/* + * bpchar needs a specialized hash function because we want to ignore + * trailing blanks in comparisons. + */ +Datum +hashbpchar(PG_FUNCTION_ARGS) +{ + BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); + char *keydata; + int keylen; + pg_locale_t mylocale = 0; + Datum result; + + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + keydata = VARDATA_ANY(key); + keylen = bcTruelen(key); + + if (!lc_collate_is_c(collid)) + mylocale = pg_newlocale_from_collation(collid); + + if (pg_locale_deterministic(mylocale)) + { + result = hash_any((unsigned char *) keydata, keylen); + } + else + { + Size bsize, + rsize; + char *buf; + + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize + 1); + + rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + /* + * In principle, there's no reason to include the terminating NUL + * character in the hash, but it was done before and the behavior must + * be preserved. + */ + result = hash_any((uint8_t *) buf, bsize + 1); + + pfree(buf); + } + + /* Avoid leaking memory for toasted inputs */ + PG_FREE_IF_COPY(key, 0); + + return result; +} + +Datum +hashbpcharextended(PG_FUNCTION_ARGS) +{ + BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); + char *keydata; + int keylen; + pg_locale_t mylocale = 0; + Datum result; + + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + keydata = VARDATA_ANY(key); + keylen = bcTruelen(key); + + if (!lc_collate_is_c(collid)) + mylocale = pg_newlocale_from_collation(collid); + + if (pg_locale_deterministic(mylocale)) + { + result = hash_any_extended((unsigned char *) keydata, keylen, + PG_GETARG_INT64(1)); + } + else + { + Size bsize, + rsize; + char *buf; + + bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale); + buf = palloc(bsize + 1); + + rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale); + if (rsize != bsize) + elog(ERROR, "pg_strnxfrm() returned unexpected result"); + + /* + * In principle, there's no reason to include the terminating NUL + * character in the hash, but it was done before and the behavior must + * be preserved. + */ + result = hash_any_extended((uint8_t *) buf, bsize + 1, + PG_GETARG_INT64(1)); + + pfree(buf); + } + + PG_FREE_IF_COPY(key, 0); + + return result; +} + +/* + * The following operators support character-by-character comparison + * of bpchar datums, to allow building indexes suitable for LIKE clauses. + * Note that the regular bpchareq/bpcharne comparison operators, and + * regular support functions 1 and 2 with "C" collation are assumed to be + * compatible with these! + */ + +static int +internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) +{ + int result; + int len1, + len2; + + len1 = bcTruelen(arg1); + len2 = bcTruelen(arg2); + + result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if (result != 0) + return result; + else if (len1 < len2) + return -1; + else if (len1 > len2) + return 1; + else + return 0; +} + + +Datum +bpchar_pattern_lt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result < 0); +} + + +Datum +bpchar_pattern_le(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result <= 0); +} + + +Datum +bpchar_pattern_ge(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result >= 0); +} + + +Datum +bpchar_pattern_gt(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result > 0); +} + + +Datum +btbpchar_pattern_cmp(PG_FUNCTION_ARGS) +{ + BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); + BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); + int result; + + result = internal_bpchar_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + + +Datum +btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} |