summary refs log tree commit diff stats
path: root/src/backend/utils/adt/varchar.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit: 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree: 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/utils/adt/varchar.c
parent: Initial commit. (diff)
download: postgresql-14-upstream.tar.xz
download: postgresql-14-upstream.zip
Adding upstream version 14.5. (upstream/14.5, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/varchar.c')
-rw-r--r-- src/backend/utils/adt/varchar.c 1224
1 files changed, 1224 insertions, 0 deletions
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
new file mode 100644
index 0000000..8fc8464
--- /dev/null
+++ b/src/backend/utils/adt/varchar.c
@@ -0,0 +1,1224 @@
+/*-------------------------------------------------------------------------
+ *
+ * varchar.c
+ * Functions for the built-in types char(n) and varchar(n).
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/varchar.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
+#include "utils/varlena.h"
+
+/*
+ * anychar_typmodin -- shared typmod input logic for bpchar and varchar
+ *
+ * ta is the array of type modifiers; typename is used only to build the
+ * error messages.  Exactly one modifier, in the range 1..MaxAttrSize, is
+ * accepted.  The returned typmod is that length plus VARHDRSZ.
+ */
+static int32
+anychar_typmodin(ArrayType *ta, const char *typename)
+{
+	int nmods;
+	int32 *mods = ArrayGetIntegerTypmods(ta, &nmods);
+	int32 declared_len;
+
+	/*
+	 * The grammar should never let a wrong number of modifiers for CHAR get
+	 * this far, so a terse message is good enough.
+	 */
+	if (nmods != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid type modifier")));
+
+	declared_len = mods[0];
+
+	if (declared_len < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("length for type %s must be at least 1", typename)));
+	if (declared_len > MaxAttrSize)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("length for type %s cannot exceed %d",
+						typename, MaxAttrSize)));
+
+	/*
+	 * Historically the typmod is the character count plus VARHDRSZ; too much
+	 * client-side code depends on that encoding for it to be changed.
+	 */
+	return VARHDRSZ + declared_len;
+}
+
+/*
+ * anychar_typmodout -- shared typmod output logic for bpchar and varchar
+ *
+ * Returns a palloc'd string: "(N)" where N is typmod minus VARHDRSZ when
+ * typmod exceeds VARHDRSZ, otherwise the empty string.
+ */
+static char *
+anychar_typmodout(int32 typmod)
+{
+	const size_t bufsize = 64;
+	char *buf = (char *) palloc(bufsize);
+
+	if (typmod <= VARHDRSZ)
+		buf[0] = '\0';
+	else
+		snprintf(buf, bufsize, "(%d)", (int) (typmod - VARHDRSZ));
+
+	return buf;
+}
+
+
+/*
+ * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
+ * is for blank-padded string whose length is specified in CREATE TABLE.
+ * VARCHAR is for storing string whose length is at most the length specified
+ * at CREATE TABLE time.
+ *
+ * It's hard to implement these types because we cannot figure out
+ * the length of the type from the type itself. I changed (hopefully all) the
+ * fmgr calls that invoke input functions of a data type to supply the
+ * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
+ * the length of the attributes and hence the exact length of the char() or
+ * varchar(). We pass this to bpcharin() or varcharin().) In the case where
+ * we cannot determine the length, we pass in -1 instead and the input
+ * converter does not enforce any length check.
+ *
+ * We actually implement this as a varlena so that we don't have to pass in
+ * the length for the comparison functions. (The difference between these
+ * types and "text" is that we truncate and possibly blank-pad the string
+ * at insertion time.)
+ *
+ * - ay 6/95
+ */
+
+
+/*****************************************************************************
+ * bpchar - char() *
+ *****************************************************************************/
+
+/*
+ * bpchar_input -- common guts of bpcharin and bpcharrecv
+ *
+ * s is the input text of length len (may not be null-terminated)
+ * atttypmod is the typmod value to apply
+ *
+ * Note that atttypmod is measured in characters, which
+ * is not necessarily the same as the number of bytes.
+ *
+ * If the input string is too long, raise an error, unless the extra
+ * characters are spaces, in which case they're truncated. (per SQL)
+ *
+ * The result is a freshly palloc'd BpChar.  When a valid typmod is given
+ * and the input has fewer characters than declared, the result is
+ * blank-padded on the right with one-byte spaces.
+ */
+static BpChar *
+bpchar_input(const char *s, size_t len, int32 atttypmod)
+{
+ BpChar *result;
+ char *r;
+ size_t maxlen;
+
+ /* If typmod is -1 (or invalid), use the actual string length */
+ if (atttypmod < (int32) VARHDRSZ)
+ maxlen = len;
+ else
+ {
+ size_t charlen; /* number of CHARACTERS in the input */
+
+ /* until reassigned below, maxlen counts CHARACTERS, not bytes */
+ maxlen = atttypmod - VARHDRSZ;
+ charlen = pg_mbstrlen_with_len(s, len);
+ if (charlen > maxlen)
+ {
+ /* Verify that extra characters are spaces, and clip them off */
+ size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
+ size_t j;
+
+ /*
+ * at this point, len is the actual BYTE length of the input
+ * string, maxlen is the max number of CHARACTERS allowed for this
+ * bpchar type, mbmaxlen is the length in BYTES of those chars.
+ */
+ for (j = mbmaxlen; j < len; j++)
+ {
+ if (s[j] != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("value too long for type character(%d)",
+ (int) maxlen)));
+ }
+
+ /*
+ * Now we set maxlen to the necessary byte length, not the number
+ * of CHARACTERS!
+ */
+ maxlen = len = mbmaxlen;
+ }
+ else
+ {
+ /*
+ * Now we set maxlen to the necessary byte length, not the number
+ * of CHARACTERS!  Each missing character is padded with a
+ * single-byte space, so add the character shortfall to the
+ * input's byte length.
+ */
+ maxlen = len + (maxlen - charlen);
+ }
+ }
+
+ /*
+ * From here on both maxlen and len are BYTE counts: maxlen is the size of
+ * the stored payload, len the number of bytes copied from s.
+ */
+ result = (BpChar *) palloc(maxlen + VARHDRSZ);
+ SET_VARSIZE(result, maxlen + VARHDRSZ);
+ r = VARDATA(result);
+ memcpy(r, s, len);
+
+ /* blank pad the string if necessary */
+ if (maxlen > len)
+ memset(r + len, ' ', maxlen - len);
+
+ return result;
+}
+
+/*
+ * bpcharin -- text input function for CHARACTER(n)
+ *
+ * Argument 0 is the C string to convert and argument 2 is the typmod
+ * (declared length plus VARHDRSZ).  All the real work is done by
+ * bpchar_input().
+ */
+Datum
+bpcharin(PG_FUNCTION_ARGS)
+{
+	char *input = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+	Oid typelem = PG_GETARG_OID(1);
+#endif
+	int32 typmod = PG_GETARG_INT32(2);
+
+	PG_RETURN_BPCHAR_P(bpchar_input(input, strlen(input), typmod));
+}
+
+
+/*
+ * bpcharout -- text output function for CHARACTER
+ *
+ * Hands the datum straight to the text-to-C-string conversion, which is
+ * valid only as long as BpChar and text share a representation.
+ */
+Datum
+bpcharout(PG_FUNCTION_ARGS)
+{
+	char *cstr = TextDatumGetCString(PG_GETARG_DATUM(0));
+
+	PG_RETURN_CSTRING(cstr);
+}
+
+/*
+ * bpcharrecv -- binary input function for CHARACTER(n)
+ *
+ * Reads the remainder of the message buffer as text, then applies the
+ * typmod (argument 2) through bpchar_input().
+ */
+Datum
+bpcharrecv(PG_FUNCTION_ARGS)
+{
+	StringInfo msg = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+	Oid typelem = PG_GETARG_OID(1);
+#endif
+	int32 typmod = PG_GETARG_INT32(2);
+	int rawlen;
+	char *rawtext;
+	BpChar *converted;
+
+	rawtext = pq_getmsgtext(msg, msg->len - msg->cursor, &rawlen);
+	converted = bpchar_input(rawtext, rawlen, typmod);
+
+	/* the intermediate text copy is no longer needed */
+	pfree(rawtext);
+
+	PG_RETURN_BPCHAR_P(converted);
+}
+
+/*
+ * bpcharsend -- binary output function for CHARACTER
+ *
+ * The wire format is identical to text's, so simply delegate to textsend().
+ */
+Datum
+bpcharsend(PG_FUNCTION_ARGS)
+{
+	Datum wire = textsend(fcinfo);
+
+	return wire;
+}
+
+
+/*
+ * Converts a CHARACTER type to the specified size.
+ *
+ * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
+ * isExplicit is true if this is for an explicit cast to char(N).
+ *
+ * Truncation rules: for an explicit cast, silently truncate to the given
+ * length; for an implicit cast, raise error unless extra characters are
+ * all spaces. (This is sort-of per SQL: the spec would actually have us
+ * raise a "completion condition" for the explicit cast case, but Postgres
+ * hasn't got such a concept.)
+ *
+ * The source datum is returned unchanged when the typmod is invalid or the
+ * value already has exactly the declared number of characters; otherwise a
+ * newly palloc'd value is returned, clipped or blank-padded as needed.
+ */
+Datum
+bpchar(PG_FUNCTION_ARGS)
+{
+ BpChar *source = PG_GETARG_BPCHAR_PP(0);
+ int32 maxlen = PG_GETARG_INT32(1);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ BpChar *result;
+ int32 len;
+ char *r;
+ char *s;
+ int i;
+ int charlen; /* number of characters in the input string */
+
+ /* No work if typmod is invalid */
+ if (maxlen < (int32) VARHDRSZ)
+ PG_RETURN_BPCHAR_P(source);
+
+ /* strip the header allowance: maxlen now counts CHARACTERS */
+ maxlen -= VARHDRSZ;
+
+ /* len is the BYTE length of the source payload */
+ len = VARSIZE_ANY_EXHDR(source);
+ s = VARDATA_ANY(source);
+
+ charlen = pg_mbstrlen_with_len(s, len);
+
+ /* No work if supplied data matches typmod already */
+ if (charlen == maxlen)
+ PG_RETURN_BPCHAR_P(source);
+
+ if (charlen > maxlen)
+ {
+ /* Verify that extra characters are spaces, and clip them off */
+ size_t maxmblen;
+
+ /* byte length of the first maxlen characters */
+ maxmblen = pg_mbcharcliplen(s, len, maxlen);
+
+ if (!isExplicit)
+ {
+ for (i = maxmblen; i < len; i++)
+ if (s[i] != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("value too long for type character(%d)",
+ maxlen)));
+ }
+
+ len = maxmblen;
+
+ /*
+ * At this point, maxlen is the necessary byte length, not the number
+ * of CHARACTERS!
+ */
+ maxlen = len;
+ }
+ else
+ {
+ /*
+ * At this point, maxlen is the necessary byte length, not the number
+ * of CHARACTERS!  The shortfall in characters is made up with
+ * single-byte spaces.
+ */
+ maxlen = len + (maxlen - charlen);
+ }
+
+ Assert(maxlen >= len);
+
+ result = palloc(maxlen + VARHDRSZ);
+ SET_VARSIZE(result, maxlen + VARHDRSZ);
+ r = VARDATA(result);
+
+ memcpy(r, s, len);
+
+ /* blank pad the string if necessary */
+ if (maxlen > len)
+ memset(r + len, ' ', maxlen - len);
+
+ PG_RETURN_BPCHAR_P(result);
+}
+
+
+/*
+ * char_bpchar()
+ *
+ * Builds a one-byte bpchar value holding the given "char" argument.
+ */
+Datum
+char_bpchar(PG_FUNCTION_ARGS)
+{
+	BpChar *result = (BpChar *) palloc(VARHDRSZ + 1);
+
+	SET_VARSIZE(result, VARHDRSZ + 1);
+	VARDATA(result)[0] = PG_GETARG_CHAR(0);
+
+	PG_RETURN_BPCHAR_P(result);
+}
+
+
+/*
+ * bpchar_name()
+ *
+ * Converts a bpchar value to a NameData.  Input that would not fit is
+ * clipped to at most NAMEDATALEN - 1 bytes via pg_mbcliplen(), and trailing
+ * blanks are dropped.
+ */
+Datum
+bpchar_name(PG_FUNCTION_ARGS)
+{
+	BpChar *src = PG_GETARG_BPCHAR_PP(0);
+	int nbytes = VARSIZE_ANY_EXHDR(src);
+	char *data = VARDATA_ANY(src);
+	Name result;
+
+	/* Clip input that would overflow the name buffer */
+	if (nbytes >= NAMEDATALEN)
+		nbytes = pg_mbcliplen(data, nbytes, NAMEDATALEN - 1);
+
+	/* Discard trailing blanks */
+	while (nbytes > 0 && data[nbytes - 1] == ' ')
+		nbytes--;
+
+	/* palloc0 guarantees the unused tail of the name is zero-filled */
+	result = (Name) palloc0(NAMEDATALEN);
+	memcpy(NameStr(*result), data, nbytes);
+
+	PG_RETURN_NAME(result);
+}
+
+/*
+ * name_bpchar()
+ *
+ * Converts a NameData to bpchar by running its string through
+ * cstring_to_text(); this is valid only while BpChar and text share a
+ * representation.
+ */
+Datum
+name_bpchar(PG_FUNCTION_ARGS)
+{
+	Name name = PG_GETARG_NAME(0);
+
+	PG_RETURN_BPCHAR_P((BpChar *) cstring_to_text(NameStr(*name)));
+}
+
+/* bpchartypmodin -- typmod input function for char(n) */
+Datum
+bpchartypmodin(PG_FUNCTION_ARGS)
+{
+	ArrayType *mods = PG_GETARG_ARRAYTYPE_P(0);
+	int32 typmod = anychar_typmodin(mods, "char");
+
+	PG_RETURN_INT32(typmod);
+}
+
+/* bpchartypmodout -- typmod output function for char(n) */
+Datum
+bpchartypmodout(PG_FUNCTION_ARGS)
+{
+	char *text_form = anychar_typmodout(PG_GETARG_INT32(0));
+
+	PG_RETURN_CSTRING(text_form);
+}
+
+
+/*****************************************************************************
+ * varchar - varchar(n)
+ *
+ * Note: varchar piggybacks on type text for most operations, and so has no
+ * C-coded functions except for I/O and typmod checking.
+ *****************************************************************************/
+
+/*
+ * varchar_input -- shared implementation of varcharin and varcharrecv
+ *
+ * s points to len bytes of input text (not necessarily null-terminated);
+ * atttypmod is the typmod to enforce, counted in characters plus VARHDRSZ.
+ *
+ * Over-length input is an error unless everything past the limit is
+ * spaces, in which case the excess is silently clipped (per SQL).
+ *
+ * The result is built with the C-string-to-text conversion, which is valid
+ * only while VarChar and text share a representation.
+ */
+static VarChar *
+varchar_input(const char *s, size_t len, int32 atttypmod)
+{
+	size_t limit = atttypmod - VARHDRSZ;
+	bool enforce = (atttypmod >= (int32) VARHDRSZ);
+
+	if (enforce && len > limit)
+	{
+		/* Byte length of the part that fits within the character limit */
+		size_t keep = pg_mbcharcliplen(s, len, limit);
+		const char *p;
+
+		/* Everything beyond the kept prefix must be blank */
+		for (p = s + keep; p < s + len; p++)
+		{
+			if (*p != ' ')
+				ereport(ERROR,
+						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+						 errmsg("value too long for type character varying(%d)",
+								(int) limit)));
+		}
+
+		len = keep;
+	}
+
+	return (VarChar *) cstring_to_text_with_len(s, len);
+}
+
+/*
+ * varcharin -- text input function for VARCHAR(n)
+ *
+ * Argument 0 is the C string to convert and argument 2 is the typmod
+ * (declared length plus VARHDRSZ).  The work is done by varchar_input().
+ */
+Datum
+varcharin(PG_FUNCTION_ARGS)
+{
+	char *input = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+	Oid typelem = PG_GETARG_OID(1);
+#endif
+	int32 typmod = PG_GETARG_INT32(2);
+
+	PG_RETURN_VARCHAR_P(varchar_input(input, strlen(input), typmod));
+}
+
+
+/*
+ * varcharout -- text output function for VARCHAR
+ *
+ * Hands the datum straight to the text-to-C-string conversion, which is
+ * valid only as long as VarChar and text share a representation.
+ */
+Datum
+varcharout(PG_FUNCTION_ARGS)
+{
+	char *cstr = TextDatumGetCString(PG_GETARG_DATUM(0));
+
+	PG_RETURN_CSTRING(cstr);
+}
+
+/*
+ * varcharrecv -- binary input function for VARCHAR(n)
+ *
+ * Reads the remainder of the message buffer as text, then applies the
+ * typmod (argument 2) through varchar_input().
+ */
+Datum
+varcharrecv(PG_FUNCTION_ARGS)
+{
+	StringInfo msg = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+	Oid typelem = PG_GETARG_OID(1);
+#endif
+	int32 typmod = PG_GETARG_INT32(2);
+	int rawlen;
+	char *rawtext;
+	VarChar *converted;
+
+	rawtext = pq_getmsgtext(msg, msg->len - msg->cursor, &rawlen);
+	converted = varchar_input(rawtext, rawlen, typmod);
+
+	/* the intermediate text copy is no longer needed */
+	pfree(rawtext);
+
+	PG_RETURN_VARCHAR_P(converted);
+}
+
+/*
+ * varcharsend -- binary output function for VARCHAR
+ *
+ * The wire format is identical to text's, so simply delegate to textsend().
+ */
+Datum
+varcharsend(PG_FUNCTION_ARGS)
+{
+	Datum wire = textsend(fcinfo);
+
+	return wire;
+}
+
+
+/*
+ * varchar_support()
+ *
+ * Planner support function for the varchar() length coercion function.
+ *
+ * The only request handled is SupportRequestSimplify.  When the target
+ * typmod is a non-null constant, and it is either unconstrained (negative)
+ * or at least as large as the source expression's known typmod, the
+ * coercion call is replaced by a relabeling of the source.  The isExplicit
+ * argument is ignored because it matters only when truncation can occur.
+ *
+ * Returns the replacement node, or NULL when no simplification applies.
+ */
+Datum
+varchar_support(PG_FUNCTION_ARGS)
+{
+	Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+	Node *simplified = NULL;
+	SupportRequestSimplify *req;
+	FuncExpr *call;
+	Node *typmod_arg;
+	Node *input;
+	int32 src_typmod;
+	int32 dst_typmod;
+	int32 src_max;
+	int32 dst_max;
+
+	/* Only simplification requests are of interest */
+	if (!IsA(rawreq, SupportRequestSimplify))
+		PG_RETURN_POINTER(simplified);
+
+	req = (SupportRequestSimplify *) rawreq;
+	call = req->fcall;
+
+	Assert(list_length(call->args) >= 2);
+
+	/* The target typmod must be a known, non-null constant */
+	typmod_arg = (Node *) lsecond(call->args);
+	if (!IsA(typmod_arg, Const) || ((Const *) typmod_arg)->constisnull)
+		PG_RETURN_POINTER(simplified);
+
+	input = (Node *) linitial(call->args);
+	src_typmod = exprTypmod(input);
+	dst_typmod = DatumGetInt32(((Const *) typmod_arg)->constvalue);
+	src_max = src_typmod - VARHDRSZ;
+	dst_max = dst_typmod - VARHDRSZ;
+
+	/* Flatten when the coercion can never shorten the value */
+	if (dst_typmod < 0 || (src_typmod >= 0 && src_max <= dst_max))
+		simplified = relabel_to_typmod(input, dst_typmod);
+
+	PG_RETURN_POINTER(simplified);
+}
+
+/*
+ * varchar -- coerce a VARCHAR value to a given maximum length
+ *
+ * Argument 1 is the typmod (declared length plus VARHDRSZ bytes) and
+ * argument 2 says whether this is an explicit cast to varchar(N).
+ *
+ * An explicit cast silently truncates to the given length; an implicit
+ * cast raises an error unless every character being removed is a space.
+ * (This is sort-of per SQL: the spec would have the explicit case raise a
+ * "completion condition", a concept Postgres does not have.)
+ */
+Datum
+varchar(PG_FUNCTION_ARGS)
+{
+	VarChar *source = PG_GETARG_VARCHAR_PP(0);
+	int32 typmod = PG_GETARG_INT32(1);
+	bool isExplicit = PG_GETARG_BOOL(2);
+	char *data = VARDATA_ANY(source);
+	int32 nbytes = VARSIZE_ANY_EXHDR(source);
+	int32 limit = typmod - VARHDRSZ;
+	size_t cliplen;
+	int pos;
+
+	/* Nothing to do for an invalid typmod or a value that already fits */
+	if (limit < 0 || nbytes <= limit)
+		PG_RETURN_VARCHAR_P(source);
+
+	/* The value is too long; clip it on a multibyte character boundary */
+	cliplen = pg_mbcharcliplen(data, nbytes, limit);
+
+	if (!isExplicit)
+	{
+		/* Implicit cast: whatever is cut off must consist only of spaces */
+		pos = cliplen;
+		while (pos < nbytes)
+		{
+			if (data[pos] != ' ')
+				ereport(ERROR,
+						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+						 errmsg("value too long for type character varying(%d)",
+								limit)));
+			pos++;
+		}
+	}
+
+	PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(data,
+															 cliplen));
+}
+
+/* varchartypmodin -- typmod input function for varchar(n) */
+Datum
+varchartypmodin(PG_FUNCTION_ARGS)
+{
+	ArrayType *mods = PG_GETARG_ARRAYTYPE_P(0);
+	int32 typmod = anychar_typmodin(mods, "varchar");
+
+	PG_RETURN_INT32(typmod);
+}
+
+/* varchartypmodout -- typmod output function for varchar(n) */
+Datum
+varchartypmodout(PG_FUNCTION_ARGS)
+{
+	char *text_form = anychar_typmodout(PG_GETARG_INT32(0));
+
+	PG_RETURN_CSTRING(text_form);
+}
+
+
+/*****************************************************************************
+ * Exported functions
+ *****************************************************************************/
+
+/* Byte length of a BpChar's payload, not counting trailing blanks */
+static inline int
+bcTruelen(BpChar *arg)
+{
+	char *payload = VARDATA_ANY(arg);
+	int nbytes = VARSIZE_ANY_EXHDR(arg);
+
+	return bpchartruelen(payload, nbytes);
+}
+
+/*
+ * bpchartruelen -- length of the first len bytes of s with trailing
+ * spaces removed
+ */
+int
+bpchartruelen(char *s, int len)
+{
+	/*
+	 * Scanning backwards byte-by-byte relies on ' ' being a single-byte
+	 * unit in every supported multibyte server encoding.
+	 */
+	while (len > 0 && s[len - 1] == ' ')
+		len--;
+
+	return len;
+}
+
+/*
+ * bpcharlen -- length of a bpchar value in characters, with trailing
+ * blanks not counted
+ */
+Datum
+bpcharlen(PG_FUNCTION_ARGS)
+{
+	BpChar *arg = PG_GETARG_BPCHAR_PP(0);
+	int nbytes = bcTruelen(arg);	/* bytes, trailing spaces excluded */
+
+	/* with a single-byte encoding, bytes and characters coincide */
+	if (pg_database_encoding_max_length() == 1)
+		PG_RETURN_INT32(nbytes);
+
+	/* multibyte encoding: count characters within those bytes */
+	PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(arg), nbytes));
+}
+
+/*
+ * bpcharoctetlen -- raw datum size of a bpchar value minus VARHDRSZ
+ *
+ * The size is taken from toast_raw_datum_size(), so the input is never
+ * detoasted here.
+ */
+Datum
+bpcharoctetlen(PG_FUNCTION_ARGS)
+{
+	Datum raw = PG_GETARG_DATUM(0);
+
+	PG_RETURN_INT32(toast_raw_datum_size(raw) - VARHDRSZ);
+}
+
+
+/*****************************************************************************
+ * Comparison Functions used for bpchar
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ *****************************************************************************/
+
+/*
+ * check_collation_set -- raise an error if no collation was supplied
+ *
+ * Does nothing when collid is a valid OID.
+ */
+static void
+check_collation_set(Oid collid)
+{
+	if (OidIsValid(collid))
+		return;
+
+	/*
+	 * A missing collation usually means the parser could not resolve a
+	 * conflict between implicit collations, so word the error accordingly.
+	 */
+	ereport(ERROR,
+			(errcode(ERRCODE_INDETERMINATE_COLLATION),
+			 errmsg("could not determine which collation to use for string comparison"),
+			 errhint("Use the COLLATE clause to set the collation explicitly.")));
+}
+
+/*
+ * bpchareq -- equality operator for bpchar
+ *
+ * Trailing blanks are ignored on both sides.  For the C collation, the
+ * default collation, or any deterministic collation the values are compared
+ * bytewise; otherwise varstr_cmp() decides.
+ */
+Datum
+bpchareq(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	Oid collid = PG_GET_COLLATION();
+	int llen;
+	int rlen;
+	bool bytewise;
+	bool result;
+
+	check_collation_set(collid);
+
+	llen = bcTruelen(lhs);
+	rlen = bcTruelen(rhs);
+
+	bytewise = lc_collate_is_c(collid) ||
+		collid == DEFAULT_COLLATION_OID ||
+		pg_newlocale_from_collation(collid)->deterministic;
+
+	if (!bytewise)
+		result = (varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+							 collid) == 0);
+	else
+	{
+		/*
+		 * Only equal-or-not matters here, so skip the cost of strcoll() and
+		 * compare the bytes directly; differing lengths settle it at once.
+		 */
+		result = (llen == rlen) &&
+			(memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), llen) == 0);
+	}
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(result);
+}
+
+/*
+ * bpcharne -- inequality operator for bpchar
+ *
+ * Trailing blanks are ignored on both sides.  For the C collation, the
+ * default collation, or any deterministic collation the values are compared
+ * bytewise; otherwise varstr_cmp() decides.
+ */
+Datum
+bpcharne(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	Oid collid = PG_GET_COLLATION();
+	int llen;
+	int rlen;
+	bool bytewise;
+	bool result;
+
+	check_collation_set(collid);
+
+	llen = bcTruelen(lhs);
+	rlen = bcTruelen(rhs);
+
+	bytewise = lc_collate_is_c(collid) ||
+		collid == DEFAULT_COLLATION_OID ||
+		pg_newlocale_from_collation(collid)->deterministic;
+
+	if (!bytewise)
+		result = (varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+							 collid) != 0);
+	else
+	{
+		/*
+		 * Only equal-or-not matters here, so skip the cost of strcoll() and
+		 * compare the bytes directly; differing lengths settle it at once.
+		 */
+		result = (llen != rlen) ||
+			(memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), llen) != 0);
+	}
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(result);
+}
+
+/*
+ * bpcharlt -- "less than" operator for bpchar
+ *
+ * Compares with trailing blanks ignored, using varstr_cmp() under the
+ * call's collation.
+ */
+Datum
+bpcharlt(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order < 0);
+}
+
+/*
+ * bpcharle -- "less than or equal" operator for bpchar
+ *
+ * Compares with trailing blanks ignored, using varstr_cmp() under the
+ * call's collation.
+ */
+Datum
+bpcharle(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order <= 0);
+}
+
+/*
+ * bpchargt -- "greater than" operator for bpchar
+ *
+ * Compares with trailing blanks ignored, using varstr_cmp() under the
+ * call's collation.
+ */
+Datum
+bpchargt(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order > 0);
+}
+
+/*
+ * bpcharge -- "greater than or equal" operator for bpchar
+ *
+ * Compares with trailing blanks ignored, using varstr_cmp() under the
+ * call's collation.
+ */
+Datum
+bpcharge(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order >= 0);
+}
+
+/*
+ * bpcharcmp -- three-way comparison for bpchar
+ *
+ * Returns the varstr_cmp() result for the two values with trailing blanks
+ * ignored, under the call's collation.
+ */
+Datum
+bpcharcmp(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_INT32(order);
+}
+
+/*
+ * bpchar_sortsupport -- sort support setup for bpchar
+ *
+ * Runs the generic string sort support setup with BPCHAROID and the
+ * collation recorded in the SortSupport object, while ssup->ssup_cxt is the
+ * current memory context.
+ */
+Datum
+bpchar_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+	MemoryContext caller_cxt = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+	/* Use generic string SortSupport */
+	varstr_sortsupport(ssup, BPCHAROID, ssup->ssup_collation);
+
+	MemoryContextSwitchTo(caller_cxt);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * bpchar_larger -- return the larger of two bpchar values
+ *
+ * Ordering ignores trailing blanks and uses varstr_cmp() under the call's
+ * collation; the first argument wins a tie.
+ */
+Datum
+bpchar_larger(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	if (order >= 0)
+		PG_RETURN_BPCHAR_P(lhs);
+
+	PG_RETURN_BPCHAR_P(rhs);
+}
+
+/*
+ * bpchar_smaller -- return the smaller of two bpchar values
+ *
+ * Ordering ignores trailing blanks and uses varstr_cmp() under the call's
+ * collation; the first argument wins a tie.
+ */
+Datum
+bpchar_smaller(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int llen = bcTruelen(lhs);
+	int rlen = bcTruelen(rhs);
+	int order;
+
+	order = varstr_cmp(VARDATA_ANY(lhs), llen, VARDATA_ANY(rhs), rlen,
+					   PG_GET_COLLATION());
+
+	if (order <= 0)
+		PG_RETURN_BPCHAR_P(lhs);
+
+	PG_RETURN_BPCHAR_P(rhs);
+}
+
+
+/*
+ * hashbpchar -- hash function for bpchar
+ *
+ * bpchar needs a specialized hash function because we want to ignore
+ * trailing blanks in comparisons: only the first bcTruelen() bytes of the
+ * key are hashed.
+ *
+ * For the C collation, the default collation, or any deterministic
+ * collation the raw bytes are hashed.  For a nondeterministic ICU
+ * collation the ICU sort key of the (blank-stripped) value is hashed
+ * instead.  Raises an error if no collation was supplied.
+ */
+Datum
+hashbpchar(PG_FUNCTION_ARGS)
+{
+	BpChar *key = PG_GETARG_BPCHAR_PP(0);
+	Oid collid = PG_GET_COLLATION();
+	char *keydata;
+	int keylen;
+	pg_locale_t mylocale = NULL;
+	Datum result;
+
+	if (!collid)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for string hashing"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+	keydata = VARDATA_ANY(key);
+	keylen = bcTruelen(key);
+
+	/* mylocale stays NULL for the C and default collations */
+	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+		mylocale = pg_newlocale_from_collation(collid);
+
+	if (!mylocale || mylocale->deterministic)
+	{
+		result = hash_any((unsigned char *) keydata, keylen);
+	}
+	else
+	{
+#ifdef USE_ICU
+		if (mylocale->provider == COLLPROVIDER_ICU)
+		{
+			int32_t ulen = -1;
+			UChar *uchar = NULL;
+			Size bsize;
+			uint8_t *buf;
+
+			ulen = icu_to_uchar(&uchar, keydata, keylen);
+
+			/* first call only measures the sort key, second one fills it */
+			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+									uchar, ulen, NULL, 0);
+			buf = palloc(bsize);
+			ucol_getSortKey(mylocale->info.icu.ucol,
+							uchar, ulen, buf, bsize);
+
+			/* the converted string was palloc'd by icu_to_uchar(); drop it */
+			pfree(uchar);
+
+			result = hash_any(buf, bsize);
+
+			pfree(buf);
+		}
+		else
+#endif
+			/* shouldn't happen */
+			elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+	}
+
+	/* Avoid leaking memory for toasted inputs */
+	PG_FREE_IF_COPY(key, 0);
+
+	return result;
+}
+
+/*
+ * hashbpcharextended -- seeded 64-bit hash function for bpchar
+ *
+ * Argument 1 is the seed.  As in hashbpchar, trailing blanks must not
+ * affect the result, so only the first bcTruelen() bytes of the key are
+ * considered on every code path, including the nondeterministic-collation
+ * one.
+ *
+ * For the C collation, the default collation, or any deterministic
+ * collation the raw bytes are hashed.  For a nondeterministic ICU
+ * collation the ICU sort key of the (blank-stripped) value is hashed
+ * instead.  Raises an error if no collation was supplied.
+ */
+Datum
+hashbpcharextended(PG_FUNCTION_ARGS)
+{
+	BpChar *key = PG_GETARG_BPCHAR_PP(0);
+	Oid collid = PG_GET_COLLATION();
+	char *keydata;
+	int keylen;
+	pg_locale_t mylocale = NULL;
+	Datum result;
+
+	if (!collid)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for string hashing"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+	keydata = VARDATA_ANY(key);
+	keylen = bcTruelen(key);
+
+	/* mylocale stays NULL for the C and default collations */
+	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+		mylocale = pg_newlocale_from_collation(collid);
+
+	if (!mylocale || mylocale->deterministic)
+	{
+		result = hash_any_extended((unsigned char *) keydata, keylen,
+								   PG_GETARG_INT64(1));
+	}
+	else
+	{
+#ifdef USE_ICU
+		if (mylocale->provider == COLLPROVIDER_ICU)
+		{
+			int32_t ulen = -1;
+			UChar *uchar = NULL;
+			Size bsize;
+			uint8_t *buf;
+
+			/*
+			 * Convert only the blank-stripped prefix.  Using the full datum
+			 * length here would make values that compare equal hash
+			 * differently.
+			 */
+			ulen = icu_to_uchar(&uchar, keydata, keylen);
+
+			/* first call only measures the sort key, second one fills it */
+			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+									uchar, ulen, NULL, 0);
+			buf = palloc(bsize);
+			ucol_getSortKey(mylocale->info.icu.ucol,
+							uchar, ulen, buf, bsize);
+
+			/* the converted string was palloc'd by icu_to_uchar(); drop it */
+			pfree(uchar);
+
+			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+			pfree(buf);
+		}
+		else
+#endif
+			/* shouldn't happen */
+			elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+	}
+
+	/* Avoid leaking memory for toasted inputs */
+	PG_FREE_IF_COPY(key, 0);
+
+	return result;
+}
+
+/*
+ * The following operators support character-by-character comparison
+ * of bpchar datums, to allow building indexes suitable for LIKE clauses.
+ * Note that the regular bpchareq/bpcharne comparison operators, and
+ * regular support functions 1 and 2 with "C" collation are assumed to be
+ * compatible with these!
+ */
+
+/*
+ * internal_bpchar_pattern_compare -- bytewise ordering of two bpchar values
+ *
+ * Trailing blanks are ignored.  The common-length prefix is compared with
+ * memcmp(); if it is identical, the shorter value sorts first.  Returns the
+ * memcmp() result when the prefixes differ, else -1, 0 or 1.
+ */
+static int
+internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
+{
+	int len1 = bcTruelen(arg1);
+	int len2 = bcTruelen(arg2);
+	int prefix_cmp;
+
+	prefix_cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+	if (prefix_cmp != 0)
+		return prefix_cmp;
+
+	if (len1 == len2)
+		return 0;
+
+	return (len1 < len2) ? -1 : 1;
+}
+
+
+/* bpchar_pattern_lt -- bytewise "less than" for bpchar pattern operations */
+Datum
+bpchar_pattern_lt(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int order = internal_bpchar_pattern_compare(lhs, rhs);
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order < 0);
+}
+
+
+/* bpchar_pattern_le -- bytewise "less than or equal" for bpchar pattern operations */
+Datum
+bpchar_pattern_le(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int order = internal_bpchar_pattern_compare(lhs, rhs);
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order <= 0);
+}
+
+
+/* bpchar_pattern_ge -- bytewise "greater than or equal" for bpchar pattern operations */
+Datum
+bpchar_pattern_ge(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int order = internal_bpchar_pattern_compare(lhs, rhs);
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order >= 0);
+}
+
+
+/* bpchar_pattern_gt -- bytewise "greater than" for bpchar pattern operations */
+Datum
+bpchar_pattern_gt(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int order = internal_bpchar_pattern_compare(lhs, rhs);
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL(order > 0);
+}
+
+
+/* btbpchar_pattern_cmp -- bytewise three-way comparison for bpchar pattern operations */
+Datum
+btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
+{
+	BpChar *lhs = PG_GETARG_BPCHAR_PP(0);
+	BpChar *rhs = PG_GETARG_BPCHAR_PP(1);
+	int order = internal_bpchar_pattern_compare(lhs, rhs);
+
+	/* release any detoasted copies */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_INT32(order);
+}
+
+
+/*
+ * btbpchar_pattern_sortsupport -- sort support setup for bpchar pattern
+ * operations
+ *
+ * Runs the generic string sort support setup with BPCHAROID and the "C"
+ * collation forced, while ssup->ssup_cxt is the current memory context.
+ */
+Datum
+btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+	MemoryContext caller_cxt = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+	/* Use generic string SortSupport, forcing "C" collation */
+	varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
+
+	MemoryContextSwitchTo(caller_cxt);
+
+	PG_RETURN_VOID();
+}