summaryrefslogtreecommitdiffstats
path: root/src/backend/access/hash/hashfunc.c
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/access/hash/hashfunc.c
parent Initial commit. (diff)
download postgresql-14-upstream.tar.xz
postgresql-14-upstream.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/access/hash/hashfunc.c')
-rw-r--r--src/backend/access/hash/hashfunc.c411
1 files changed, 411 insertions, 0 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
new file mode 100644
index 0000000..2423339
--- /dev/null
+++ b/src/backend/access/hash/hashfunc.c
@@ -0,0 +1,411 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfunc.c
+ * Support functions for hash access method.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/hash/hashfunc.c
+ *
+ * NOTES
+ * These functions are stored in pg_amproc. For each operator class
+ * defined for hash indexes, they compute the hash value of the argument.
+ *
+ * Additional hash functions appear in /utils/adt/ files for various
+ * specialized datatypes.
+ *
+ * It is expected that every bit of a hash function's 32-bit result is
+ * as random as every other; failure to ensure this is likely to lead
+ * to poor performance of hash joins, for example. In most cases a hash
+ * function should use hash_any() or its variant hash_uint32().
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "catalog/pg_collation.h"
+#include "common/hashfn.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+#include "utils/pg_locale.h"
+
+/*
+ * Datatype-specific hash functions.
+ *
+ * These support both hash indexes and hash joins.
+ *
+ * NOTE: some of these are also used by catcache operations, without
+ * any direct connection to hash indexes. The common hash_any routine
+ * is likewise used by dynahash tables.
+ */
+
+/*
+ * hashchar
+ *		Hash a one-byte "char" argument.  The boolean datatype shares this
+ *		function.
+ *
+ * The byte is widened to int32 before being handed to hash_uint32().
+ */
+Datum
+hashchar(PG_FUNCTION_ARGS)
+{
+	int32		widened = (int32) PG_GETARG_CHAR(0);
+
+	return hash_uint32(widened);
+}
+
+/*
+ * hashcharextended
+ *		Seeded variant of hashchar: argument 0 is the "char" value,
+ *		argument 1 is the 64-bit seed.
+ */
+Datum
+hashcharextended(PG_FUNCTION_ARGS)
+{
+	int32		widened = (int32) PG_GETARG_CHAR(0);
+	uint64		seed = PG_GETARG_INT64(1);
+
+	return hash_uint32_extended(widened, seed);
+}
+
+/*
+ * hashint2
+ *		Hash an int2 argument by widening it to int32 and passing the
+ *		result to hash_uint32().
+ */
+Datum
+hashint2(PG_FUNCTION_ARGS)
+{
+	int32		widened = (int32) PG_GETARG_INT16(0);
+
+	return hash_uint32(widened);
+}
+
+/*
+ * hashint2extended
+ *		Seeded variant of hashint2: argument 0 is the int2 value,
+ *		argument 1 is the 64-bit seed.
+ */
+Datum
+hashint2extended(PG_FUNCTION_ARGS)
+{
+	int32		widened = (int32) PG_GETARG_INT16(0);
+	uint64		seed = PG_GETARG_INT64(1);
+
+	return hash_uint32_extended(widened, seed);
+}
+
+/*
+ * hashint4
+ *		Hash an int4 argument with hash_uint32().
+ */
+Datum
+hashint4(PG_FUNCTION_ARGS)
+{
+	int32		value = PG_GETARG_INT32(0);
+
+	return hash_uint32(value);
+}
+
+/*
+ * hashint4extended
+ *		Seeded variant of hashint4: argument 0 is the int4 value,
+ *		argument 1 is the 64-bit seed.
+ */
+Datum
+hashint4extended(PG_FUNCTION_ARGS)
+{
+	int32		value = PG_GETARG_INT32(0);
+	uint64		seed = PG_GETARG_INT64(1);
+
+	return hash_uint32_extended(value, seed);
+}
+
+/*
+ * hashint8
+ *		Hash an int8 argument so that the result matches hashint4 and
+ *		hashint2 for logically equal inputs.
+ *
+ * Cross-type hash joins over int2/int4/int8 depend on that agreement.  The
+ * 64-bit value is folded into 32 bits by xor'ing the low word with the high
+ * word, where the high word is bitwise-complemented first for negative
+ * inputs.  For any value that fits in an int4 the (possibly complemented)
+ * high word is zero, so the folded value is just the low word.
+ */
+Datum
+hashint8(PG_FUNCTION_ARGS)
+{
+	int64		arg = PG_GETARG_INT64(0);
+	uint32		low = (uint32) arg;
+	uint32		high = (uint32) (arg >> 32);
+
+	/* a negative input contributes the complement of its high word */
+	if (arg < 0)
+		high = ~high;
+
+	return hash_uint32(low ^ high);
+}
+
+/*
+ * hashint8extended
+ *		Seeded variant of hashint8: argument 0 is the int8 value,
+ *		argument 1 is the 64-bit seed.
+ *
+ * The 64-bit input is folded to 32 bits exactly as in hashint8: the low
+ * word is xor'ed with the high word, which is complemented first when the
+ * input is negative.
+ */
+Datum
+hashint8extended(PG_FUNCTION_ARGS)
+{
+	int64		arg = PG_GETARG_INT64(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	uint32		low = (uint32) arg;
+	uint32		high = (uint32) (arg >> 32);
+
+	if (arg < 0)
+		high = ~high;
+
+	return hash_uint32_extended(low ^ high, seed);
+}
+
+/*
+ * hashoid
+ *		Hash an OID argument as a 32-bit unsigned integer.
+ */
+Datum
+hashoid(PG_FUNCTION_ARGS)
+{
+	uint32		oidval = (uint32) PG_GETARG_OID(0);
+
+	return hash_uint32(oidval);
+}
+
+/*
+ * hashoidextended
+ *		Seeded variant of hashoid: argument 0 is the OID, argument 1 is
+ *		the 64-bit seed.
+ */
+Datum
+hashoidextended(PG_FUNCTION_ARGS)
+{
+	uint32		oidval = (uint32) PG_GETARG_OID(0);
+	uint64		seed = PG_GETARG_INT64(1);
+
+	return hash_uint32_extended(oidval, seed);
+}
+
+/*
+ * hashenum
+ *		Hash an enum argument.  The argument is fetched as an OID and
+ *		hashed as a 32-bit unsigned integer, the same way hashoid does it.
+ */
+Datum
+hashenum(PG_FUNCTION_ARGS)
+{
+	uint32		enumval = (uint32) PG_GETARG_OID(0);
+
+	return hash_uint32(enumval);
+}
+
+/*
+ * hashenumextended
+ *		Seeded variant of hashenum: argument 0 is the enum value (fetched
+ *		as an OID), argument 1 is the 64-bit seed.
+ */
+Datum
+hashenumextended(PG_FUNCTION_ARGS)
+{
+	uint32		enumval = (uint32) PG_GETARG_OID(0);
+	uint64		seed = PG_GETARG_INT64(1);
+
+	return hash_uint32_extended(enumval, seed);
+}
+
+/*
+ * hashfloat4
+ *		Hash a float4 argument, producing the same value hashfloat8 would
+ *		for an equal float8.
+ *
+ * Three cases are distinguished:
+ *
+ * 1. Zero.  On IEEE-float machines minus zero and zero have different bit
+ *	  patterns yet compare as equal, so both are given the fixed hash 0.
+ *
+ * 2. NaN.  NaNs may differ in bit pattern but should all compare as equal.
+ *	  For backwards-compatibility reasons they hash as the standard float8
+ *	  NaN.  The NaN is substituted after widening, not before, because on
+ *	  some old platforms widening a float4 NaN yields a different bit
+ *	  pattern.
+ *
+ * 3. Anything else.  The value is widened to float8 and its bytes are
+ *	  hashed, which supports cross-type hashing of float4 and float8.
+ *	  (Narrowing in hashfloat8 instead could fail on overflow.)
+ */
+Datum
+hashfloat4(PG_FUNCTION_ARGS)
+{
+	float4		arg = PG_GETARG_FLOAT4(0);
+	float8		widened;
+	float8		canonical;
+
+	/* +0 and -0 share one hash value */
+	if (arg == 0.0f)
+		PG_RETURN_UINT32(0);
+
+	widened = (float8) arg;
+	canonical = isnan(widened) ? get_float8_nan() : widened;
+
+	return hash_any((unsigned char *) &canonical, sizeof(canonical));
+}
+
+/*
+ * hashfloat4extended
+ *		Seeded variant of hashfloat4: argument 0 is the float4 value,
+ *		argument 1 is the 64-bit seed.
+ *
+ * Zero (of either sign) returns the seed itself.  Otherwise the value is
+ * widened to float8, any NaN is replaced by the standard float8 NaN, and
+ * the resulting bytes are hashed with the seed.
+ */
+Datum
+hashfloat4extended(PG_FUNCTION_ARGS)
+{
+	float4		arg = PG_GETARG_FLOAT4(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	float8		widened;
+	float8		canonical;
+
+	/* +0 and -0 share one hash value */
+	if (arg == 0.0f)
+		PG_RETURN_UINT64(seed);
+
+	widened = (float8) arg;
+	canonical = isnan(widened) ? get_float8_nan() : widened;
+
+	return hash_any_extended((unsigned char *) &canonical,
+							 sizeof(canonical),
+							 seed);
+}
+
+/*
+ * hashfloat8
+ *		Hash a float8 argument.
+ *
+ * On IEEE-float machines minus zero and zero have different bit patterns
+ * but should compare as equal, so both are given the fixed hash 0.
+ * Likewise NaNs may differ in bit pattern but should all compare as equal;
+ * for backwards-compatibility reasons they hash as the standard NaN.  Any
+ * other value is hashed over its raw bytes.
+ */
+Datum
+hashfloat8(PG_FUNCTION_ARGS)
+{
+	float8		arg = PG_GETARG_FLOAT8(0);
+	float8		canonical;
+
+	/* +0 and -0 share one hash value */
+	if (arg == 0.0)
+		PG_RETURN_UINT32(0);
+
+	canonical = isnan(arg) ? get_float8_nan() : arg;
+
+	return hash_any((unsigned char *) &canonical, sizeof(canonical));
+}
+
+/*
+ * hashfloat8extended
+ *		Seeded variant of hashfloat8: argument 0 is the float8 value,
+ *		argument 1 is the 64-bit seed.
+ *
+ * Zero (of either sign) returns the seed itself.  Any NaN is replaced by
+ * the standard float8 NaN before the value's bytes are hashed with the
+ * seed.
+ */
+Datum
+hashfloat8extended(PG_FUNCTION_ARGS)
+{
+	float8		arg = PG_GETARG_FLOAT8(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	float8		canonical;
+
+	/* +0 and -0 share one hash value */
+	if (arg == 0.0)
+		PG_RETURN_UINT64(seed);
+
+	canonical = isnan(arg) ? get_float8_nan() : arg;
+
+	return hash_any_extended((unsigned char *) &canonical,
+							 sizeof(canonical),
+							 seed);
+}
+
+/*
+ * hashoidvector
+ *		Hash an oidvector argument over its values array, which is taken
+ *		to be dim1 elements of sizeof(Oid) bytes each.
+ */
+Datum
+hashoidvector(PG_FUNCTION_ARGS)
+{
+	oidvector  *vec = (oidvector *) PG_GETARG_POINTER(0);
+	Size		nbytes = vec->dim1 * sizeof(Oid);
+
+	return hash_any((unsigned char *) vec->values, nbytes);
+}
+
+/*
+ * hashoidvectorextended
+ *		Seeded variant of hashoidvector: argument 0 is the oidvector,
+ *		argument 1 is the 64-bit seed.
+ */
+Datum
+hashoidvectorextended(PG_FUNCTION_ARGS)
+{
+	oidvector  *vec = (oidvector *) PG_GETARG_POINTER(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	Size		nbytes = vec->dim1 * sizeof(Oid);
+
+	return hash_any_extended((unsigned char *) vec->values, nbytes, seed);
+}
+
+/*
+ * hashname
+ *		Hash a name argument.  Only the bytes before the terminating NUL
+ *		(as measured by strlen) take part in the hash.
+ */
+Datum
+hashname(PG_FUNCTION_ARGS)
+{
+	char	   *str = NameStr(*PG_GETARG_NAME(0));
+	size_t		len = strlen(str);
+
+	return hash_any((unsigned char *) str, len);
+}
+
+/*
+ * hashnameextended
+ *		Seeded variant of hashname: argument 0 is the name, argument 1 is
+ *		the 64-bit seed.  Only the bytes before the terminating NUL are
+ *		hashed.
+ */
+Datum
+hashnameextended(PG_FUNCTION_ARGS)
+{
+	char	   *str = NameStr(*PG_GETARG_NAME(0));
+	size_t		len = strlen(str);
+	uint64		seed = PG_GETARG_INT64(1);
+
+	return hash_any_extended((unsigned char *) str, len, seed);
+}
+
+/*
+ * hashtext
+ *		Hash a text argument according to the collation of the call.
+ *
+ * An ERRCODE_INDETERMINATE_COLLATION error is raised when the call carries
+ * no collation.  No locale object is loaded for the C collation or for
+ * DEFAULT_COLLATION_OID; in that case, and for any deterministic locale,
+ * the string's bytes are hashed directly.  For a nondeterministic ICU
+ * collation the ICU sort key of the string is hashed instead.  Any other
+ * provider of a nondeterministic collation is reported as an error.
+ *
+ * All temporary allocations made here are released before returning.
+ */
+Datum
+hashtext(PG_FUNCTION_ARGS)
+{
+	text	   *key = PG_GETARG_TEXT_PP(0);
+	Oid			collid = PG_GET_COLLATION();
+	pg_locale_t mylocale = NULL;
+	Datum		result;
+
+	if (!collid)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for string hashing"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+		mylocale = pg_newlocale_from_collation(collid);
+
+	if (!mylocale || mylocale->deterministic)
+	{
+		result = hash_any((unsigned char *) VARDATA_ANY(key),
+						  VARSIZE_ANY_EXHDR(key));
+	}
+	else
+	{
+#ifdef USE_ICU
+		if (mylocale->provider == COLLPROVIDER_ICU)
+		{
+			int32_t		ulen = -1;
+			UChar	   *uchar = NULL;
+			Size		bsize;
+			uint8_t    *buf;
+
+			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+			/* first call only measures the sort key, second one fills buf */
+			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+									uchar, ulen, NULL, 0);
+			buf = palloc(bsize);
+			ucol_getSortKey(mylocale->info.icu.ucol,
+							uchar, ulen, buf, bsize);
+
+			/*
+			 * icu_to_uchar() handed us a palloc'd buffer; release it now
+			 * that the sort key has been built, so repeated calls do not
+			 * accumulate garbage in the caller's memory context.
+			 */
+			pfree(uchar);
+
+			result = hash_any(buf, bsize);
+
+			pfree(buf);
+		}
+		else
+#endif
+			/* shouldn't happen */
+			elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+	}
+
+	/* Avoid leaking memory for toasted inputs */
+	PG_FREE_IF_COPY(key, 0);
+
+	return result;
+}
+
+/*
+ * hashtextextended
+ *		Seeded variant of hashtext: argument 0 is the text value,
+ *		argument 1 is the 64-bit seed.
+ *
+ * An ERRCODE_INDETERMINATE_COLLATION error is raised when the call carries
+ * no collation.  No locale object is loaded for the C collation or for
+ * DEFAULT_COLLATION_OID; in that case, and for any deterministic locale,
+ * the string's bytes are hashed directly.  For a nondeterministic ICU
+ * collation the ICU sort key of the string is hashed instead.  Any other
+ * provider of a nondeterministic collation is reported as an error.
+ *
+ * All temporary allocations made here are released before returning.
+ */
+Datum
+hashtextextended(PG_FUNCTION_ARGS)
+{
+	text	   *key = PG_GETARG_TEXT_PP(0);
+	Oid			collid = PG_GET_COLLATION();
+	uint64		seed = PG_GETARG_INT64(1);
+	pg_locale_t mylocale = NULL;
+	Datum		result;
+
+	if (!collid)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for string hashing"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+		mylocale = pg_newlocale_from_collation(collid);
+
+	if (!mylocale || mylocale->deterministic)
+	{
+		result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
+								   VARSIZE_ANY_EXHDR(key),
+								   seed);
+	}
+	else
+	{
+#ifdef USE_ICU
+		if (mylocale->provider == COLLPROVIDER_ICU)
+		{
+			int32_t		ulen = -1;
+			UChar	   *uchar = NULL;
+			Size		bsize;
+			uint8_t    *buf;
+
+			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+			/* first call only measures the sort key, second one fills buf */
+			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+									uchar, ulen, NULL, 0);
+			buf = palloc(bsize);
+			ucol_getSortKey(mylocale->info.icu.ucol,
+							uchar, ulen, buf, bsize);
+
+			/*
+			 * icu_to_uchar() handed us a palloc'd buffer; release it now
+			 * that the sort key has been built, so repeated calls do not
+			 * accumulate garbage in the caller's memory context.
+			 */
+			pfree(uchar);
+
+			result = hash_any_extended(buf, bsize, seed);
+
+			pfree(buf);
+		}
+		else
+#endif
+			/* shouldn't happen */
+			elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+	}
+
+	/* Avoid leaking memory for toasted inputs */
+	PG_FREE_IF_COPY(key, 0);
+
+	return result;
+}
+
+/*
+ * hashvarlena
+ *		Generic hash for varlena datatypes: hashes the payload bytes that
+ *		follow the varlena header.
+ *
+ * This is only suitable for types with no non-significant bits, that is,
+ * types in which distinct bit patterns never compare as equal.
+ */
+Datum
+hashvarlena(PG_FUNCTION_ARGS)
+{
+	struct varlena *datum = PG_GETARG_VARLENA_PP(0);
+	unsigned char *payload = (unsigned char *) VARDATA_ANY(datum);
+	Datum		hashval;
+
+	hashval = hash_any(payload, VARSIZE_ANY_EXHDR(datum));
+
+	/* release the detoasted copy, if one was made, so it does not leak */
+	PG_FREE_IF_COPY(datum, 0);
+
+	return hashval;
+}
+
+/*
+ * hashvarlenaextended
+ *		Seeded variant of hashvarlena: argument 0 is the varlena value,
+ *		argument 1 is the 64-bit seed.  The payload bytes that follow the
+ *		varlena header are hashed.
+ */
+Datum
+hashvarlenaextended(PG_FUNCTION_ARGS)
+{
+	struct varlena *datum = PG_GETARG_VARLENA_PP(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	unsigned char *payload = (unsigned char *) VARDATA_ANY(datum);
+	Datum		hashval;
+
+	hashval = hash_any_extended(payload, VARSIZE_ANY_EXHDR(datum), seed);
+
+	/* release the detoasted copy, if one was made, so it does not leak */
+	PG_FREE_IF_COPY(datum, 0);
+
+	return hashval;
+}