diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/access/hash/hashfunc.c | |
parent | Initial commit. (diff) | |
download | postgresql-14-upstream.tar.xz postgresql-14-upstream.zip |
Adding upstream version 14.5. (refs: upstream/14.5, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/access/hash/hashfunc.c')
-rw-r--r-- | src/backend/access/hash/hashfunc.c | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c new file mode 100644 index 0000000..2423339 --- /dev/null +++ b/src/backend/access/hash/hashfunc.c @@ -0,0 +1,411 @@ +/*------------------------------------------------------------------------- + * + * hashfunc.c + * Support functions for hash access method. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/hash/hashfunc.c + * + * NOTES + * These functions are stored in pg_amproc. For each operator class + * defined for hash indexes, they compute the hash value of the argument. + * + * Additional hash functions appear in /utils/adt/ files for various + * specialized datatypes. + * + * It is expected that every bit of a hash function's 32-bit result is + * as random as every other; failure to ensure this is likely to lead + * to poor performance of hash joins, for example. In most cases a hash + * function should use hash_any() or its variant hash_uint32(). + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/hash.h" +#include "catalog/pg_collation.h" +#include "common/hashfn.h" +#include "utils/builtins.h" +#include "utils/float.h" +#include "utils/pg_locale.h" + +/* + * Datatype-specific hash functions. + * + * These support both hash indexes and hash joins. + * + * NOTE: some of these are also used by catcache operations, without + * any direct connection to hash indexes. Also, the common hash_any + * routine is also used by dynahash tables. 
+ */ + +/* Note: this is used for both "char" and boolean datatypes */ +Datum +hashchar(PG_FUNCTION_ARGS) +{ + return hash_uint32((int32) PG_GETARG_CHAR(0)); +} + +Datum +hashcharextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1)); +} + +Datum +hashint2(PG_FUNCTION_ARGS) +{ + return hash_uint32((int32) PG_GETARG_INT16(0)); +} + +Datum +hashint2extended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1)); +} + +Datum +hashint4(PG_FUNCTION_ARGS) +{ + return hash_uint32(PG_GETARG_INT32(0)); +} + +Datum +hashint4extended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1)); +} + +Datum +hashint8(PG_FUNCTION_ARGS) +{ + /* + * The idea here is to produce a hash value compatible with the values + * produced by hashint4 and hashint2 for logically equal inputs; this is + * necessary to support cross-type hash joins across these input types. + * Since all three types are signed, we can xor the high half of the int8 + * value if the sign is positive, or the complement of the high half when + * the sign is negative. + */ + int64 val = PG_GETARG_INT64(0); + uint32 lohalf = (uint32) val; + uint32 hihalf = (uint32) (val >> 32); + + lohalf ^= (val >= 0) ? hihalf : ~hihalf; + + return hash_uint32(lohalf); +} + +Datum +hashint8extended(PG_FUNCTION_ARGS) +{ + /* Same approach as hashint8 */ + int64 val = PG_GETARG_INT64(0); + uint32 lohalf = (uint32) val; + uint32 hihalf = (uint32) (val >> 32); + + lohalf ^= (val >= 0) ? 
hihalf : ~hihalf; + + return hash_uint32_extended(lohalf, PG_GETARG_INT64(1)); +} + +Datum +hashoid(PG_FUNCTION_ARGS) +{ + return hash_uint32((uint32) PG_GETARG_OID(0)); +} + +Datum +hashoidextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1)); +} + +Datum +hashenum(PG_FUNCTION_ARGS) +{ + return hash_uint32((uint32) PG_GETARG_OID(0)); +} + +Datum +hashenumextended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1)); +} + +Datum +hashfloat4(PG_FUNCTION_ARGS) +{ + float4 key = PG_GETARG_FLOAT4(0); + float8 key8; + + /* + * On IEEE-float machines, minus zero and zero have different bit patterns + * but should compare as equal. We must ensure that they have the same + * hash value, which is most reliably done this way: + */ + if (key == (float4) 0) + PG_RETURN_UINT32(0); + + /* + * To support cross-type hashing of float8 and float4, we want to return + * the same hash value hashfloat8 would produce for an equal float8 value. + * So, widen the value to float8 and hash that. (We must do this rather + * than have hashfloat8 try to narrow its value to float4; that could fail + * on overflow.) + */ + key8 = key; + + /* + * Similarly, NaNs can have different bit patterns but they should all + * compare as equal. For backwards-compatibility reasons we force them to + * have the hash value of a standard float8 NaN. (You'd think we could + * replace key with a float4 NaN and then widen it; but on some old + * platforms, that way produces a different bit pattern.) 
+ */ + if (isnan(key8)) + key8 = get_float8_nan(); + + return hash_any((unsigned char *) &key8, sizeof(key8)); +} + +Datum +hashfloat4extended(PG_FUNCTION_ARGS) +{ + float4 key = PG_GETARG_FLOAT4(0); + uint64 seed = PG_GETARG_INT64(1); + float8 key8; + + /* Same approach as hashfloat4 */ + if (key == (float4) 0) + PG_RETURN_UINT64(seed); + key8 = key; + if (isnan(key8)) + key8 = get_float8_nan(); + + return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed); +} + +Datum +hashfloat8(PG_FUNCTION_ARGS) +{ + float8 key = PG_GETARG_FLOAT8(0); + + /* + * On IEEE-float machines, minus zero and zero have different bit patterns + * but should compare as equal. We must ensure that they have the same + * hash value, which is most reliably done this way: + */ + if (key == (float8) 0) + PG_RETURN_UINT32(0); + + /* + * Similarly, NaNs can have different bit patterns but they should all + * compare as equal. For backwards-compatibility reasons we force them to + * have the hash value of a standard NaN. 
+ */ + if (isnan(key)) + key = get_float8_nan(); + + return hash_any((unsigned char *) &key, sizeof(key)); +} + +Datum +hashfloat8extended(PG_FUNCTION_ARGS) +{ + float8 key = PG_GETARG_FLOAT8(0); + uint64 seed = PG_GETARG_INT64(1); + + /* Same approach as hashfloat8 */ + if (key == (float8) 0) + PG_RETURN_UINT64(seed); + if (isnan(key)) + key = get_float8_nan(); + + return hash_any_extended((unsigned char *) &key, sizeof(key), seed); +} + +Datum +hashoidvector(PG_FUNCTION_ARGS) +{ + oidvector *key = (oidvector *) PG_GETARG_POINTER(0); + + return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid)); +} + +Datum +hashoidvectorextended(PG_FUNCTION_ARGS) +{ + oidvector *key = (oidvector *) PG_GETARG_POINTER(0); + + return hash_any_extended((unsigned char *) key->values, + key->dim1 * sizeof(Oid), + PG_GETARG_INT64(1)); +} + +Datum +hashname(PG_FUNCTION_ARGS) +{ + char *key = NameStr(*PG_GETARG_NAME(0)); + + return hash_any((unsigned char *) key, strlen(key)); +} + +Datum +hashnameextended(PG_FUNCTION_ARGS) +{ + char *key = NameStr(*PG_GETARG_NAME(0)); + + return hash_any_extended((unsigned char *) key, strlen(key), + PG_GETARG_INT64(1)); +} + +Datum +hashtext(PG_FUNCTION_ARGS) +{ + text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; + Datum result; + + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, 
VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } + + /* Avoid leaking memory for toasted inputs */ + PG_FREE_IF_COPY(key, 0); + + return result; +} + +Datum +hashtextextended(PG_FUNCTION_ARGS) +{ + text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; + Datum result; + + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any_extended((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + PG_GETARG_INT64(1)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } + + PG_FREE_IF_COPY(key, 0); + + return result; +} + +/* + * hashvarlena() can be used for any varlena datatype in which there are + * no non-significant bits, ie, distinct bitpatterns never compare as equal. 
+ */ +Datum +hashvarlena(PG_FUNCTION_ARGS) +{ + struct varlena *key = PG_GETARG_VARLENA_PP(0); + Datum result; + + result = hash_any((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key)); + + /* Avoid leaking memory for toasted inputs */ + PG_FREE_IF_COPY(key, 0); + + return result; +} + +Datum +hashvarlenaextended(PG_FUNCTION_ARGS) +{ + struct varlena *key = PG_GETARG_VARLENA_PP(0); + Datum result; + + result = hash_any_extended((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + PG_GETARG_INT64(1)); + + PG_FREE_IF_COPY(key, 0); + + return result; +} |