summaryrefslogtreecommitdiffstats
path: root/contrib/intarray/_intbig_gist.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:46:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:46:48 +0000
commit311bcfc6b3acdd6fd152798c7f287ddf74fa2a98 (patch)
tree0ec307299b1dada3701e42f4ca6eda57d708261e /contrib/intarray/_intbig_gist.c
parentInitial commit. (diff)
downloadpostgresql-15-311bcfc6b3acdd6fd152798c7f287ddf74fa2a98.tar.xz
postgresql-15-311bcfc6b3acdd6fd152798c7f287ddf74fa2a98.zip
Adding upstream version 15.4.upstream/15.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/intarray/_intbig_gist.c')
-rw-r--r--contrib/intarray/_intbig_gist.c592
1 files changed, 592 insertions, 0 deletions
diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c
new file mode 100644
index 0000000..8273478
--- /dev/null
+++ b/contrib/intarray/_intbig_gist.c
@@ -0,0 +1,592 @@
+/*
+ * contrib/intarray/_intbig_gist.c
+ */
+#include "postgres.h"
+
+#include "_int.h"
+#include "access/gist.h"
+#include "access/reloptions.h"
+#include "access/stratnum.h"
+#include "port/pg_bitutils.h"
+
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
+/*
+** _intbig methods
+*/
+PG_FUNCTION_INFO_V1(g_intbig_consistent);
+PG_FUNCTION_INFO_V1(g_intbig_compress);
+PG_FUNCTION_INFO_V1(g_intbig_decompress);
+PG_FUNCTION_INFO_V1(g_intbig_penalty);
+PG_FUNCTION_INFO_V1(g_intbig_picksplit);
+PG_FUNCTION_INFO_V1(g_intbig_union);
+PG_FUNCTION_INFO_V1(g_intbig_same);
+PG_FUNCTION_INFO_V1(g_intbig_options);
+
+PG_FUNCTION_INFO_V1(_intbig_in);
+PG_FUNCTION_INFO_V1(_intbig_out);
+
+Datum
+_intbig_in(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("_intbig_in() not implemented")));
+ PG_RETURN_DATUM(0);
+}
+
+Datum
+_intbig_out(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("_intbig_out() not implemented")));
+ PG_RETURN_DATUM(0);
+}
+
+static GISTTYPE *
+_intbig_alloc(bool allistrue, int siglen, BITVECP sign)
+{
+ int flag = allistrue ? ALLISTRUE : 0;
+ int size = CALCGTSIZE(flag, siglen);
+ GISTTYPE *res = (GISTTYPE *) palloc(size);
+
+ SET_VARSIZE(res, size);
+ res->flag = flag;
+
+ if (!allistrue)
+ {
+ if (sign)
+ memcpy(GETSIGN(res), sign, siglen);
+ else
+ memset(GETSIGN(res), 0, siglen);
+ }
+
+ return res;
+}
+
+
+/*********************************************************************
+** intbig functions
+*********************************************************************/
+static bool
+_intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
+{
+ int num = ARRNELEMS(b);
+ int32 *ptr = ARRPTR(b);
+
+ CHECKARRVALID(b);
+
+ while (num--)
+ {
+ if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
+ return true;
+ ptr++;
+ }
+
+ return false;
+}
+
+static bool
+_intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
+{
+ int num = ARRNELEMS(b);
+ int32 *ptr = ARRPTR(b);
+
+ CHECKARRVALID(b);
+
+ while (num--)
+ {
+ if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
+ return false;
+ ptr++;
+ }
+
+ return true;
+}
+
+Datum
+g_intbig_same(PG_FUNCTION_ARGS)
+{
+ GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+ GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+ int siglen = GET_SIGLEN();
+
+ if (ISALLTRUE(a) && ISALLTRUE(b))
+ *result = true;
+ else if (ISALLTRUE(a))
+ *result = false;
+ else if (ISALLTRUE(b))
+ *result = false;
+ else
+ {
+ int32 i;
+ BITVECP sa = GETSIGN(a),
+ sb = GETSIGN(b);
+
+ *result = true;
+ LOOPBYTE(siglen)
+ {
+ if (sa[i] != sb[i])
+ {
+ *result = false;
+ break;
+ }
+ }
+ }
+ PG_RETURN_POINTER(result);
+}
+
+Datum
+g_intbig_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ int siglen = GET_SIGLEN();
+
+ if (entry->leafkey)
+ {
+ GISTENTRY *retval;
+ ArrayType *in = DatumGetArrayTypeP(entry->key);
+ int32 *ptr;
+ int num;
+ GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
+
+ CHECKARRVALID(in);
+ if (ARRISEMPTY(in))
+ {
+ ptr = NULL;
+ num = 0;
+ }
+ else
+ {
+ ptr = ARRPTR(in);
+ num = ARRNELEMS(in);
+ }
+
+ while (num--)
+ {
+ HASH(GETSIGN(res), *ptr, siglen);
+ ptr++;
+ }
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, false);
+
+ PG_RETURN_POINTER(retval);
+ }
+ else if (!ISALLTRUE(DatumGetPointer(entry->key)))
+ {
+ GISTENTRY *retval;
+ int i;
+ BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
+ GISTTYPE *res;
+
+ LOOPBYTE(siglen)
+ {
+ if ((sign[i] & 0xff) != 0xff)
+ PG_RETURN_POINTER(entry);
+ }
+
+ res = _intbig_alloc(true, siglen, sign);
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, false);
+
+ PG_RETURN_POINTER(retval);
+ }
+
+ PG_RETURN_POINTER(entry);
+}
+
+
+static int32
+sizebitvec(BITVECP sign, int siglen)
+{
+ return pg_popcount(sign, siglen);
+}
+
+static int
+hemdistsign(BITVECP a, BITVECP b, int siglen)
+{
+ int i,
+ diff,
+ dist = 0;
+
+ LOOPBYTE(siglen)
+ {
+ diff = (unsigned char) (a[i] ^ b[i]);
+ /* Using the popcount functions here isn't likely to win */
+ dist += pg_number_of_ones[diff];
+ }
+ return dist;
+}
+
+static int
+hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
+{
+ if (ISALLTRUE(a))
+ {
+ if (ISALLTRUE(b))
+ return 0;
+ else
+ return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
+ }
+ else if (ISALLTRUE(b))
+ return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
+
+ return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
+}
+
+Datum
+g_intbig_decompress(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+}
+
+static int32
+unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
+{
+ int32 i;
+ BITVECP sadd = GETSIGN(add);
+
+ if (ISALLTRUE(add))
+ return 1;
+ LOOPBYTE(siglen)
+ sbase[i] |= sadd[i];
+ return 0;
+}
+
+Datum
+g_intbig_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ int siglen = GET_SIGLEN();
+ int32 i;
+ GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
+ BITVECP base = GETSIGN(result);
+
+ for (i = 0; i < entryvec->n; i++)
+ {
+ if (unionkey(base, GETENTRY(entryvec, i), siglen))
+ {
+ result->flag |= ALLISTRUE;
+ SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
+ break;
+ }
+ }
+
+ *size = VARSIZE(result);
+
+ PG_RETURN_POINTER(result);
+}
+
+Datum
+g_intbig_penalty(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+ GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+ GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+ GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+ int siglen = GET_SIGLEN();
+
+ *penalty = hemdist(origval, newval, siglen);
+ PG_RETURN_POINTER(penalty);
+}
+
+
+typedef struct
+{
+ OffsetNumber pos;
+ int32 cost;
+} SPLITCOST;
+
+static int
+comparecost(const void *a, const void *b)
+{
+ return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
+}
+
+
+Datum
+g_intbig_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ int siglen = GET_SIGLEN();
+ OffsetNumber k,
+ j;
+ GISTTYPE *datum_l,
+ *datum_r;
+ BITVECP union_l,
+ union_r;
+ int32 size_alpha,
+ size_beta;
+ int32 size_waste,
+ waste = -1;
+ int32 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+ OffsetNumber maxoff;
+ BITVECP ptr;
+ int i;
+ SPLITCOST *costvector;
+ GISTTYPE *_k,
+ *_j;
+
+ maxoff = entryvec->n - 2;
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ v->spl_left = (OffsetNumber *) palloc(nbytes);
+ v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+ {
+ _k = GETENTRY(entryvec, k);
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
+ {
+ size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
+ if (size_waste > waste)
+ {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+ }
+
+ left = v->spl_left;
+ v->spl_nleft = 0;
+ right = v->spl_right;
+ v->spl_nright = 0;
+
+ if (seed_1 == 0 || seed_2 == 0)
+ {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ /* form initial .. */
+ datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
+ GETSIGN(GETENTRY(entryvec, seed_1)));
+ datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
+ GETSIGN(GETENTRY(entryvec, seed_2)));
+
+ maxoff = OffsetNumberNext(maxoff);
+ /* sort before ... */
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+ {
+ costvector[j - 1].pos = j;
+ _j = GETENTRY(entryvec, j);
+ size_alpha = hemdist(datum_l, _j, siglen);
+ size_beta = hemdist(datum_r, _j, siglen);
+ costvector[j - 1].cost = Abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ union_l = GETSIGN(datum_l);
+ union_r = GETSIGN(datum_r);
+
+ for (k = 0; k < maxoff; k++)
+ {
+ j = costvector[k].pos;
+ if (j == seed_1)
+ {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ }
+ else if (j == seed_2)
+ {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+ _j = GETENTRY(entryvec, j);
+ size_alpha = hemdist(datum_l, _j, siglen);
+ size_beta = hemdist(datum_r, _j, siglen);
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
+ {
+ if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
+ {
+ if (!ISALLTRUE(datum_l))
+ MemSet((void *) union_l, 0xff, siglen);
+ }
+ else
+ {
+ ptr = GETSIGN(_j);
+ LOOPBYTE(siglen)
+ union_l[i] |= ptr[i];
+ }
+ *left++ = j;
+ v->spl_nleft++;
+ }
+ else
+ {
+ if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
+ {
+ if (!ISALLTRUE(datum_r))
+ MemSet((void *) union_r, 0xff, siglen);
+ }
+ else
+ {
+ ptr = GETSIGN(_j);
+ LOOPBYTE(siglen)
+ union_r[i] |= ptr[i];
+ }
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ *right = *left = FirstOffsetNumber;
+ pfree(costvector);
+
+ v->spl_ldatum = PointerGetDatum(datum_l);
+ v->spl_rdatum = PointerGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
+
+Datum
+g_intbig_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+
+ /* Oid subtype = PG_GETARG_OID(3); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ int siglen = GET_SIGLEN();
+ bool retval;
+
+ /* All cases served by this function are inexact */
+ *recheck = true;
+
+ if (ISALLTRUE(DatumGetPointer(entry->key)))
+ PG_RETURN_BOOL(true);
+
+ if (strategy == BooleanSearchStrategy)
+ {
+ retval = signconsistent((QUERYTYPE *) query,
+ GETSIGN(DatumGetPointer(entry->key)),
+ siglen,
+ false);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(retval);
+ }
+
+ CHECKARRVALID(query);
+
+ switch (strategy)
+ {
+ case RTOverlapStrategyNumber:
+ retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
+ query, siglen);
+ break;
+ case RTSameStrategyNumber:
+ if (GIST_LEAF(entry))
+ {
+ int i,
+ num = ARRNELEMS(query);
+ int32 *ptr = ARRPTR(query);
+ BITVECP dq = palloc0(siglen),
+ de;
+
+ while (num--)
+ {
+ HASH(dq, *ptr, siglen);
+ ptr++;
+ }
+
+ de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
+ retval = true;
+ LOOPBYTE(siglen)
+ {
+ if (de[i] != dq[i])
+ {
+ retval = false;
+ break;
+ }
+ }
+
+ pfree(dq);
+ }
+ else
+ retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
+ query, siglen);
+ break;
+ case RTContainsStrategyNumber:
+ case RTOldContainsStrategyNumber:
+ retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
+ query, siglen);
+ break;
+ case RTContainedByStrategyNumber:
+ case RTOldContainedByStrategyNumber:
+
+ /*
+ * This code is unreachable as of intarray 1.4, because the <@
+ * operator has been removed from the opclass. We keep it for now
+ * to support older versions of the SQL definitions.
+ */
+ if (GIST_LEAF(entry))
+ {
+ int i,
+ num = ARRNELEMS(query);
+ int32 *ptr = ARRPTR(query);
+ BITVECP dq = palloc0(siglen),
+ de;
+
+ while (num--)
+ {
+ HASH(dq, *ptr, siglen);
+ ptr++;
+ }
+
+ de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
+ retval = true;
+ LOOPBYTE(siglen)
+ {
+ if (de[i] & ~dq[i])
+ {
+ retval = false;
+ break;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Unfortunately, because empty arrays could be anywhere in
+ * the index, we must search the whole tree.
+ */
+ retval = true;
+ }
+ break;
+ default:
+ retval = false;
+ }
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(retval);
+}
+
+Datum
+g_intbig_options(PG_FUNCTION_ARGS)
+{
+ local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
+
+ init_local_reloptions(relopts, sizeof(GISTIntArrayBigOptions));
+ add_local_int_reloption(relopts, "siglen",
+ "signature length in bytes",
+ SIGLEN_DEFAULT, 1, SIGLEN_MAX,
+ offsetof(GISTIntArrayBigOptions, siglen));
+
+ PG_RETURN_VOID();
+}