summaryrefslogtreecommitdiffstats
path: root/contrib/ltree/_ltree_gist.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:19:15 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:19:15 +0000
commit6eb9c5a5657d1fe77b55cc261450f3538d35a94d (patch)
tree657d8194422a5daccecfd42d654b8a245ef7b4c8 /contrib/ltree/_ltree_gist.c
parentInitial commit. (diff)
downloadpostgresql-13-6eb9c5a5657d1fe77b55cc261450f3538d35a94d.tar.xz
postgresql-13-6eb9c5a5657d1fe77b55cc261450f3538d35a94d.zip
Adding upstream version 13.4.upstream/13.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/ltree/_ltree_gist.c')
-rw-r--r--contrib/ltree/_ltree_gist.c555
1 files changed, 555 insertions, 0 deletions
diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c
new file mode 100644
index 0000000..72516c3
--- /dev/null
+++ b/contrib/ltree/_ltree_gist.c
@@ -0,0 +1,555 @@
+/*
+ * contrib/ltree/_ltree_gist.c
+ *
+ *
+ * GiST support for ltree[]
+ * Teodor Sigaev <teodor@stack.net>
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/reloptions.h"
+#include "access/stratnum.h"
+#include "crc32.h"
+#include "ltree.h"
+#include "port/pg_bitutils.h"
+
+PG_FUNCTION_INFO_V1(_ltree_compress);
+PG_FUNCTION_INFO_V1(_ltree_same);
+PG_FUNCTION_INFO_V1(_ltree_union);
+PG_FUNCTION_INFO_V1(_ltree_penalty);
+PG_FUNCTION_INFO_V1(_ltree_picksplit);
+PG_FUNCTION_INFO_V1(_ltree_consistent);
+PG_FUNCTION_INFO_V1(_ltree_gist_options);
+
+#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
+#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+
+
+static void
+hashing(BITVECP sign, ltree *t, int siglen)
+{
+ int tlen = t->numlevel;
+ ltree_level *cur = LTREE_FIRST(t);
+ int hash;
+
+ while (tlen > 0)
+ {
+ hash = ltree_crc32_sz(cur->name, cur->len);
+ AHASH(sign, hash, siglen);
+ cur = LEVEL_NEXT(cur);
+ tlen--;
+ }
+}
+
+Datum
+_ltree_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval = entry;
+ int siglen = LTREE_GET_ASIGLEN();
+
+ if (entry->leafkey)
+ { /* ltree */
+ ltree_gist *key;
+ ArrayType *val = DatumGetArrayTypeP(entry->key);
+ int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
+ ltree *item = (ltree *) ARR_DATA_PTR(val);
+
+ if (ARR_NDIM(val) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array must be one-dimensional")));
+ if (array_contains_nulls(val))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array must not contain nulls")));
+
+ key = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
+
+ while (num > 0)
+ {
+ hashing(LTG_SIGN(key), item, siglen);
+ num--;
+ item = NEXTVAL(item);
+ }
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(key),
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+ else if (!LTG_ISALLTRUE(entry->key))
+ {
+ int32 i;
+ ltree_gist *key;
+ BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
+
+ ALOOPBYTE(siglen)
+ {
+ if ((sign[i] & 0xff) != 0xff)
+ PG_RETURN_POINTER(retval);
+ }
+
+ key = ltree_gist_alloc(true, sign, siglen, NULL, NULL);
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(key),
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+ PG_RETURN_POINTER(retval);
+}
+
+Datum
+_ltree_same(PG_FUNCTION_ARGS)
+{
+ ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
+ ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+ int siglen = LTREE_GET_ASIGLEN();
+
+ if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
+ *result = true;
+ else if (LTG_ISALLTRUE(a))
+ *result = false;
+ else if (LTG_ISALLTRUE(b))
+ *result = false;
+ else
+ {
+ int32 i;
+ BITVECP sa = LTG_SIGN(a),
+ sb = LTG_SIGN(b);
+
+ *result = true;
+ ALOOPBYTE(siglen)
+ {
+ if (sa[i] != sb[i])
+ {
+ *result = false;
+ break;
+ }
+ }
+ }
+ PG_RETURN_POINTER(result);
+}
+
+static int32
+unionkey(BITVECP sbase, ltree_gist *add, int siglen)
+{
+ int32 i;
+ BITVECP sadd = LTG_SIGN(add);
+
+ if (LTG_ISALLTRUE(add))
+ return 1;
+
+ ALOOPBYTE(siglen)
+ sbase[i] |= sadd[i];
+ return 0;
+}
+
+Datum
+_ltree_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ int siglen = LTREE_GET_ASIGLEN();
+ int32 i;
+ ltree_gist *result = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
+ BITVECP base = LTG_SIGN(result);
+
+ for (i = 0; i < entryvec->n; i++)
+ {
+ if (unionkey(base, GETENTRY(entryvec, i), siglen))
+ {
+ result->flag |= LTG_ALLTRUE;
+ SET_VARSIZE(result, LTG_HDRSIZE);
+ break;
+ }
+ }
+
+ *size = VARSIZE(result);
+
+ PG_RETURN_POINTER(result);
+}
+
+static int32
+sizebitvec(BITVECP sign, int siglen)
+{
+ return pg_popcount((const char *) sign, siglen);
+}
+
+static int
+hemdistsign(BITVECP a, BITVECP b, int siglen)
+{
+ int i,
+ diff,
+ dist = 0;
+
+ ALOOPBYTE(siglen)
+ {
+ diff = (unsigned char) (a[i] ^ b[i]);
+ /* Using the popcount functions here isn't likely to win */
+ dist += pg_number_of_ones[diff];
+ }
+ return dist;
+}
+
+static int
+hemdist(ltree_gist *a, ltree_gist *b, int siglen)
+{
+ if (LTG_ISALLTRUE(a))
+ {
+ if (LTG_ISALLTRUE(b))
+ return 0;
+ else
+ return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(b), siglen);
+ }
+ else if (LTG_ISALLTRUE(b))
+ return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
+
+ return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
+}
+
+
+Datum
+_ltree_penalty(PG_FUNCTION_ARGS)
+{
+ ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
+ ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+ int siglen = LTREE_GET_ASIGLEN();
+
+ *penalty = hemdist(origval, newval, siglen);
+ PG_RETURN_POINTER(penalty);
+}
+
+typedef struct
+{
+ OffsetNumber pos;
+ int32 cost;
+} SPLITCOST;
+
+static int
+comparecost(const void *a, const void *b)
+{
+ return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
+}
+
+Datum
+_ltree_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ int siglen = LTREE_GET_ASIGLEN();
+ OffsetNumber k,
+ j;
+ ltree_gist *datum_l,
+ *datum_r;
+ BITVECP union_l,
+ union_r;
+ int32 size_alpha,
+ size_beta;
+ int32 size_waste,
+ waste = -1;
+ int32 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+ OffsetNumber maxoff;
+ BITVECP ptr;
+ int i;
+ SPLITCOST *costvector;
+ ltree_gist *_k,
+ *_j;
+
+ maxoff = entryvec->n - 2;
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ v->spl_left = (OffsetNumber *) palloc(nbytes);
+ v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+ {
+ _k = GETENTRY(entryvec, k);
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
+ {
+ size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
+ if (size_waste > waste)
+ {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+ }
+
+ left = v->spl_left;
+ v->spl_nleft = 0;
+ right = v->spl_right;
+ v->spl_nright = 0;
+
+ if (seed_1 == 0 || seed_2 == 0)
+ {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ /* form initial .. */
+ datum_l = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)),
+ LTG_SIGN(GETENTRY(entryvec, seed_1)),
+ siglen, NULL, NULL);
+
+ datum_r = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)),
+ LTG_SIGN(GETENTRY(entryvec, seed_2)),
+ siglen, NULL, NULL);
+
+ maxoff = OffsetNumberNext(maxoff);
+ /* sort before ... */
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+ {
+ costvector[j - 1].pos = j;
+ _j = GETENTRY(entryvec, j);
+ size_alpha = hemdist(datum_l, _j, siglen);
+ size_beta = hemdist(datum_r, _j, siglen);
+ costvector[j - 1].cost = Abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ union_l = LTG_SIGN(datum_l);
+ union_r = LTG_SIGN(datum_r);
+
+ for (k = 0; k < maxoff; k++)
+ {
+ j = costvector[k].pos;
+ if (j == seed_1)
+ {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ }
+ else if (j == seed_2)
+ {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+ _j = GETENTRY(entryvec, j);
+ size_alpha = hemdist(datum_l, _j, siglen);
+ size_beta = hemdist(datum_r, _j, siglen);
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
+ {
+ if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
+ {
+ if (!LTG_ISALLTRUE(datum_l))
+ MemSet((void *) union_l, 0xff, siglen);
+ }
+ else
+ {
+ ptr = LTG_SIGN(_j);
+ ALOOPBYTE(siglen)
+ union_l[i] |= ptr[i];
+ }
+ *left++ = j;
+ v->spl_nleft++;
+ }
+ else
+ {
+ if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
+ {
+ if (!LTG_ISALLTRUE(datum_r))
+ MemSet((void *) union_r, 0xff, siglen);
+ }
+ else
+ {
+ ptr = LTG_SIGN(_j);
+ ALOOPBYTE(siglen)
+ union_r[i] |= ptr[i];
+ }
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ *right = *left = FirstOffsetNumber;
+
+ v->spl_ldatum = PointerGetDatum(datum_l);
+ v->spl_rdatum = PointerGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
+
+static bool
+gist_te(ltree_gist *key, ltree *query, int siglen)
+{
+ ltree_level *curq = LTREE_FIRST(query);
+ BITVECP sign = LTG_SIGN(key);
+ int qlen = query->numlevel;
+ unsigned int hv;
+
+ if (LTG_ISALLTRUE(key))
+ return true;
+
+ while (qlen > 0)
+ {
+ hv = ltree_crc32_sz(curq->name, curq->len);
+ if (!GETBIT(sign, AHASHVAL(hv, siglen)))
+ return false;
+ curq = LEVEL_NEXT(curq);
+ qlen--;
+ }
+
+ return true;
+}
+
+typedef struct LtreeSignature
+{
+ BITVECP sign;
+ int siglen;
+} LtreeSignature;
+
+static bool
+checkcondition_bit(void *cxt, ITEM *val)
+{
+ LtreeSignature *sig = cxt;
+
+ return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(sig->sign, AHASHVAL(val->val, sig->siglen)) : true;
+}
+
+static bool
+gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
+{
+ LtreeSignature sig;
+
+ if (LTG_ISALLTRUE(key))
+ return true;
+
+ sig.sign = LTG_SIGN(key);
+ sig.siglen = siglen;
+
+ return ltree_execute(GETQUERY(query),
+ &sig, false,
+ checkcondition_bit);
+}
+
+static bool
+gist_qe(ltree_gist *key, lquery *query, int siglen)
+{
+ lquery_level *curq = LQUERY_FIRST(query);
+ BITVECP sign = LTG_SIGN(key);
+ int qlen = query->numlevel;
+
+ if (LTG_ISALLTRUE(key))
+ return true;
+
+ while (qlen > 0)
+ {
+ if (curq->numvar && LQL_CANLOOKSIGN(curq))
+ {
+ bool isexist = false;
+ int vlen = curq->numvar;
+ lquery_variant *curv = LQL_FIRST(curq);
+
+ while (vlen > 0)
+ {
+ if (GETBIT(sign, AHASHVAL(curv->val, siglen)))
+ {
+ isexist = true;
+ break;
+ }
+ curv = LVAR_NEXT(curv);
+ vlen--;
+ }
+ if (!isexist)
+ return false;
+ }
+
+ curq = LQL_NEXT(curq);
+ qlen--;
+ }
+
+ return true;
+}
+
+static bool
+_arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
+{
+ lquery *query = (lquery *) ARR_DATA_PTR(_query);
+ int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
+
+ if (ARR_NDIM(_query) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array must be one-dimensional")));
+ if (array_contains_nulls(_query))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array must not contain nulls")));
+
+ while (num > 0)
+ {
+ if (gist_qe(key, query, siglen))
+ return true;
+ num--;
+ query = (lquery *) NEXTVAL(query);
+ }
+ return false;
+}
+
+Datum
+_ltree_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ void *query = (void *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+
+ /* Oid subtype = PG_GETARG_OID(3); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ int siglen = LTREE_GET_ASIGLEN();
+ ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
+ bool res = false;
+
+ /* All cases served by this function are inexact */
+ *recheck = true;
+
+ switch (strategy)
+ {
+ case 10:
+ case 11:
+ res = gist_te(key, (ltree *) query, siglen);
+ break;
+ case 12:
+ case 13:
+ res = gist_qe(key, (lquery *) query, siglen);
+ break;
+ case 14:
+ case 15:
+ res = gist_qtxt(key, (ltxtquery *) query, siglen);
+ break;
+ case 16:
+ case 17:
+ res = _arrq_cons(key, (ArrayType *) query, siglen);
+ break;
+ default:
+ /* internal error */
+ elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
+ }
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+_ltree_gist_options(PG_FUNCTION_ARGS)
+{
+ local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
+
+ init_local_reloptions(relopts, sizeof(LtreeGistOptions));
+ add_local_int_reloption(relopts, "siglen", "signature length",
+ LTREE_ASIGLEN_DEFAULT, 1, LTREE_ASIGLEN_MAX,
+ offsetof(LtreeGistOptions, siglen));
+
+ PG_RETURN_VOID();
+}