diff options
Diffstat (limited to 'src/backend/access/gin/ginbulk.c')
-rw-r--r-- | src/backend/access/gin/ginbulk.c | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c new file mode 100644 index 0000000..4c5067c --- /dev/null +++ b/src/backend/access/gin/ginbulk.c @@ -0,0 +1,293 @@ +/*------------------------------------------------------------------------- + * + * ginbulk.c + * routines for fast build of inverted index + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginbulk.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/gin_private.h" +#include "utils/datum.h" +#include "utils/memutils.h" + + +#define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */ +#define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ + + +/* Combiner function for rbtree.c */ +static void +ginCombineData(RBTNode *existing, const RBTNode *newdata, void *arg) +{ + GinEntryAccumulator *eo = (GinEntryAccumulator *) existing; + const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata; + BuildAccumulator *accum = (BuildAccumulator *) arg; + + /* + * Note this code assumes that newdata contains only one itempointer. + */ + if (eo->count >= eo->maxcount) + { + if (eo->maxcount > INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("posting list is too long"), + errhint("Reduce maintenance_work_mem."))); + + accum->allocatedMemory -= GetMemoryChunkSpace(eo->list); + eo->maxcount *= 2; + eo->list = (ItemPointerData *) + repalloc_huge(eo->list, sizeof(ItemPointerData) * eo->maxcount); + accum->allocatedMemory += GetMemoryChunkSpace(eo->list); + } + + /* If item pointers are not ordered, they will need to be sorted later */ + if (eo->shouldSort == false) + { + int res; + + res = ginCompareItemPointers(eo->list + eo->count - 1, en->list); + Assert(res != 0); + + if (res > 0) + eo->shouldSort = true; + } + + eo->list[eo->count] = en->list[0]; + eo->count++; +} + +/* Comparator function for rbtree.c */ +static int +cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg) +{ + const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a; + const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b; + BuildAccumulator *accum = (BuildAccumulator *) arg; + + return ginCompareAttEntries(accum->ginstate, + ea->attnum, ea->key, ea->category, + eb->attnum, eb->key, eb->category); +} + +/* Allocator function for rbtree.c */ +static RBTNode * +ginAllocEntryAccumulator(void *arg) +{ + BuildAccumulator *accum = (BuildAccumulator *) arg; + GinEntryAccumulator *ea; + + /* + * Allocate memory by rather big chunks to decrease overhead. We have no + * need to reclaim RBTNodes individually, so this costs nothing. + */ + if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY) + { + accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY); + accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); + accum->eas_used = 0; + } + + /* Allocate new RBTNode from current chunk */ + ea = accum->entryallocator + accum->eas_used; + accum->eas_used++; + + return (RBTNode *) ea; +} + +void +ginInitBA(BuildAccumulator *accum) +{ + /* accum->ginstate is intentionally not set here */ + accum->allocatedMemory = 0; + accum->entryallocator = NULL; + accum->eas_used = 0; + accum->tree = rbt_create(sizeof(GinEntryAccumulator), + cmpEntryAccumulator, + ginCombineData, + ginAllocEntryAccumulator, + NULL, /* no freefunc needed */ + (void *) accum); +} + +/* + * This is basically the same as datumCopy(), but extended to count + * palloc'd space in accum->allocatedMemory. + */ +static Datum +getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) +{ + Form_pg_attribute att; + Datum res; + + att = TupleDescAttr(accum->ginstate->origTupdesc, attnum - 1); + if (att->attbyval) + res = value; + else + { + res = datumCopy(value, false, att->attlen); + accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res)); + } + return res; +} + +/* + * Find/store one entry from indexed value. + */ +static void +ginInsertBAEntry(BuildAccumulator *accum, + ItemPointer heapptr, OffsetNumber attnum, + Datum key, GinNullCategory category) +{ + GinEntryAccumulator eatmp; + GinEntryAccumulator *ea; + bool isNew; + + /* + * For the moment, fill only the fields of eatmp that will be looked at by + * cmpEntryAccumulator or ginCombineData. + */ + eatmp.attnum = attnum; + eatmp.key = key; + eatmp.category = category; + /* temporarily set up single-entry itempointer list */ + eatmp.list = heapptr; + + ea = (GinEntryAccumulator *) rbt_insert(accum->tree, (RBTNode *) &eatmp, + &isNew); + + if (isNew) + { + /* + * Finish initializing new tree entry, including making permanent + * copies of the datum (if it's not null) and itempointer. + */ + if (category == GIN_CAT_NORM_KEY) + ea->key = getDatumCopy(accum, attnum, key); + ea->maxcount = DEF_NPTR; + ea->count = 1; + ea->shouldSort = false; + ea->list = + (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR); + ea->list[0] = *heapptr; + accum->allocatedMemory += GetMemoryChunkSpace(ea->list); + } + else + { + /* + * ginCombineData did everything needed. + */ + } +} + +/* + * Insert the entries for one heap pointer. + * + * Since the entries are being inserted into a balanced binary tree, you + * might think that the order of insertion wouldn't be critical, but it turns + * out that inserting the entries in sorted order results in a lot of + * rebalancing operations and is slow. To prevent this, we attempt to insert + * the nodes in an order that will produce a nearly-balanced tree if the input + * is in fact sorted. + * + * We do this as follows. First, we imagine that we have an array whose size + * is the smallest power of two greater than or equal to the actual array + * size. Second, we insert the middle entry of our virtual array into the + * tree; then, we insert the middles of each half of our virtual array, then + * middles of quarters, etc. + */ +void +ginInsertBAEntries(BuildAccumulator *accum, + ItemPointer heapptr, OffsetNumber attnum, + Datum *entries, GinNullCategory *categories, + int32 nentries) +{ + uint32 step = nentries; + + if (nentries <= 0) + return; + + Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); + + /* + * step will contain largest power of 2 and <= nentries + */ + step |= (step >> 1); + step |= (step >> 2); + step |= (step >> 4); + step |= (step >> 8); + step |= (step >> 16); + step >>= 1; + step++; + + while (step > 0) + { + int i; + + for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ ) + ginInsertBAEntry(accum, heapptr, attnum, + entries[i], categories[i]); + + step >>= 1; /* /2 */ + } +} + +static int +qsortCompareItemPointers(const void *a, const void *b) +{ + int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b); + + /* Assert that there are no equal item pointers being sorted */ + Assert(res != 0); + return res; +} + +/* Prepare to read out the rbtree contents using ginGetBAEntry */ +void +ginBeginBAScan(BuildAccumulator *accum) +{ + rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); +} + +/* + * Get the next entry in sequence from the BuildAccumulator's rbtree. + * This consists of a single key datum and a list (array) of one or more + * heap TIDs in which that key is found. The list is guaranteed sorted. + */ +ItemPointerData * +ginGetBAEntry(BuildAccumulator *accum, + OffsetNumber *attnum, Datum *key, GinNullCategory *category, + uint32 *n) +{ + GinEntryAccumulator *entry; + ItemPointerData *list; + + entry = (GinEntryAccumulator *) rbt_iterate(&accum->tree_walk); + + if (entry == NULL) + return NULL; /* no more entries */ + + *attnum = entry->attnum; + *key = entry->key; + *category = entry->category; + list = entry->list; + *n = entry->count; + + Assert(list != NULL && entry->count > 0); + + if (entry->shouldSort && entry->count > 1) + qsort(list, entry->count, sizeof(ItemPointerData), + qsortCompareItemPointers); + + return list; +} |