summaryrefslogtreecommitdiffstats
path: root/src/backend/access/gist/gistbuildbuffers.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/access/gist/gistbuildbuffers.c
parentInitial commit. (diff)
downloadpostgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/access/gist/gistbuildbuffers.c')
-rw-r--r--src/backend/access/gist/gistbuildbuffers.c775
1 files changed, 775 insertions, 0 deletions
diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c
new file mode 100644
index 0000000..95cc334
--- /dev/null
+++ b/src/backend/access/gist/gistbuildbuffers.c
@@ -0,0 +1,775 @@
+/*-------------------------------------------------------------------------
+ *
+ * gistbuildbuffers.c
+ * node buffer management functions for GiST buffering build algorithm.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/gist/gistbuildbuffers.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gist_private.h"
+#include "catalog/index.h"
+#include "miscadmin.h"
+#include "storage/buffile.h"
+#include "storage/bufmgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
+static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
+ GISTNodeBuffer *nodeBuffer);
+static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
+ GISTNodeBuffer *nodeBuffer);
+static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
+ GISTNodeBuffer *nodeBuffer);
+static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
+ IndexTuple item);
+static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
+ IndexTuple *item);
+static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
+static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
+
+static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
+static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr);
+
+
+/*
+ * Initialize GiST build buffers.
+ */
+GISTBuildBuffers *
+gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
+{
+ GISTBuildBuffers *gfbb;
+ HASHCTL hashCtl;
+
+ gfbb = palloc(sizeof(GISTBuildBuffers));
+ gfbb->pagesPerBuffer = pagesPerBuffer;
+ gfbb->levelStep = levelStep;
+
+ /*
+ * Create a temporary file to hold buffer pages that are swapped out of
+ * memory.
+ */
+ gfbb->pfile = BufFileCreateTemp(false);
+ gfbb->nFileBlocks = 0;
+
+ /* Initialize free page management. */
+ gfbb->nFreeBlocks = 0;
+ gfbb->freeBlocksLen = 32;
+ gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
+
+ /*
+ * Current memory context will be used for all in-memory data structures
+ * of buffers which are persistent during buffering build.
+ */
+ gfbb->context = CurrentMemoryContext;
+
+ /*
+ * nodeBuffersTab hash is association between index blocks and it's
+ * buffers.
+ */
+ hashCtl.keysize = sizeof(BlockNumber);
+ hashCtl.entrysize = sizeof(GISTNodeBuffer);
+ hashCtl.hcxt = CurrentMemoryContext;
+ gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
+ 1024,
+ &hashCtl,
+ HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+ gfbb->bufferEmptyingQueue = NIL;
+
+ /*
+ * Per-level node buffers lists for final buffers emptying process. Node
+ * buffers are inserted here when they are created.
+ */
+ gfbb->buffersOnLevelsLen = 1;
+ gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
+ gfbb->buffersOnLevelsLen);
+ gfbb->buffersOnLevels[0] = NIL;
+
+ /*
+ * Block numbers of node buffers which last pages are currently loaded
+ * into main memory.
+ */
+ gfbb->loadedBuffersLen = 32;
+ gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
+ sizeof(GISTNodeBuffer *));
+ gfbb->loadedBuffersCount = 0;
+
+ gfbb->rootlevel = maxLevel;
+
+ return gfbb;
+}
+
+/*
+ * Returns a node buffer for given block. The buffer is created if it
+ * doesn't exist yet.
+ */
+GISTNodeBuffer *
+gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
+ BlockNumber nodeBlocknum, int level)
+{
+ GISTNodeBuffer *nodeBuffer;
+ bool found;
+
+ /* Find node buffer in hash table */
+ nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
+ (const void *) &nodeBlocknum,
+ HASH_ENTER,
+ &found);
+ if (!found)
+ {
+ /*
+ * Node buffer wasn't found. Initialize the new buffer as empty.
+ */
+ MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+ /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
+ nodeBuffer->blocksCount = 0;
+ nodeBuffer->pageBlocknum = InvalidBlockNumber;
+ nodeBuffer->pageBuffer = NULL;
+ nodeBuffer->queuedForEmptying = false;
+ nodeBuffer->isTemp = false;
+ nodeBuffer->level = level;
+
+ /*
+ * Add this buffer to the list of buffers on this level. Enlarge
+ * buffersOnLevels array if needed.
+ */
+ if (level >= gfbb->buffersOnLevelsLen)
+ {
+ int i;
+
+ gfbb->buffersOnLevels =
+ (List **) repalloc(gfbb->buffersOnLevels,
+ (level + 1) * sizeof(List *));
+
+ /* initialize the enlarged portion */
+ for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
+ gfbb->buffersOnLevels[i] = NIL;
+ gfbb->buffersOnLevelsLen = level + 1;
+ }
+
+ /*
+ * Prepend the new buffer to the list of buffers on this level. It's
+ * not arbitrary that the new buffer is put to the beginning of the
+ * list: in the final emptying phase we loop through all buffers at
+ * each level, and flush them. If a page is split during the emptying,
+ * it's more efficient to flush the new splitted pages first, before
+ * moving on to pre-existing pages on the level. The buffers just
+ * created during the page split are likely still in cache, so
+ * flushing them immediately is more efficient than putting them to
+ * the end of the queue.
+ */
+ gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
+ gfbb->buffersOnLevels[level]);
+
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ return nodeBuffer;
+}
+
+/*
+ * Allocate memory for a buffer page.
+ */
+static GISTNodeBufferPage *
+gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
+{
+ GISTNodeBufferPage *pageBuffer;
+
+ pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
+ BLCKSZ);
+ pageBuffer->prev = InvalidBlockNumber;
+
+ /* Set page free space */
+ PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
+ return pageBuffer;
+}
+
+/*
+ * Add specified buffer into loadedBuffers array.
+ */
+static void
+gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
+{
+ /* Never add a temporary buffer to the array */
+ if (nodeBuffer->isTemp)
+ return;
+
+ /* Enlarge the array if needed */
+ if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
+ {
+ gfbb->loadedBuffersLen *= 2;
+ gfbb->loadedBuffers = (GISTNodeBuffer **)
+ repalloc(gfbb->loadedBuffers,
+ gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
+ }
+
+ gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
+ gfbb->loadedBuffersCount++;
+}
+
+/*
+ * Load last page of node buffer into main memory.
+ */
+static void
+gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
+{
+ /* Check if we really should load something */
+ if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
+ {
+ /* Allocate memory for page */
+ nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
+
+ /* Read block from temporary file */
+ ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
+ nodeBuffer->pageBuffer);
+
+ /* Mark file block as free */
+ gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
+
+ /* Mark node buffer as loaded */
+ gistAddLoadedBuffer(gfbb, nodeBuffer);
+ nodeBuffer->pageBlocknum = InvalidBlockNumber;
+ }
+}
+
+/*
+ * Write last page of node buffer to the disk.
+ */
+static void
+gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
+{
+ /* Check if we have something to write */
+ if (nodeBuffer->pageBuffer)
+ {
+ BlockNumber blkno;
+
+ /* Get free file block */
+ blkno = gistBuffersGetFreeBlock(gfbb);
+
+ /* Write block to the temporary file */
+ WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
+
+ /* Free memory of that page */
+ pfree(nodeBuffer->pageBuffer);
+ nodeBuffer->pageBuffer = NULL;
+
+ /* Save block number */
+ nodeBuffer->pageBlocknum = blkno;
+ }
+}
+
+/*
+ * Write last pages of all node buffers to the disk.
+ */
+void
+gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
+{
+ int i;
+
+ /* Unload all the buffers that have a page loaded in memory. */
+ for (i = 0; i < gfbb->loadedBuffersCount; i++)
+ gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
+
+ /* Now there are no node buffers with loaded last page */
+ gfbb->loadedBuffersCount = 0;
+}
+
+/*
+ * Add index tuple to buffer page.
+ */
+static void
+gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
+{
+ Size itupsz = IndexTupleSize(itup);
+ char *ptr;
+
+ /* There should be enough of space. */
+ Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
+
+ /* Reduce free space value of page to reserve a spot for the tuple. */
+ PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
+
+ /* Get pointer to the spot we reserved (ie. end of free space). */
+ ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
+ + PAGE_FREE_SPACE(pageBuffer);
+
+ /* Copy the index tuple there. */
+ memcpy(ptr, itup, itupsz);
+}
+
+/*
+ * Get last item from buffer page and remove it from page.
+ */
+static void
+gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
+{
+ IndexTuple ptr;
+ Size itupsz;
+
+ Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
+
+ /* Get pointer to last index tuple */
+ ptr = (IndexTuple) ((char *) pageBuffer
+ + BUFFER_PAGE_DATA_OFFSET
+ + PAGE_FREE_SPACE(pageBuffer));
+ itupsz = IndexTupleSize(ptr);
+
+ /* Make a copy of the tuple */
+ *itup = (IndexTuple) palloc(itupsz);
+ memcpy(*itup, ptr, itupsz);
+
+ /* Mark the space used by the tuple as free */
+ PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
+}
+
+/*
+ * Push an index tuple to node buffer.
+ */
+void
+gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
+ IndexTuple itup)
+{
+ /*
+ * Most part of memory operations will be in buffering build persistent
+ * context. So, let's switch to it.
+ */
+ MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+ /*
+ * If the buffer is currently empty, create the first page.
+ */
+ if (nodeBuffer->blocksCount == 0)
+ {
+ nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
+ nodeBuffer->blocksCount = 1;
+ gistAddLoadedBuffer(gfbb, nodeBuffer);
+ }
+
+ /* Load last page of node buffer if it wasn't in memory already */
+ if (!nodeBuffer->pageBuffer)
+ gistLoadNodeBuffer(gfbb, nodeBuffer);
+
+ /*
+ * Check if there is enough space on the last page for the tuple.
+ */
+ if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
+ {
+ /*
+ * Nope. Swap previous block to disk and allocate a new one.
+ */
+ BlockNumber blkno;
+
+ /* Write filled page to the disk */
+ blkno = gistBuffersGetFreeBlock(gfbb);
+ WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
+
+ /*
+ * Reset the in-memory page as empty, and link the previous block to
+ * the new page by storing its block number in the prev-link.
+ */
+ PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
+ BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
+ nodeBuffer->pageBuffer->prev = blkno;
+
+ /* We've just added one more page */
+ nodeBuffer->blocksCount++;
+ }
+
+ gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
+
+ /*
+ * If the buffer just overflowed, add it to the emptying queue.
+ */
+ if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
+ {
+ gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
+ gfbb->bufferEmptyingQueue);
+ nodeBuffer->queuedForEmptying = true;
+ }
+
+ /* Restore memory context */
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * Removes one index tuple from node buffer. Returns true if success and false
+ * if node buffer is empty.
+ */
+bool
+gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
+ IndexTuple *itup)
+{
+ /*
+ * If node buffer is empty then return false.
+ */
+ if (nodeBuffer->blocksCount <= 0)
+ return false;
+
+ /* Load last page of node buffer if needed */
+ if (!nodeBuffer->pageBuffer)
+ gistLoadNodeBuffer(gfbb, nodeBuffer);
+
+ /*
+ * Get index tuple from last non-empty page.
+ */
+ gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
+
+ /*
+ * If we just removed the last tuple from the page, fetch previous page on
+ * this node buffer (if any).
+ */
+ if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
+ {
+ BlockNumber prevblkno;
+
+ /*
+ * blocksCount includes the page in pageBuffer, so decrease it now.
+ */
+ nodeBuffer->blocksCount--;
+
+ /*
+ * If there's more pages, fetch previous one.
+ */
+ prevblkno = nodeBuffer->pageBuffer->prev;
+ if (prevblkno != InvalidBlockNumber)
+ {
+ /* There is a previous page. Fetch it. */
+ Assert(nodeBuffer->blocksCount > 0);
+ ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
+
+ /*
+ * Now that we've read the block in memory, we can release its
+ * on-disk block for reuse.
+ */
+ gistBuffersReleaseBlock(gfbb, prevblkno);
+ }
+ else
+ {
+ /* No more pages. Free memory. */
+ Assert(nodeBuffer->blocksCount == 0);
+ pfree(nodeBuffer->pageBuffer);
+ nodeBuffer->pageBuffer = NULL;
+ }
+ }
+ return true;
+}
+
+/*
+ * Select a currently unused block for writing to.
+ */
+static long
+gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
+{
+ /*
+ * If there are multiple free blocks, we select the one appearing last in
+ * freeBlocks[]. If there are none, assign the next block at the end of
+ * the file (causing the file to be extended).
+ */
+ if (gfbb->nFreeBlocks > 0)
+ return gfbb->freeBlocks[--gfbb->nFreeBlocks];
+ else
+ return gfbb->nFileBlocks++;
+}
+
+/*
+ * Return a block# to the freelist.
+ */
+static void
+gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
+{
+ int ndx;
+
+ /* Enlarge freeBlocks array if full. */
+ if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
+ {
+ gfbb->freeBlocksLen *= 2;
+ gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
+ gfbb->freeBlocksLen *
+ sizeof(long));
+ }
+
+ /* Add blocknum to array */
+ ndx = gfbb->nFreeBlocks++;
+ gfbb->freeBlocks[ndx] = blocknum;
+}
+
+/*
+ * Free buffering build data structure.
+ */
+void
+gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
+{
+ /* Close buffers file. */
+ BufFileClose(gfbb->pfile);
+
+ /* All other things will be freed on memory context release */
+}
+
+/*
+ * Data structure representing information about node buffer for index tuples
+ * relocation from splitted node buffer.
+ */
+typedef struct
+{
+ GISTENTRY entry[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ GISTPageSplitInfo *splitinfo;
+ GISTNodeBuffer *nodeBuffer;
+} RelocationBufferInfo;
+
+/*
+ * At page split, distribute tuples from the buffer of the split page to
+ * new buffers for the created page halves. This also adjusts the downlinks
+ * in 'splitinfo' to include the tuples in the buffers.
+ */
+void
+gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
+ Relation r, int level,
+ Buffer buffer, List *splitinfo)
+{
+ RelocationBufferInfo *relocationBuffersInfos;
+ bool found;
+ GISTNodeBuffer *nodeBuffer;
+ BlockNumber blocknum;
+ IndexTuple itup;
+ int splitPagesCount = 0,
+ i;
+ GISTENTRY entry[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ GISTNodeBuffer oldBuf;
+ ListCell *lc;
+
+ /* If the splitted page doesn't have buffers, we have nothing to do. */
+ if (!LEVEL_HAS_BUFFERS(level, gfbb))
+ return;
+
+ /*
+ * Get the node buffer of the splitted page.
+ */
+ blocknum = BufferGetBlockNumber(buffer);
+ nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
+ HASH_FIND, &found);
+ if (!found)
+ {
+ /* The page has no buffer, so we have nothing to do. */
+ return;
+ }
+
+ /*
+ * Make a copy of the old buffer, as we're going reuse it as the buffer
+ * for the new left page, which is on the same block as the old page.
+ * That's not true for the root page, but that's fine because we never
+ * have a buffer on the root page anyway. The original algorithm as
+ * described by Arge et al did, but it's of no use, as you might as well
+ * read the tuples straight from the heap instead of the root buffer.
+ */
+ Assert(blocknum != GIST_ROOT_BLKNO);
+ memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
+ oldBuf.isTemp = true;
+
+ /* Reset the old buffer, used for the new left page from now on */
+ nodeBuffer->blocksCount = 0;
+ nodeBuffer->pageBuffer = NULL;
+ nodeBuffer->pageBlocknum = InvalidBlockNumber;
+
+ /*
+ * Allocate memory for information about relocation buffers.
+ */
+ splitPagesCount = list_length(splitinfo);
+ relocationBuffersInfos =
+ (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
+ splitPagesCount);
+
+ /*
+ * Fill relocation buffers information for node buffers of pages produced
+ * by split.
+ */
+ i = 0;
+ foreach(lc, splitinfo)
+ {
+ GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
+ GISTNodeBuffer *newNodeBuffer;
+
+ /* Decompress parent index tuple of node buffer page. */
+ gistDeCompressAtt(giststate, r,
+ si->downlink, NULL, (OffsetNumber) 0,
+ relocationBuffersInfos[i].entry,
+ relocationBuffersInfos[i].isnull);
+
+ /*
+ * Create a node buffer for the page. The leftmost half is on the same
+ * block as the old page before split, so for the leftmost half this
+ * will return the original buffer. The tuples on the original buffer
+ * were relinked to the temporary buffer, so the original one is now
+ * empty.
+ */
+ newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
+
+ relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
+ relocationBuffersInfos[i].splitinfo = si;
+
+ i++;
+ }
+
+ /*
+ * Loop through all index tuples in the buffer of the page being split,
+ * moving them to buffers for the new pages. We try to move each tuple to
+ * the page that will result in the lowest penalty for the leading column
+ * or, in the case of a tie, the lowest penalty for the earliest column
+ * that is not tied.
+ *
+ * The page searching logic is very similar to gistchoose().
+ */
+ while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
+ {
+ float best_penalty[INDEX_MAX_KEYS];
+ int i,
+ which;
+ IndexTuple newtup;
+ RelocationBufferInfo *targetBufferInfo;
+
+ gistDeCompressAtt(giststate, r,
+ itup, NULL, (OffsetNumber) 0, entry, isnull);
+
+ /* default to using first page (shouldn't matter) */
+ which = 0;
+
+ /*
+ * best_penalty[j] is the best penalty we have seen so far for column
+ * j, or -1 when we haven't yet examined column j. Array entries to
+ * the right of the first -1 are undefined.
+ */
+ best_penalty[0] = -1;
+
+ /*
+ * Loop over possible target pages, looking for one to move this tuple
+ * to.
+ */
+ for (i = 0; i < splitPagesCount; i++)
+ {
+ RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
+ bool zero_penalty;
+ int j;
+
+ zero_penalty = true;
+
+ /* Loop over index attributes. */
+ for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
+ {
+ float usize;
+
+ /* Compute penalty for this column. */
+ usize = gistpenalty(giststate, j,
+ &splitPageInfo->entry[j],
+ splitPageInfo->isnull[j],
+ &entry[j], isnull[j]);
+ if (usize > 0)
+ zero_penalty = false;
+
+ if (best_penalty[j] < 0 || usize < best_penalty[j])
+ {
+ /*
+ * New best penalty for column. Tentatively select this
+ * page as the target, and record the best penalty. Then
+ * reset the next column's penalty to "unknown" (and
+ * indirectly, the same for all the ones to its right).
+ * This will force us to adopt this page's penalty values
+ * as the best for all the remaining columns during
+ * subsequent loop iterations.
+ */
+ which = i;
+ best_penalty[j] = usize;
+
+ if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
+ best_penalty[j + 1] = -1;
+ }
+ else if (best_penalty[j] == usize)
+ {
+ /*
+ * The current page is exactly as good for this column as
+ * the best page seen so far. The next iteration of this
+ * loop will compare the next column.
+ */
+ }
+ else
+ {
+ /*
+ * The current page is worse for this column than the best
+ * page seen so far. Skip the remaining columns and move
+ * on to the next page, if any.
+ */
+ zero_penalty = false; /* so outer loop won't exit */
+ break;
+ }
+ }
+
+ /*
+ * If we find a page with zero penalty for all columns, there's no
+ * need to examine remaining pages; just break out of the loop and
+ * return it.
+ */
+ if (zero_penalty)
+ break;
+ }
+
+ /* OK, "which" is the page index to push the tuple to */
+ targetBufferInfo = &relocationBuffersInfos[which];
+
+ /* Push item to selected node buffer */
+ gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
+
+ /* Adjust the downlink for this page, if needed. */
+ newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
+ itup, giststate);
+ if (newtup)
+ {
+ gistDeCompressAtt(giststate, r,
+ newtup, NULL, (OffsetNumber) 0,
+ targetBufferInfo->entry,
+ targetBufferInfo->isnull);
+
+ targetBufferInfo->splitinfo->downlink = newtup;
+ }
+ }
+
+ pfree(relocationBuffersInfos);
+}
+
+
+/*
+ * Wrappers around BufFile operations. The main difference is that these
+ * wrappers report errors with ereport(), so that the callers don't need
+ * to check the return code.
+ */
+
+static void
+ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
+{
+ size_t nread;
+
+ if (BufFileSeekBlock(file, blknum) != 0)
+ elog(ERROR, "could not seek to block %ld in temporary file", blknum);
+ nread = BufFileRead(file, ptr, BLCKSZ);
+ if (nread != BLCKSZ)
+ elog(ERROR, "could not read temporary file: read only %zu of %zu bytes",
+ nread, (size_t) BLCKSZ);
+}
+
+static void
+WriteTempFileBlock(BufFile *file, long blknum, void *ptr)
+{
+ if (BufFileSeekBlock(file, blknum) != 0)
+ elog(ERROR, "could not seek to block %ld in temporary file", blknum);
+ BufFileWrite(file, ptr, BLCKSZ);
+}