1 files changed, 775 insertions, 0 deletions
diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c
new file mode 100644
index 0000000..95cc334
--- /dev/null
+++ b/src/backend/access/gist/gistbuildbuffers.c
@@ -0,0 +1,775 @@
+/*-------------------------------------------------------------------------
+ *
+ * gistbuildbuffers.c
+ *	  node buffer management functions for GiST buffering build algorithm.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/gist/gistbuildbuffers.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gist_private.h"
+#include "catalog/index.h"
+#include "miscadmin.h"
+#include "storage/buffile.h"
+#include "storage/bufmgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
+static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
+								GISTNodeBuffer *nodeBuffer);
+static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
+							   GISTNodeBuffer *nodeBuffer);
+static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
+								 GISTNodeBuffer *nodeBuffer);
+static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
+								IndexTuple item);
+static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
+								IndexTuple *item);
+static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
+static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
+
+static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
+static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr);
+
+
+/*
+ * Initialize GiST build buffers.
+ */
+GISTBuildBuffers *
+gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
+{
+	GISTBuildBuffers *gfbb;
+	HASHCTL		hashCtl;
+
+	gfbb = palloc(sizeof(GISTBuildBuffers));
+	gfbb->pagesPerBuffer = pagesPerBuffer;
+	gfbb->levelStep = levelStep;
+
+	/*
+	 * Create a temporary file to hold buffer pages that are swapped out of
+	 * memory.
+	 */
+	gfbb->pfile = BufFileCreateTemp(false);
+	gfbb->nFileBlocks = 0;
+
+	/* Initialize free page management. */
+	gfbb->nFreeBlocks = 0;
+	gfbb->freeBlocksLen = 32;
+	gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
+
+	/*
+	 * Current memory context will be used for all in-memory data structures
+	 * of buffers which are persistent during buffering build.
+	 */
+	gfbb->context = CurrentMemoryContext;
+
+	/*
+	 * nodeBuffersTab hash is association between index blocks and it's
+	 * buffers.
+	 */
+	hashCtl.keysize = sizeof(BlockNumber);
+	hashCtl.entrysize = sizeof(GISTNodeBuffer);
+	hashCtl.hcxt = CurrentMemoryContext;
+	gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
+									   1024,
+									   &hashCtl,
+									   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+	gfbb->bufferEmptyingQueue = NIL;
+
+	/*
+	 * Per-level node buffers lists for final buffers emptying process. Node
+	 * buffers are inserted here when they are created.
+	 */
+	gfbb->buffersOnLevelsLen = 1;
+	gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
+											 gfbb->buffersOnLevelsLen);
+	gfbb->buffersOnLevels[0] = NIL;
+
+	/*
+	 * Block numbers of node buffers which last pages are currently loaded
+	 * into main memory.
+	 */
+	gfbb->loadedBuffersLen = 32;
+	gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
+													 sizeof(GISTNodeBuffer *));
+	gfbb->loadedBuffersCount = 0;
+
+	gfbb->rootlevel = maxLevel;
+
+	return gfbb;
+}
+
+/*
+ * Returns a node buffer for given block. The buffer is created if it
+ * doesn't exist yet.
+ */
+GISTNodeBuffer *
+gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
+				  BlockNumber nodeBlocknum, int level)
+{
+	GISTNodeBuffer *nodeBuffer;
+	bool		found;
+
+	/* Find node buffer in hash table */
+	nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
+												(const void *) &nodeBlocknum,
+												HASH_ENTER,
+												&found);
+	if (!found)
+	{
+		/*
+		 * Node buffer wasn't found. Initialize the new buffer as empty.
+		 */
+		MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+		/* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
+		nodeBuffer->blocksCount = 0;
+		nodeBuffer->pageBlocknum = InvalidBlockNumber;
+		nodeBuffer->pageBuffer = NULL;
+		nodeBuffer->queuedForEmptying = false;
+		nodeBuffer->isTemp = false;
+		nodeBuffer->level = level;
+
+		/*
+		 * Add this buffer to the list of buffers on this level. Enlarge
+		 * buffersOnLevels array if needed.
+		 */
+		if (level >= gfbb->buffersOnLevelsLen)
+		{
+			int			i;
+
+			gfbb->buffersOnLevels =
+				(List **) repalloc(gfbb->buffersOnLevels,
+								   (level + 1) * sizeof(List *));
+
+			/* initialize the enlarged portion */
+			for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
+				gfbb->buffersOnLevels[i] = NIL;
+			gfbb->buffersOnLevelsLen = level + 1;
+		}
+
+		/*
+		 * Prepend the new buffer to the list of buffers on this level. It's
+		 * not arbitrary that the new buffer is put to the beginning of the
+		 * list: in the final emptying phase we loop through all buffers at
+		 * each level, and flush them. If a page is split during the emptying,
+		 * it's more efficient to flush the new splitted pages first, before
+		 * moving on to pre-existing pages on the level. The buffers just
+		 * created during the page split are likely still in cache, so
+		 * flushing them immediately is more efficient than putting them to
+		 * the end of the queue.
+		 */
+		gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
+											 gfbb->buffersOnLevels[level]);
+
+		MemoryContextSwitchTo(oldcxt);
+	}
+
+	return nodeBuffer;
+}
+
+/*
+ * Allocate memory for a buffer page.
+ */
+static GISTNodeBufferPage *
+gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
+{
+	GISTNodeBufferPage *pageBuffer;
+
+	pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
+															   BLCKSZ);
+	pageBuffer->prev = InvalidBlockNumber;
+
+	/* Set page free space */
+	PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
+	return pageBuffer;
+}
+
+/*
+ * Add specified buffer into loadedBuffers array.
+ */
+static void
+gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
+{
+	/* Never add a temporary buffer to the array */
+	if (nodeBuffer->isTemp)
+		return;
+
+	/* Enlarge the array if needed */
+	if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
+	{
+		gfbb->loadedBuffersLen *= 2;
+		gfbb->loadedBuffers = (GISTNodeBuffer **)
+			repalloc(gfbb->loadedBuffers,
+					 gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
+	}
+
+	gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
+	gfbb->loadedBuffersCount++;
+}
+
+/*
+ * Load last page of node buffer into main memory.
+ */
+static void
+gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
+{
+	/* Check if we really should load something */
+	if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
+	{
+		/* Allocate memory for page */
+		nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
+
+		/* Read block from temporary file */
+		ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
+						  nodeBuffer->pageBuffer);
+
+		/* Mark file block as free */
+		gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
+
+		/* Mark node buffer as loaded */
+		gistAddLoadedBuffer(gfbb, nodeBuffer);
+		nodeBuffer->pageBlocknum = InvalidBlockNumber;
+	}
+}
+
+/*
+ * Write last page of node buffer to the disk.
+ */
+static void
+gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
+{
+	/* Check if we have something to write */
+	if (nodeBuffer->pageBuffer)
+	{
+		BlockNumber blkno;
+
+		/* Get free file block */
+		blkno = gistBuffersGetFreeBlock(gfbb);
+
+		/* Write block to the temporary file */
+		WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
+
+		/* Free memory of that page */
+		pfree(nodeBuffer->pageBuffer);
+		nodeBuffer->pageBuffer = NULL;
+
+		/* Save block number */
+		nodeBuffer->pageBlocknum = blkno;
+	}
+}
+
+/*
+ * Write last pages of all node buffers to the disk.
+ */
+void
+gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
+{
+	int			i;
+
+	/* Unload all the buffers that have a page loaded in memory. */
+	for (i = 0; i < gfbb->loadedBuffersCount; i++)
+		gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
+
+	/* Now there are no node buffers with loaded last page */
+	gfbb->loadedBuffersCount = 0;
+}
+
+/*
+ * Add index tuple to buffer page.
+ */
+static void
+gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
+{
+	Size		itupsz = IndexTupleSize(itup);
+	char	   *ptr;
+
+	/* There should be enough of space. */
+	Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
+
+	/* Reduce free space value of page to reserve a spot for the tuple. */
+	PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
+
+	/* Get pointer to the spot we reserved (ie. end of free space). */
+	ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
+		+ PAGE_FREE_SPACE(pageBuffer);
+
+	/* Copy the index tuple there. */
+	memcpy(ptr, itup, itupsz);
+}
+
+/*
+ * Get last item from buffer page and remove it from page.
+ */
+static void
+gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
+{
+	IndexTuple	ptr;
+	Size		itupsz;
+
+	Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
+
+	/* Get pointer to last index tuple */
+	ptr = (IndexTuple) ((char *) pageBuffer
+						+ BUFFER_PAGE_DATA_OFFSET
+						+ PAGE_FREE_SPACE(pageBuffer));
+	itupsz = IndexTupleSize(ptr);
+
+	/* Make a copy of the tuple */
+	*itup = (IndexTuple) palloc(itupsz);
+	memcpy(*itup, ptr, itupsz);
+
+	/* Mark the space used by the tuple as free */
+	PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
+}
+
+/*
+ * Push an index tuple to node buffer.
+ */
+void
+gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
+						 IndexTuple itup)
+{
+	/*
+	 * Most part of memory operations will be in buffering build persistent
+	 * context. So, let's switch to it.
+	 */
+	MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
+
+	/*
+	 * If the buffer is currently empty, create the first page.
+	 */
+	if (nodeBuffer->blocksCount == 0)
+	{
+		nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
+		nodeBuffer->blocksCount = 1;
+		gistAddLoadedBuffer(gfbb, nodeBuffer);
+	}
+
+	/* Load last page of node buffer if it wasn't in memory already */
+	if (!nodeBuffer->pageBuffer)
+		gistLoadNodeBuffer(gfbb, nodeBuffer);
+
+	/*
+	 * Check if there is enough space on the last page for the tuple.
+	 */
+	if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
+	{
+		/*
+		 * Nope. Swap previous block to disk and allocate a new one.
+		 */
+		BlockNumber blkno;
+
+		/* Write filled page to the disk */
+		blkno = gistBuffersGetFreeBlock(gfbb);
+		WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
+
+		/*
+		 * Reset the in-memory page as empty, and link the previous block to
+		 * the new page by storing its block number in the prev-link.
+		 */
+		PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
+			BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
+		nodeBuffer->pageBuffer->prev = blkno;
+
+		/* We've just added one more page */
+		nodeBuffer->blocksCount++;
+	}
+
+	gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
+
+	/*
+	 * If the buffer just overflowed, add it to the emptying queue.
+	 */
+	if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
+	{
+		gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
+										  gfbb->bufferEmptyingQueue);
+		nodeBuffer->queuedForEmptying = true;
+	}
+
+	/* Restore memory context */
+	MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * Removes one index tuple from node buffer. Returns true if success and false
+ * if node buffer is empty.
+ */
+bool
+gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
+						  IndexTuple *itup)
+{
+	/*
+	 * If node buffer is empty then return false.
+	 */
+	if (nodeBuffer->blocksCount <= 0)
+		return false;
+
+	/* Load last page of node buffer if needed */
+	if (!nodeBuffer->pageBuffer)
+		gistLoadNodeBuffer(gfbb, nodeBuffer);
+
+	/*
+	 * Get index tuple from last non-empty page.
+	 */
+	gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
+
+	/*
+	 * If we just removed the last tuple from the page, fetch previous page on
+	 * this node buffer (if any).
+	 */
+	if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
+	{
+		BlockNumber prevblkno;
+
+		/*
+		 * blocksCount includes the page in pageBuffer, so decrease it now.
+		 */
+		nodeBuffer->blocksCount--;
+
+		/*
+		 * If there's more pages, fetch previous one.
+		 */
+		prevblkno = nodeBuffer->pageBuffer->prev;
+		if (prevblkno != InvalidBlockNumber)
+		{
+			/* There is a previous page. Fetch it. */
+			Assert(nodeBuffer->blocksCount > 0);
+			ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
+
+			/*
+			 * Now that we've read the block in memory, we can release its
+			 * on-disk block for reuse.
+			 */
+			gistBuffersReleaseBlock(gfbb, prevblkno);
+		}
+		else
+		{
+			/* No more pages. Free memory. */
+			Assert(nodeBuffer->blocksCount == 0);
+			pfree(nodeBuffer->pageBuffer);
+			nodeBuffer->pageBuffer = NULL;
+		}
+	}
+	return true;
+}
+
+/*
+ * Select a currently unused block for writing to.
+ */
+static long
+gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
+{
+	/*
+	 * If there are multiple free blocks, we select the one appearing last in
+	 * freeBlocks[].  If there are none, assign the next block at the end of
+	 * the file (causing the file to be extended).
+	 */
+	if (gfbb->nFreeBlocks > 0)
+		return gfbb->freeBlocks[--gfbb->nFreeBlocks];
+	else
+		return gfbb->nFileBlocks++;
+}
+
+/*
+ * Return a block# to the freelist.
+ */
+static void
+gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
+{
+	int			ndx;
+
+	/* Enlarge freeBlocks array if full. */
+	if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
+	{
+		gfbb->freeBlocksLen *= 2;
+		gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
+											 gfbb->freeBlocksLen *
+											 sizeof(long));
+	}
+
+	/* Add blocknum to array */
+	ndx = gfbb->nFreeBlocks++;
+	gfbb->freeBlocks[ndx] = blocknum;
+}
+
+/*
+ * Free buffering build data structure.
+ */
+void
+gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
+{
+	/* Close buffers file. */
+	BufFileClose(gfbb->pfile);
+
+	/* All other things will be freed on memory context release */
+}
+
+/*
+ * Data structure representing information about node buffer for index tuples
+ * relocation from splitted node buffer.
+ */
+typedef struct
+{
+	GISTENTRY	entry[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	GISTPageSplitInfo *splitinfo;
+	GISTNodeBuffer *nodeBuffer;
+} RelocationBufferInfo;
+
+/*
+ * At page split, distribute tuples from the buffer of the split page to
+ * new buffers for the created page halves. This also adjusts the downlinks
+ * in 'splitinfo' to include the tuples in the buffers.
+ */
+void
+gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
+								Relation r, int level,
+								Buffer buffer, List *splitinfo)
+{
+	RelocationBufferInfo *relocationBuffersInfos;
+	bool		found;
+	GISTNodeBuffer *nodeBuffer;
+	BlockNumber blocknum;
+	IndexTuple	itup;
+	int			splitPagesCount = 0,
+				i;
+	GISTENTRY	entry[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	GISTNodeBuffer oldBuf;
+	ListCell   *lc;
+
+	/* If the splitted page doesn't have buffers, we have nothing to do. */
+	if (!LEVEL_HAS_BUFFERS(level, gfbb))
+		return;
+
+	/*
+	 * Get the node buffer of the splitted page.
+	 */
+	blocknum = BufferGetBlockNumber(buffer);
+	nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
+							 HASH_FIND, &found);
+	if (!found)
+	{
+		/* The page has no buffer, so we have nothing to do. */
+		return;
+	}
+
+	/*
+	 * Make a copy of the old buffer, as we're going reuse it as the buffer
+	 * for the new left page, which is on the same block as the old page.
+	 * That's not true for the root page, but that's fine because we never
+	 * have a buffer on the root page anyway. The original algorithm as
+	 * described by Arge et al did, but it's of no use, as you might as well
+	 * read the tuples straight from the heap instead of the root buffer.
+	 */
+	Assert(blocknum != GIST_ROOT_BLKNO);
+	memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
+	oldBuf.isTemp = true;
+
+	/* Reset the old buffer, used for the new left page from now on */
+	nodeBuffer->blocksCount = 0;
+	nodeBuffer->pageBuffer = NULL;
+	nodeBuffer->pageBlocknum = InvalidBlockNumber;
+
+	/*
+	 * Allocate memory for information about relocation buffers.
+	 */
+	splitPagesCount = list_length(splitinfo);
+	relocationBuffersInfos =
+		(RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
+										splitPagesCount);
+
+	/*
+	 * Fill relocation buffers information for node buffers of pages produced
+	 * by split.
+	 */
+	i = 0;
+	foreach(lc, splitinfo)
+	{
+		GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
+		GISTNodeBuffer *newNodeBuffer;
+
+		/* Decompress parent index tuple of node buffer page. */
+		gistDeCompressAtt(giststate, r,
+						  si->downlink, NULL, (OffsetNumber) 0,
+						  relocationBuffersInfos[i].entry,
+						  relocationBuffersInfos[i].isnull);
+
+		/*
+		 * Create a node buffer for the page. The leftmost half is on the same
+		 * block as the old page before split, so for the leftmost half this
+		 * will return the original buffer. The tuples on the original buffer
+		 * were relinked to the temporary buffer, so the original one is now
+		 * empty.
+		 */
+		newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
+
+		relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
+		relocationBuffersInfos[i].splitinfo = si;
+
+		i++;
+	}
+
+	/*
+	 * Loop through all index tuples in the buffer of the page being split,
+	 * moving them to buffers for the new pages.  We try to move each tuple to
+	 * the page that will result in the lowest penalty for the leading column
+	 * or, in the case of a tie, the lowest penalty for the earliest column
+	 * that is not tied.
+	 *
+	 * The page searching logic is very similar to gistchoose().
+	 */
+	while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
+	{
+		float		best_penalty[INDEX_MAX_KEYS];
+		int			i,
+					which;
+		IndexTuple	newtup;
+		RelocationBufferInfo *targetBufferInfo;
+
+		gistDeCompressAtt(giststate, r,
+						  itup, NULL, (OffsetNumber) 0, entry, isnull);
+
+		/* default to using first page (shouldn't matter) */
+		which = 0;
+
+		/*
+		 * best_penalty[j] is the best penalty we have seen so far for column
+		 * j, or -1 when we haven't yet examined column j.  Array entries to
+		 * the right of the first -1 are undefined.
+		 */
+		best_penalty[0] = -1;
+
+		/*
+		 * Loop over possible target pages, looking for one to move this tuple
+		 * to.
+		 */
+		for (i = 0; i < splitPagesCount; i++)
+		{
+			RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
+			bool		zero_penalty;
+			int			j;
+
+			zero_penalty = true;
+
+			/* Loop over index attributes. */
+			for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
+			{
+				float		usize;
+
+				/* Compute penalty for this column. */
+				usize = gistpenalty(giststate, j,
+									&splitPageInfo->entry[j],
+									splitPageInfo->isnull[j],
+									&entry[j], isnull[j]);
+				if (usize > 0)
+					zero_penalty = false;
+
+				if (best_penalty[j] < 0 || usize < best_penalty[j])
+				{
+					/*
+					 * New best penalty for column.  Tentatively select this
+					 * page as the target, and record the best penalty.  Then
+					 * reset the next column's penalty to "unknown" (and
+					 * indirectly, the same for all the ones to its right).
+					 * This will force us to adopt this page's penalty values
+					 * as the best for all the remaining columns during
+					 * subsequent loop iterations.
+					 */
+					which = i;
+					best_penalty[j] = usize;
+
+					if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
+						best_penalty[j + 1] = -1;
+				}
+				else if (best_penalty[j] == usize)
+				{
+					/*
+					 * The current page is exactly as good for this column as
+					 * the best page seen so far.  The next iteration of this
+					 * loop will compare the next column.
+					 */
+				}
+				else
+				{
+					/*
+					 * The current page is worse for this column than the best
+					 * page seen so far.  Skip the remaining columns and move
+					 * on to the next page, if any.
+					 */
+					zero_penalty = false;	/* so outer loop won't exit */
+					break;
+				}
+			}
+
+			/*
+			 * If we find a page with zero penalty for all columns, there's no
+			 * need to examine remaining pages; just break out of the loop and
+			 * return it.
+			 */
+			if (zero_penalty)
+				break;
+		}
+
+		/* OK, "which" is the page index to push the tuple to */
+		targetBufferInfo = &relocationBuffersInfos[which];
+
+		/* Push item to selected node buffer */
+		gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
+
+		/* Adjust the downlink for this page, if needed. */
+		newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
+								 itup, giststate);
+		if (newtup)
+		{
+			gistDeCompressAtt(giststate, r,
+							  newtup, NULL, (OffsetNumber) 0,
+							  targetBufferInfo->entry,
+							  targetBufferInfo->isnull);
+
+			targetBufferInfo->splitinfo->downlink = newtup;
+		}
+	}
+
+	pfree(relocationBuffersInfos);
+}
+
+
+/*
+ * Wrappers around BufFile operations. The main difference is that these
+ * wrappers report errors with ereport(), so that the callers don't need
+ * to check the return code.
+ */
+
+static void
+ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
+{
+	size_t		nread;
+
+	if (BufFileSeekBlock(file, blknum) != 0)
+		elog(ERROR, "could not seek to block %ld in temporary file", blknum);
+	nread = BufFileRead(file, ptr, BLCKSZ);
+	if (nread != BLCKSZ)
+		elog(ERROR, "could not read temporary file: read only %zu of %zu bytes",
+			 nread, (size_t) BLCKSZ);
+}
+
+static void
+WriteTempFileBlock(BufFile *file, long blknum, void *ptr)
+{
+	if (BufFileSeekBlock(file, blknum) != 0)
+		elog(ERROR, "could not seek to block %ld in temporary file", blknum);
+	BufFileWrite(file, ptr, BLCKSZ);
+}