1 files changed, 434 insertions, 0 deletions
diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c
new file mode 100644
index 0000000..66a8983
--- /dev/null
+++ b/src/backend/access/gin/ginpostinglist.c
@@ -0,0 +1,434 @@
+/*-------------------------------------------------------------------------
+ *
+ * ginpostinglist.c
+ *	  routines for dealing with posting lists.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *			src/backend/access/gin/ginpostinglist.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/gin_private.h"
+
+#ifdef USE_ASSERT_CHECKING
+#define CHECK_ENCODING_ROUNDTRIP
+#endif
+
+/*
+ * For encoding purposes, item pointers are represented as 64-bit unsigned
+ * integers. The lowest 11 bits represent the offset number, and the next
+ * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
+ * only 43 low bits are used.
+ *
+ * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
+ * 2^11 on all supported block sizes. We are frugal with the bits, because
+ * smaller integers use fewer bytes in the varbyte encoding, saving disk
+ * space. (If we get a new table AM in the future that wants to use the full
+ * range of possible offset numbers, we'll need to change this.)
+ *
+ * These 43-bit integers are encoded using varbyte encoding. In each byte,
+ * the 7 low bits contain data, while the highest bit is a continuation bit.
+ * When the continuation bit is set, the next byte is part of the same
+ * integer, otherwise this is the last byte of this integer. 43 bits need
+ * at most 7 bytes in this encoding:
+ *
+ * 0XXXXXXX
+ * 1XXXXXXX 0XXXXYYY
+ * 1XXXXXXX 1XXXXYYY 0YYYYYYY
+ * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
+ * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
+ * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
+ * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
+ *
+ * X = bits used for offset number
+ * Y = bits used for block number
+ * u = unused bit
+ *
+ * The bytes are in stored in little-endian order.
+ *
+ * An important property of this encoding is that removing an item from list
+ * never increases the size of the resulting compressed posting list. Proof:
+ *
+ * Removing number is actually replacement of two numbers with their sum. We
+ * have to prove that varbyte encoding of a sum can't be longer than varbyte
+ * encoding of its summands. Sum of two numbers is at most one bit wider than
+ * the larger of the summands. Widening a number by one bit enlarges its length
+ * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
+ * is at most one byte longer than varbyte encoding of larger summand. Lesser
+ * summand is at least one byte, so the sum cannot take more space than the
+ * summands, Q.E.D.
+ *
+ * This property greatly simplifies VACUUM, which can assume that posting
+ * lists always fit on the same page after vacuuming. Note that even though
+ * that holds for removing items from a posting list, you must also be
+ * careful to not cause expansion e.g. when merging uncompressed items on the
+ * page into the compressed lists, when vacuuming.
+ */
+
+/*
+ * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
+ * integer, but you can't fit that many items on a page. 11 ought to be more
+ * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
+ * use the minimum number of bits, but that would require changing the on-disk
+ * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
+ */
+#define MaxHeapTuplesPerPageBits		11
+
+/* Max. number of bytes needed to encode the largest supported integer. */
+#define MaxBytesPerInteger				7
+
+static inline uint64
+itemptr_to_uint64(const ItemPointer iptr)
+{
+	uint64		val;
+
+	Assert(ItemPointerIsValid(iptr));
+	Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
+
+	val = GinItemPointerGetBlockNumber(iptr);
+	val <<= MaxHeapTuplesPerPageBits;
+	val |= GinItemPointerGetOffsetNumber(iptr);
+
+	return val;
+}
+
+static inline void
+uint64_to_itemptr(uint64 val, ItemPointer iptr)
+{
+	GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
+	val = val >> MaxHeapTuplesPerPageBits;
+	GinItemPointerSetBlockNumber(iptr, val);
+
+	Assert(ItemPointerIsValid(iptr));
+}
+
+/*
+ * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
+ */
+static void
+encode_varbyte(uint64 val, unsigned char **ptr)
+{
+	unsigned char *p = *ptr;
+
+	while (val > 0x7F)
+	{
+		*(p++) = 0x80 | (val & 0x7F);
+		val >>= 7;
+	}
+	*(p++) = (unsigned char) val;
+
+	*ptr = p;
+}
+
+/*
+ * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
+ */
+static uint64
+decode_varbyte(unsigned char **ptr)
+{
+	uint64		val;
+	unsigned char *p = *ptr;
+	uint64		c;
+
+	/* 1st byte */
+	c = *(p++);
+	val = c & 0x7F;
+	if (c & 0x80)
+	{
+		/* 2nd byte */
+		c = *(p++);
+		val |= (c & 0x7F) << 7;
+		if (c & 0x80)
+		{
+			/* 3rd byte */
+			c = *(p++);
+			val |= (c & 0x7F) << 14;
+			if (c & 0x80)
+			{
+				/* 4th byte */
+				c = *(p++);
+				val |= (c & 0x7F) << 21;
+				if (c & 0x80)
+				{
+					/* 5th byte */
+					c = *(p++);
+					val |= (c & 0x7F) << 28;
+					if (c & 0x80)
+					{
+						/* 6th byte */
+						c = *(p++);
+						val |= (c & 0x7F) << 35;
+						if (c & 0x80)
+						{
+							/* 7th byte, should not have continuation bit */
+							c = *(p++);
+							val |= c << 42;
+							Assert((c & 0x80) == 0);
+						}
+					}
+				}
+			}
+		}
+	}
+
+	*ptr = p;
+
+	return val;
+}
+
+/*
+ * Encode a posting list.
+ *
+ * The encoded list is returned in a palloc'd struct, which will be at most
+ * 'maxsize' bytes in size.  The number items in the returned segment is
+ * returned in *nwritten. If it's not equal to nipd, not all the items fit
+ * in 'maxsize', and only the first *nwritten were encoded.
+ *
+ * The allocated size of the returned struct is short-aligned, and the padding
+ * byte at the end, if any, is zero.
+ */
+GinPostingList *
+ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
+					   int *nwritten)
+{
+	uint64		prev;
+	int			totalpacked = 0;
+	int			maxbytes;
+	GinPostingList *result;
+	unsigned char *ptr;
+	unsigned char *endptr;
+
+	maxsize = SHORTALIGN_DOWN(maxsize);
+
+	result = palloc(maxsize);
+
+	maxbytes = maxsize - offsetof(GinPostingList, bytes);
+	Assert(maxbytes > 0);
+
+	/* Store the first special item */
+	result->first = ipd[0];
+
+	prev = itemptr_to_uint64(&result->first);
+
+	ptr = result->bytes;
+	endptr = result->bytes + maxbytes;
+	for (totalpacked = 1; totalpacked < nipd; totalpacked++)
+	{
+		uint64		val = itemptr_to_uint64(&ipd[totalpacked]);
+		uint64		delta = val - prev;
+
+		Assert(val > prev);
+
+		if (endptr - ptr >= MaxBytesPerInteger)
+			encode_varbyte(delta, &ptr);
+		else
+		{
+			/*
+			 * There are less than 7 bytes left. Have to check if the next
+			 * item fits in that space before writing it out.
+			 */
+			unsigned char buf[MaxBytesPerInteger];
+			unsigned char *p = buf;
+
+			encode_varbyte(delta, &p);
+			if (p - buf > (endptr - ptr))
+				break;			/* output is full */
+
+			memcpy(ptr, buf, p - buf);
+			ptr += (p - buf);
+		}
+		prev = val;
+	}
+	result->nbytes = ptr - result->bytes;
+
+	/*
+	 * If we wrote an odd number of bytes, zero out the padding byte at the
+	 * end.
+	 */
+	if (result->nbytes != SHORTALIGN(result->nbytes))
+		result->bytes[result->nbytes] = 0;
+
+	if (nwritten)
+		*nwritten = totalpacked;
+
+	Assert(SizeOfGinPostingList(result) <= maxsize);
+
+	/*
+	 * Check that the encoded segment decodes back to the original items.
+	 */
+#if defined (CHECK_ENCODING_ROUNDTRIP)
+	{
+		int			ndecoded;
+		ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
+
+		Assert(ndecoded == totalpacked);
+		Assert(memcmp(tmp, ipd, ndecoded * sizeof(ItemPointerData)) == 0);
+		pfree(tmp);
+	}
+#endif
+
+	return result;
+}
+
+/*
+ * Decode a compressed posting list into an array of item pointers.
+ * The number of items is returned in *ndecoded.
+ */
+ItemPointer
+ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
+{
+	return ginPostingListDecodeAllSegments(plist,
+										   SizeOfGinPostingList(plist),
+										   ndecoded_out);
+}
+
+/*
+ * Decode multiple posting list segments into an array of item pointers.
+ * The number of items is returned in *ndecoded_out. The segments are stored
+ * one after each other, with total size 'len' bytes.
+ */
+ItemPointer
+ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
+{
+	ItemPointer result;
+	int			nallocated;
+	uint64		val;
+	char	   *endseg = ((char *) segment) + len;
+	int			ndecoded;
+	unsigned char *ptr;
+	unsigned char *endptr;
+
+	/*
+	 * Guess an initial size of the array.
+	 */
+	nallocated = segment->nbytes * 2 + 1;
+	result = palloc(nallocated * sizeof(ItemPointerData));
+
+	ndecoded = 0;
+	while ((char *) segment < endseg)
+	{
+		/* enlarge output array if needed */
+		if (ndecoded >= nallocated)
+		{
+			nallocated *= 2;
+			result = repalloc(result, nallocated * sizeof(ItemPointerData));
+		}
+
+		/* copy the first item */
+		Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
+		Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
+		result[ndecoded] = segment->first;
+		ndecoded++;
+
+		val = itemptr_to_uint64(&segment->first);
+		ptr = segment->bytes;
+		endptr = segment->bytes + segment->nbytes;
+		while (ptr < endptr)
+		{
+			/* enlarge output array if needed */
+			if (ndecoded >= nallocated)
+			{
+				nallocated *= 2;
+				result = repalloc(result, nallocated * sizeof(ItemPointerData));
+			}
+
+			val += decode_varbyte(&ptr);
+
+			uint64_to_itemptr(val, &result[ndecoded]);
+			ndecoded++;
+		}
+		segment = GinNextPostingListSegment(segment);
+	}
+
+	if (ndecoded_out)
+		*ndecoded_out = ndecoded;
+	return result;
+}
+
+/*
+ * Add all item pointers from a bunch of posting lists to a TIDBitmap.
+ */
+int
+ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
+									 TIDBitmap *tbm)
+{
+	int			ndecoded;
+	ItemPointer items;
+
+	items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
+	tbm_add_tuples(tbm, items, ndecoded, false);
+	pfree(items);
+
+	return ndecoded;
+}
+
+/*
+ * Merge two ordered arrays of itempointers, eliminating any duplicates.
+ *
+ * Returns a palloc'd array, and *nmerged is set to the number of items in
+ * the result, after eliminating duplicates.
+ */
+ItemPointer
+ginMergeItemPointers(ItemPointerData *a, uint32 na,
+					 ItemPointerData *b, uint32 nb,
+					 int *nmerged)
+{
+	ItemPointerData *dst;
+
+	dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
+
+	/*
+	 * If the argument arrays don't overlap, we can just append them to each
+	 * other.
+	 */
+	if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
+	{
+		memcpy(dst, a, na * sizeof(ItemPointerData));
+		memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
+		*nmerged = na + nb;
+	}
+	else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
+	{
+		memcpy(dst, b, nb * sizeof(ItemPointerData));
+		memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
+		*nmerged = na + nb;
+	}
+	else
+	{
+		ItemPointerData *dptr = dst;
+		ItemPointerData *aptr = a;
+		ItemPointerData *bptr = b;
+
+		while (aptr - a < na && bptr - b < nb)
+		{
+			int			cmp = ginCompareItemPointers(aptr, bptr);
+
+			if (cmp > 0)
+				*dptr++ = *bptr++;
+			else if (cmp == 0)
+			{
+				/* only keep one copy of the identical items */
+				*dptr++ = *bptr++;
+				aptr++;
+			}
+			else
+				*dptr++ = *aptr++;
+		}
+
+		while (aptr - a < na)
+			*dptr++ = *aptr++;
+
+		while (bptr - b < nb)
+			*dptr++ = *bptr++;
+
+		*nmerged = dptr - dst;
+	}
+
+	return dst;
+}