12 files changed, 10028 insertions, 0 deletions
diff --git a/src/backend/access/brin/Makefile b/src/backend/access/brin/Makefile
new file mode 100644
index 0000000..a386cb7
--- /dev/null
+++ b/src/backend/access/brin/Makefile
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for access/brin
+#
+# IDENTIFICATION
+#    src/backend/access/brin/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/brin
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = \
+	brin.o \
+	brin_bloom.o \
+	brin_inclusion.o \
+	brin_minmax.o \
+	brin_minmax_multi.o \
+	brin_pageops.o \
+	brin_revmap.o \
+	brin_tuple.o \
+	brin_validate.o \
+	brin_xlog.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/brin/README b/src/backend/access/brin/README
new file mode 100644
index 0000000..636d965
--- /dev/null
+++ b/src/backend/access/brin/README
@@ -0,0 +1,189 @@
+Block Range Indexes (BRIN)
+==========================
+
+BRIN indexes intend to enable very fast scanning of extremely large tables.
+
+The essential idea of a BRIN index is to keep track of summarizing values in
+consecutive groups of heap pages (page ranges); for example, the minimum and
+maximum values for datatypes with a btree opclass, or the bounding box for
+geometric types.  These values can be used to avoid scanning such pages
+during a table scan, depending on query quals.
+
+The cost of this is having to update the stored summary values of each page
+range as tuples are inserted into them.
+
+
+Access Method Design
+--------------------
+
+Since item pointers are not stored inside indexes of this type, it is not
+possible to support the amgettuple interface.  Instead, we only provide
+amgetbitmap support.  The amgetbitmap routine returns a lossy TIDBitmap
+comprising all pages in those page ranges that match the query
+qualifications.  The recheck step in the BitmapHeapScan node prunes tuples
+that do not satisfy the query qualifications.
+
+An operator class must have the following entries:
+
+- generic support procedures (pg_amproc), identical to all opclasses:
+  * "opcinfo" (BRIN_PROCNUM_OPCINFO) initializes a structure for index
+    creation or scanning
+  * "addValue" (BRIN_PROCNUM_ADDVALUE) takes an index tuple and a heap item,
+    and possibly changes the index tuple so that it includes the heap item
+    values
+  * "consistent" (BRIN_PROCNUM_CONSISTENT) takes an index tuple and query
+    quals, and returns whether the index tuple values match the query quals.
+  * "union" (BRIN_PROCNUM_UNION) takes two index tuples and modifies the first
+    one so that it represents the union of the two.
+Procedure numbers up to 10 are reserved for future expansion.
+
+Additionally, each opclass needs additional support functions:
+- Minmax-style operator classes:
+  * Proc numbers 11-14 are used for the functions implementing inequality
+    operators for the type, in this order: less than, less or equal,
+    greater or equal, greater than.
+
+Opclasses using a different design will require different additional procedure
+numbers.
+
+Operator classes also need to have operator (pg_amop) entries so that the
+optimizer can choose the index to execute queries.
+- Minmax-style operator classes:
+  * The same operators as btree (<=, <, =, >=, >)
+
+Each index tuple stores some opclass-specified values and some NULL bits; the
+NULL bits are kept in a single null bitmask of length twice the number of
+columns.  The generic NULL bits indicate, for each column:
+  * bt_hasnulls: Whether there's any NULL value at all in the page range
+  * bt_allnulls: Whether all values are NULLs in the page range
+
+The opclass-specified values are:
+- Minmax-style operator classes
+  * minimum value across all tuples in the range
+  * maximum value across all tuples in the range
+
+Note that the addValue and Union support procedures must be careful to
+datumCopy() the values they want to store in the in-memory BRIN tuple, and
+must pfree() the old copies when replacing older ones.  Since some values
+referenced from the tuple persist and others go away, there is no
+well-defined lifetime for a memory context that would make this automatic.
+
+
+The Range Map
+-------------
+
+To find the index tuple for a particular page range, we have an internal
+structure we call the range map, or "revmap" for short.  This stores one TID
+per page range, which is the address of the index tuple summarizing that
+range.  Since the map entries are fixed size, it is possible to compute the
+address of the range map entry for any given heap page by simple arithmetic.
+
+When a new heap tuple is inserted in a summarized page range, we compare the
+existing index tuple with the new heap tuple.  If the heap tuple is outside
+the summarization data given by the index tuple for any indexed column (or
+if the new heap tuple contains null values but the index tuple indicates
+there are no nulls), the index is updated with the new values.  In many
+cases it is possible to update the index tuple in-place, but if the new
+index tuple is larger than the old one and there's not enough space in the
+page, it is necessary to create a new index tuple with the new values.  The
+range map can be updated quickly to point to it; the old index tuple is
+removed.
+
+If the range map points to an invalid TID, the corresponding page range is
+considered to be not summarized.  When tuples are added to unsummarized
+pages, nothing needs to happen.
+
+To scan a table following a BRIN index, we scan the range map sequentially.
+This yields index tuples in ascending page range order.  Query quals are
+matched to each index tuple; if they match, each page within the page range
+is returned as part of the output TID bitmap.  If there's no match, they are
+skipped.  Range map entries returning invalid index TIDs, that is
+unsummarized page ranges, are also returned in the TID bitmap.
+
+The revmap is stored in the first few blocks of the index main fork,
+immediately following the metapage.  Whenever the revmap needs to be
+extended by another page, existing tuples in that page are moved to some
+other page.
+
+Heap tuples can be removed from anywhere without restriction.  It might be
+useful to mark the corresponding index tuple somehow, if the heap tuple is
+one of the constraining values of the summary data (i.e. either min or max
+in the case of a btree-opclass-bearing datatype), so that in the future we
+are aware of the need to re-execute summarization on that range, leading to
+a possible tightening of the summary values.
+
+Summarization
+-------------
+
+At index creation time, the whole table is scanned; for each page range the
+summarizing values of each indexed column and nulls bitmap are collected and
+stored in the index.  The partially-filled page range at the end of the
+table is also summarized.
+
+As new tuples get inserted at the end of the table, they may update the
+index tuple that summarizes the partial page range at the end.  Eventually
+that page range is complete and new tuples belong in a new page range that
+hasn't yet been summarized.  Those insertions do not create a new index
+entry; instead, the page range remains unsummarized until later.
+
+Whenever VACUUM is run on the table, all unsummarized page ranges are
+summarized.  This action can also be invoked by the user via
+brin_summarize_new_values().  Both these procedures scan all the
+unsummarized ranges, and create a summary tuple.  Again, this includes the
+partially-filled page range at the end of the table.
+
+Vacuuming
+---------
+
+Since no heap TIDs are stored in a BRIN index, it's not necessary to scan the
+index when heap tuples are removed.  It might be that some summary values can
+be tightened if heap tuples have been deleted; but this would represent an
+optimization opportunity only, not a correctness issue.  It's simpler to
+represent this as the need to re-run summarization on the affected page range
+rather than "subtracting" values from the existing one.  This is not
+currently implemented.
+
+Note that if there are no indexes on the table other than the BRIN index,
+usage of maintenance_work_mem by vacuum can be decreased significantly, because
+no detailed index scan needs to take place (and thus it's not necessary for
+vacuum to save TIDs to remove).  It's unlikely that BRIN indexes would be the
+only indexes on a table, though, because primary keys can be btrees only, and
+so we don't implement this optimization.
+
+
+Optimizer
+---------
+
+The optimizer selects the index based on the operator class' pg_amop
+entries for the column.
+
+
+Future improvements
+-------------------
+
+* Different-size page ranges?
+  In the current design, each "index entry" in a BRIN index covers the same
+  number of pages.  There's no hard reason for this; it might make sense to
+  allow the index to self-tune so that some index entries cover smaller page
+  ranges, if this allows the summary values to be more compact.  This would incur
+  larger BRIN overhead for the index itself, but might allow better pruning of
+  page ranges during scan.  In the limit of one index tuple per page, the index
+  itself would occupy too much space, even though we would be able to skip
+  reading the most heap pages, because the summary values are tight; in the
+  opposite limit of a single tuple that summarizes the whole table, we wouldn't
+  be able to prune anything even though the index is very small.  This can
+  probably be made to work by using the range map as an index in itself.
+
+* More compact representation for TIDBitmap?
+  TIDBitmap is the structure used to represent bitmap scans.  The
+  representation of lossy page ranges is not optimal for our purposes, because
+  it uses a Bitmapset to represent pages in the range; since we're going to return
+  all pages in a large range, it might be more convenient to allow for a
+  struct that uses start and end page numbers to represent the range, instead.
+
+* Better vacuuming?
+  It might be useful to enable passing more useful info to BRIN indexes during
+  vacuuming about tuples that are deleted, i.e. do not require the callback to
+  pass each tuple's TID.  For instance we might need a callback that passes a
+  block number instead of a TID.  That would help determine when to re-run
+  summarization on blocks that have seen lots of tuple deletions.
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
new file mode 100644
index 0000000..a38ff1b
--- /dev/null
+++ b/src/backend/access/brin/brin.c
@@ -0,0 +1,1933 @@
+/*
+ * brin.c
+ *		Implementation of BRIN indexes for Postgres
+ *
+ * See src/backend/access/brin/README for details.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin.c
+ *
+ * TODO
+ *		* ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
+ */
+#include "postgres.h"
+
+#include "access/brin.h"
+#include "access/brin_page.h"
+#include "access/brin_pageops.h"
+#include "access/brin_xlog.h"
+#include "access/relation.h"
+#include "access/reloptions.h"
+#include "access/relscan.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/xloginsert.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/autovacuum.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/index_selfuncs.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+
+/*
+ * We use a BrinBuildState during initial construction of a BRIN index.
+ * The running state is kept in a BrinMemTuple (the bs_dtuple field).
+ */
+typedef struct BrinBuildState
+{
+	Relation	bs_irel;		/* presumably the index being built */
+	int			bs_numtuples;	/* presumably # of index tuples formed */
+	Buffer		bs_currentInsertBuf;
+	BlockNumber bs_pagesPerRange;	/* presumably heap pages per range */
+	BlockNumber bs_currRangeStart;	/* presumably start of current range */
+	BrinRevmap *bs_rmAccess;	/* revmap access object */
+	BrinDesc   *bs_bdesc;
+	BrinMemTuple *bs_dtuple;	/* running summary state (see above) */
+} BrinBuildState;
+
+/*
+ * Struct used as "opaque" (scan->opaque) during index scans
+ */
+typedef struct BrinOpaque
+{
+	BlockNumber bo_pagesPerRange;	/* filled in by brinRevmapInitialize */
+	BrinRevmap *bo_rmAccess;	/* result of brinRevmapInitialize */
+	BrinDesc   *bo_bdesc;		/* result of brin_build_desc on the index */
+} BrinOpaque;
+
+#define BRIN_ALL_BLOCKRANGES	InvalidBlockNumber
+
+static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
+												  BrinRevmap *revmap, BlockNumber pagesPerRange);
+static void terminate_brin_buildstate(BrinBuildState *state);
+static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
+						  bool include_partial, double *numSummarized, double *numExisting);
+static void form_and_insert_tuple(BrinBuildState *state);
+static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
+						 BrinTuple *b);
+static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
+static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc,
+								BrinMemTuple *dtup, Datum *values, bool *nulls);
+static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys);
+
+/*
+ * Handler function for the BRIN access method: fills in an IndexAmRoutine
+ * node with BRIN's parameters and callbacks and returns a pointer to it.
+ */
+Datum
+brinhandler(PG_FUNCTION_ARGS)
+{
+	IndexAmRoutine *am = makeNode(IndexAmRoutine);
+
+	/* parameters: scalar settings, then enabled flags, then disabled flags */
+	am->amstrategies = 0;
+	am->amsupport = BRIN_LAST_OPTIONAL_PROCNUM;
+	am->amoptsprocnum = BRIN_PROCNUM_OPTIONS;
+	am->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_CLEANUP;
+	am->amkeytype = InvalidOid;
+	am->amcanmulticol = true;
+	am->amoptionalkey = true;
+	am->amsearchnulls = true;
+	am->amstorage = true;
+	am->amcanorder = false;
+	am->amcanorderbyop = false;
+	am->amcanbackward = false;
+	am->amcanunique = false;
+	am->amsearcharray = false;
+	am->amclusterable = false;
+	am->ampredlocks = false;
+	am->amcanparallel = false;
+	am->amcaninclude = false;
+	am->amusemaintenanceworkmem = false;
+	/* callbacks: the ones BRIN assigns, then the ones it leaves NULL */
+	am->ambuild = brinbuild;
+	am->ambuildempty = brinbuildempty;
+	am->aminsert = brininsert;
+	am->ambulkdelete = brinbulkdelete;
+	am->amvacuumcleanup = brinvacuumcleanup;
+	am->amcostestimate = brincostestimate;
+	am->amoptions = brinoptions;
+	am->amvalidate = brinvalidate;
+	am->ambeginscan = brinbeginscan;
+	am->amrescan = brinrescan;
+	am->amgetbitmap = bringetbitmap;
+	am->amendscan = brinendscan;
+	am->amcanreturn = NULL;
+	am->amproperty = NULL;
+	am->ambuildphasename = NULL;
+	am->amadjustmembers = NULL;
+	am->amgettuple = NULL;
+	am->ammarkpos = NULL;
+	am->amrestrpos = NULL;
+	am->amestimateparallelscan = NULL;
+	am->aminitparallelscan = NULL;
+	am->amparallelrescan = NULL;
+
+	PG_RETURN_POINTER(am);
+}
+
+/*
+ * A tuple in the heap is being inserted.  To keep a brin index up to date,
+ * we fetch the summary tuple for the page range containing heaptid and
+ * merge the new values into it; if that changes the summary, the index
+ * tuple is updated, retrying from scratch when the update fails.
+ *
+ * If autosummarization is enabled, this may also request summarization of
+ * the previous page range.  If the range is not currently summarized (i.e.
+ * no tuple is found for it), there's nothing to do for this tuple.
+ *
+ * heapRel, checkUnique and indexUnchanged are unused; always returns false.
+ */
+bool
+brininsert(Relation idxRel, Datum *values, bool *nulls,
+		   ItemPointer heaptid, Relation heapRel,
+		   IndexUniqueCheck checkUnique,
+		   bool indexUnchanged,
+		   IndexInfo *indexInfo)
+{
+	BlockNumber pagesPerRange;
+	BlockNumber origHeapBlk;
+	BlockNumber heapBlk;
+	BrinDesc   *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
+	BrinRevmap *revmap;
+	Buffer		buf = InvalidBuffer;
+	MemoryContext tupcxt = NULL;
+	MemoryContext oldcxt = CurrentMemoryContext;
+	bool		autosummarize = BrinGetAutoSummarize(idxRel);
+
+	revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
+
+	/*
+	 * origHeapBlk is the block number where the insertion occurred.  heapBlk
+	 * is the first block in the corresponding page range.
+	 */
+	origHeapBlk = ItemPointerGetBlockNumber(heaptid);
+	heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
+
+	for (;;)
+	{
+		bool		need_insert = false;
+		OffsetNumber off;
+		BrinTuple  *brtup;
+		BrinMemTuple *dtup;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If auto-summarization is enabled and we just inserted the first
+		 * tuple into the first block of a new non-first page range, request a
+		 * summarization run of the previous range.
+		 */
+		if (autosummarize &&
+			heapBlk > 0 &&
+			heapBlk == origHeapBlk &&
+			ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
+		{
+			BlockNumber lastPageRange = heapBlk - 1;	/* last block of prior range */
+			BrinTuple  *lastPageTuple;
+
+			lastPageTuple =
+				brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
+										 NULL, BUFFER_LOCK_SHARE, NULL);
+			if (!lastPageTuple)
+			{
+				bool		recorded;
+
+				recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange,
+												 RelationGetRelid(idxRel),
+												 lastPageRange);
+				if (!recorded)
+					ereport(LOG,
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+							 errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
+									RelationGetRelationName(idxRel),
+									lastPageRange)));
+			}
+			else
+				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		}
+
+		brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
+										 NULL, BUFFER_LOCK_SHARE, NULL);
+
+		/* if range is unsummarized, there's nothing to do */
+		if (!brtup)
+			break;
+
+		/* No BrinDesc cached in indexInfo yet?  Build it in ii_Context. */
+		if (bdesc == NULL)
+		{
+			MemoryContextSwitchTo(indexInfo->ii_Context);
+			bdesc = brin_build_desc(idxRel);
+			indexInfo->ii_AmCache = (void *) bdesc;
+			MemoryContextSwitchTo(oldcxt);
+		}
+		/* First time through in this brininsert call? */
+		if (tupcxt == NULL)
+		{
+			tupcxt = AllocSetContextCreate(CurrentMemoryContext,
+										   "brininsert cxt",
+										   ALLOCSET_DEFAULT_SIZES);
+			MemoryContextSwitchTo(tupcxt);
+		}
+
+		dtup = brin_deform_tuple(bdesc, brtup, NULL);
+
+		need_insert = add_values_to_range(idxRel, bdesc, dtup, values, nulls);
+
+		if (!need_insert)
+		{
+			/*
+			 * The tuple is consistent with the new values, so there's nothing
+			 * to do.
+			 */
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		}
+		else
+		{
+			Page		page = BufferGetPage(buf);
+			ItemId		lp = PageGetItemId(page, off);
+			Size		origsz;
+			BrinTuple  *origtup;
+			Size		newsz;
+			BrinTuple  *newtup;
+			bool		samepage;
+
+			/*
+			 * Make a copy of the old tuple, so that we can compare it after
+			 * re-acquiring the lock.
+			 */
+			origsz = ItemIdGetLength(lp);
+			origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
+
+			/*
+			 * Before releasing the lock, check if we can attempt a same-page
+			 * update.  Another process could insert a tuple concurrently in
+			 * the same page though, so downstream we must be prepared to cope
+			 * if this turns out to not be possible after all.
+			 */
+			newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
+			samepage = brin_can_do_samepage_update(buf, origsz, newsz);
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+			/*
+			 * Try to update the tuple.  If this doesn't work for whatever
+			 * reason, we need to restart from the top; the revmap might be
+			 * pointing at a different tuple for this block now, so we need to
+			 * recompute to ensure both our new heap tuple and the other
+			 * inserter's are covered by the combined tuple.  It might be that
+			 * we don't need to update at all.
+			 */
+			if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
+							   buf, off, origtup, origsz, newtup, newsz,
+							   samepage))
+			{
+				/* no luck; start over */
+				MemoryContextResetAndDeleteChildren(tupcxt);
+				continue;
+			}
+		}
+
+		/* either no update was needed, or the update succeeded */
+		break;
+	}
+
+	brinRevmapTerminate(revmap);
+	if (BufferIsValid(buf))
+		ReleaseBuffer(buf);
+	MemoryContextSwitchTo(oldcxt);
+	if (tupcxt != NULL)
+		MemoryContextDelete(tupcxt);
+
+	return false;
+}
+
+/*
+ * Begin a BRIN index scan: create the scan descriptor and attach our
+ * private state (revmap access object plus BRIN descriptor) to it.
+ *
+ * The pages-per-range value is obtained via brinRevmapInitialize.  It cannot
+ * change while we hold a lock on the index, so brinrescan does not need to
+ * fetch it again.
+ */
+IndexScanDesc
+brinbeginscan(Relation r, int nkeys, int norderbys)
+{
+	IndexScanDesc desc = RelationGetIndexScan(r, nkeys, norderbys);
+	BrinOpaque *so;
+
+	/* build the AM-private state and hang it off the descriptor */
+	so = (BrinOpaque *) palloc(sizeof(BrinOpaque));
+	so->bo_rmAccess = brinRevmapInitialize(r, &so->bo_pagesPerRange,
+										   desc->xs_snapshot);
+	so->bo_bdesc = brin_build_desc(r);
+	desc->opaque = so;
+
+	return desc;
+}
+
+/*
+ * Execute the index scan.
+ *
+ * This works by reading index TIDs from the revmap, and obtaining the index
+ * tuples pointed to by them; the summary values in the index tuples are
+ * compared to the scan keys.  We add to the TID bitmap all the pages in
+ * ranges whose index tuples match the scan keys.  Ranges with no summary
+ * tuple (unsummarized) and placeholder tuples are added regardless of keys.
+ *
+ * Returns ten times the number of pages added, as a rough tuple estimate;
+ * the count is kept in 64 bits so that huge tables cannot overflow it.
+ */
+int64
+bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
+{
+	Relation	idxRel = scan->indexRelation;
+	Buffer		buf = InvalidBuffer;
+	BrinDesc   *bdesc;
+	Oid			heapOid;
+	Relation	heapRel;
+	BrinOpaque *opaque;
+	BlockNumber nblocks;
+	BlockNumber heapBlk;
+	int64		totalpages = 0;
+	FmgrInfo   *consistentFn;
+	MemoryContext oldcxt;
+	MemoryContext perRangeCxt;
+	BrinMemTuple *dtup;
+	BrinTuple  *btup = NULL;
+	Size		btupsz = 0;
+	ScanKey   **keys,
+			  **nullkeys;
+	int		   *nkeys,
+			   *nnullkeys;
+	int			keyno;
+	char	   *ptr;
+	Size		len;
+	char	   *tmp PG_USED_FOR_ASSERTS_ONLY;
+
+	opaque = (BrinOpaque *) scan->opaque;
+	bdesc = opaque->bo_bdesc;
+	pgstat_count_index_scan(idxRel);
+
+	/*
+	 * We need to know the size of the table so that we know how long to
+	 * iterate on the revmap.
+	 */
+	heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
+	heapRel = table_open(heapOid, AccessShareLock);
+	nblocks = RelationGetNumberOfBlocks(heapRel);
+	table_close(heapRel, AccessShareLock);
+
+	/*
+	 * Make room for the consistent support procedures of indexed columns.  We
+	 * don't look them up here; we do that lazily the first time we see a scan
+	 * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
+	 */
+	consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
+
+	/*
+	 * Make room for per-attribute lists of scan keys that we'll pass to the
+	 * consistent support procedure. We don't know which attributes have scan
+	 * keys, so we allocate space for all attributes. That may use more memory
+	 * but it's probably cheaper than determining which attributes are used.
+	 *
+	 * We keep null and regular keys separate, so that we can pass just the
+	 * regular keys to the consistent function easily.
+	 *
+	 * To reduce the allocation overhead, we allocate one big chunk and then
+	 * carve it into smaller arrays ourselves. All the pieces have exactly the
+	 * same lifetime, so that's OK.
+	 *
+	 * XXX The widest index can have 32 attributes, so the amount of wasted
+	 * memory is negligible. We could invent a more compact approach (with
+	 * just space for used attributes) but that would make the matching more
+	 * complex so it's not a good trade-off.
+	 */
+	len =
+		MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) +	/* regular keys */
+		MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
+		MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts) +
+		MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) +	/* NULL keys */
+		MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
+		MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
+
+	ptr = palloc(len);
+	tmp = ptr;
+
+	keys = (ScanKey **) ptr;
+	ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
+
+	nullkeys = (ScanKey **) ptr;
+	ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
+
+	nkeys = (int *) ptr;
+	ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
+
+	nnullkeys = (int *) ptr;
+	ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
+
+	for (int i = 0; i < bdesc->bd_tupdesc->natts; i++)
+	{
+		keys[i] = (ScanKey *) ptr;
+		ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
+
+		nullkeys[i] = (ScanKey *) ptr;
+		ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
+	}
+
+	Assert(tmp + len == ptr);
+
+	/* zero the number of keys */
+	memset(nkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
+	memset(nnullkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
+
+	/* Preprocess the scan keys - split them into per-attribute arrays. */
+	for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
+	{
+		ScanKey		key = &scan->keyData[keyno];
+		AttrNumber	keyattno = key->sk_attno;
+
+		/*
+		 * The collation of the scan key must match the collation used in the
+		 * index column (but only if the search is not IS NULL/ IS NOT NULL).
+		 * Otherwise we shouldn't be using this index ...
+		 */
+		Assert((key->sk_flags & SK_ISNULL) ||
+			   (key->sk_collation ==
+				TupleDescAttr(bdesc->bd_tupdesc,
+							  keyattno - 1)->attcollation));
+
+		/*
+		 * First time we see this index attribute, so init as needed.
+		 *
+		 * This is a bit of an overkill - we don't know how many scan keys are
+		 * there for this attribute, so we simply allocate the largest number
+		 * possible (as if all keys were for this attribute). This may waste a
+		 * bit of memory, but we only expect small number of scan keys in
+		 * general, so this should be negligible, and repeated repalloc calls
+		 * are not free either.
+		 */
+		if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
+		{
+			FmgrInfo   *procinfo;
+
+			/* First time we see this attribute, so no key/null keys. */
+			Assert(nkeys[keyattno - 1] == 0);
+			Assert(nnullkeys[keyattno - 1] == 0);
+
+			procinfo = index_getprocinfo(idxRel, keyattno,
+										 BRIN_PROCNUM_CONSISTENT);
+			fmgr_info_copy(&consistentFn[keyattno - 1], procinfo,
+						   CurrentMemoryContext);
+		}
+
+		/* Add key to the proper per-attribute array. */
+		if (key->sk_flags & SK_ISNULL)
+		{
+			nullkeys[keyattno - 1][nnullkeys[keyattno - 1]] = key;
+			nnullkeys[keyattno - 1]++;
+		}
+		else
+		{
+			keys[keyattno - 1][nkeys[keyattno - 1]] = key;
+			nkeys[keyattno - 1]++;
+		}
+	}
+
+	/* allocate an initial in-memory tuple, outside of the per-range memcxt */
+	dtup = brin_new_memtuple(bdesc);
+
+	/*
+	 * Setup and use a per-range memory context, which is reset every time we
+	 * loop below.  This avoids having to free the tuples within the loop.
+	 */
+	perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
+										"bringetbitmap cxt",
+										ALLOCSET_DEFAULT_SIZES);
+	oldcxt = MemoryContextSwitchTo(perRangeCxt);
+
+	/*
+	 * Now scan the revmap.  We start by querying for heap page 0,
+	 * incrementing by the number of pages per range; this gives us a full
+	 * view of the table.
+	 */
+	for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
+	{
+		bool		addrange;
+		bool		gottuple = false;
+		BrinTuple  *tup;
+		OffsetNumber off;
+		Size		size;
+
+		CHECK_FOR_INTERRUPTS();
+
+		MemoryContextResetAndDeleteChildren(perRangeCxt);
+
+		tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
+									   &off, &size, BUFFER_LOCK_SHARE,
+									   scan->xs_snapshot);
+		if (tup)
+		{
+			gottuple = true;
+			btup = brin_copy_tuple(tup, size, btup, &btupsz);
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		}
+
+		/*
+		 * For page ranges with no indexed tuple, we must return the whole
+		 * range; otherwise, compare it to the scan keys.
+		 */
+		if (!gottuple)
+		{
+			addrange = true;
+		}
+		else
+		{
+			dtup = brin_deform_tuple(bdesc, btup, dtup);
+			if (dtup->bt_placeholder)
+			{
+				/*
+				 * Placeholder tuples are always returned, regardless of the
+				 * values stored in them.
+				 */
+				addrange = true;
+			}
+			else
+			{
+				int			attno;
+
+				/*
+				 * Compare scan keys with summary values stored for the range.
+				 * If scan keys are matched, the page range must be added to
+				 * the bitmap.  We initially assume the range needs to be
+				 * added; in particular this serves the case where there are
+				 * no keys.
+				 */
+				addrange = true;
+				for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
+				{
+					BrinValues *bval;
+					Datum		add;
+					Oid			collation;
+
+					/*
+					 * skip attributes without any scan keys (both regular and
+					 * IS [NOT] NULL)
+					 */
+					if (nkeys[attno - 1] == 0 && nnullkeys[attno - 1] == 0)
+						continue;
+
+					bval = &dtup->bt_columns[attno - 1];
+
+					/*
+					 * If the BRIN tuple indicates that this range is empty,
+					 * we can skip it: there's nothing to match.  We don't
+					 * need to examine the next columns.
+					 */
+					if (dtup->bt_empty_range)
+					{
+						addrange = false;
+						break;
+					}
+
+					/*
+					 * First check if there are any IS [NOT] NULL scan keys,
+					 * and if we're violating them. In that case we can
+					 * terminate early, without invoking the support function.
+					 *
+					 * As there may be more keys, we can only determine
+					 * mismatch within this loop.
+					 */
+					if (bdesc->bd_info[attno - 1]->oi_regular_nulls &&
+						!check_null_keys(bval, nullkeys[attno - 1],
+										 nnullkeys[attno - 1]))
+					{
+						/*
+						 * If any of the IS [NOT] NULL keys failed, the page
+						 * range as a whole can't pass. So terminate the loop.
+						 */
+						addrange = false;
+						break;
+					}
+
+					/*
+					 * So either there are no IS [NOT] NULL keys, or all
+					 * passed. If there are no regular scan keys, we're done -
+					 * the page range matches. If there are regular keys, but
+					 * the page range is marked as 'all nulls' it can't
+					 * possibly pass (we're assuming the operators are
+					 * strict).
+					 */
+
+					/* No regular scan keys - page range as a whole passes. */
+					if (!nkeys[attno - 1])
+						continue;
+
+					Assert((nkeys[attno - 1] > 0) &&
+						   (nkeys[attno - 1] <= scan->numberOfKeys));
+
+					/* If it is all nulls, it cannot possibly be consistent. */
+					if (bval->bv_allnulls)
+					{
+						addrange = false;
+						break;
+					}
+
+					/*
+					 * Collation from the first key (has to be the same for
+					 * all keys for the same attribute).
+					 */
+					collation = keys[attno - 1][0]->sk_collation;
+
+					/*
+					 * Check whether the scan key is consistent with the page
+					 * range values; if so, have the pages in the range added
+					 * to the output bitmap.
+					 *
+					 * The opclass may or may not support processing of
+					 * multiple scan keys. We can determine that based on the
+					 * number of arguments - functions with extra parameter
+					 * (number of scan keys) do support this, otherwise we
+					 * have to simply pass the scan keys one by one.
+					 */
+					if (consistentFn[attno - 1].fn_nargs >= 4)
+					{
+						/* Check all keys at once */
+						add = FunctionCall4Coll(&consistentFn[attno - 1],
+												collation,
+												PointerGetDatum(bdesc),
+												PointerGetDatum(bval),
+												PointerGetDatum(keys[attno - 1]),
+												Int32GetDatum(nkeys[attno - 1]));
+						addrange = DatumGetBool(add);
+					}
+					else
+					{
+						/*
+						 * Check keys one by one
+						 *
+						 * When there are multiple scan keys, failure to meet
+						 * the criteria for a single one of them is enough to
+						 * discard the range as a whole, so break out of the
+						 * loop as soon as a false return value is obtained.
+						 */
+						int			k;
+
+						for (k = 0; k < nkeys[attno - 1]; k++)
+						{
+							add = FunctionCall3Coll(&consistentFn[attno - 1],
+													keys[attno - 1][k]->sk_collation,
+													PointerGetDatum(bdesc),
+													PointerGetDatum(bval),
+													PointerGetDatum(keys[attno - 1][k]));
+							addrange = DatumGetBool(add);
+							if (!addrange)
+								break;
+						}
+					}
+
+					/*
+					 * If we found a scan key eliminating the range, no need to
+					 * check additional ones.
+					 */
+					if (!addrange)
+						break;
+				}
+			}
+		}
+
+		/* add the range's pages to the bitmap, in the context we entered with */
+		if (addrange)
+		{
+			BlockNumber pageno;
+
+			MemoryContextSwitchTo(oldcxt);
+			for (pageno = heapBlk;
+				 pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1;
+				 pageno++)
+			{
+				tbm_add_page(tbm, pageno);
+				totalpages++;
+			}
+			MemoryContextSwitchTo(perRangeCxt);
+		}
+	}
+
+	MemoryContextSwitchTo(oldcxt);
+	MemoryContextDelete(perRangeCxt);
+
+	if (buf != InvalidBuffer)
+		ReleaseBuffer(buf);
+
+	/*
+	 * XXX We have an approximation of the number of *pages* that our scan
+	 * returns, but we don't have a precise idea of the number of heap tuples
+	 * involved.
+	 */
+	return totalpages * 10;
+}
+
+/*
+ * brinrescan() -- reset a BRIN index scan, installing the given scan keys
+ */
+void
+brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+		   ScanKey orderbys, int norderbys)
+{
+	/*
+	 * Scan keys are not preprocessed here.  Other index AMs take this chance
+	 * (or one early in the scan) to drop redundant keys or return early on
+	 * unsatisfiable ones, cf. _bt_preprocess_keys; BRIN could too, someday.
+	 */
+
+	/* nothing to copy when no keys were passed or the scan has none */
+	if (scankey == NULL || scan->numberOfKeys <= 0)
+		return;
+
+	memmove(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
+}
+
+/*
+ * brinendscan() -- shut down a BRIN index scan and free its private state
+ */
+void
+brinendscan(IndexScanDesc scan)
+{
+	BrinOpaque *scanstate = (BrinOpaque *) scan->opaque;
+
+	brinRevmapTerminate(scanstate->bo_rmAccess);
+	brin_free_desc(scanstate->bo_bdesc);
+	pfree(scanstate);
+}
+
+/*
+ * brinbuildCallback() -- per-heap-tuple callback for table_index_build_scan
+ *
+ * The summary of the range the last tuple falls in is not inserted into the
+ * index here: it stays in the build state once we've been called for the
+ * last time.  It is up to the caller to insert it, if appropriate.
+ */
+static void
+brinbuildCallback(Relation index,
+				  ItemPointer tid,
+				  Datum *values,
+				  bool *isnull,
+				  bool tupleIsAlive,
+				  void *brstate)
+{
+	BrinBuildState *state = (BrinBuildState *) brstate;
+	BlockNumber tupblock = ItemPointerGetBlockNumber(tid);
+
+	/*
+	 * Flush every range that ends before this tuple's block.  The scan may
+	 * have skipped any number of pages devoid of live tuples, so several
+	 * ranges can complete at once; each of them still gets an index tuple.
+	 */
+	for (;;)
+	{
+		BlockNumber rangeLast;
+
+		/* last heap block belonging to the range being accumulated */
+		rangeLast = state->bs_currRangeStart + state->bs_pagesPerRange - 1;
+		if (tupblock <= rangeLast)
+			break;
+
+		BRIN_elog((DEBUG2,
+				   "brinbuildCallback: completed a range: %u--%u",
+				   state->bs_currRangeStart,
+				   state->bs_currRangeStart + state->bs_pagesPerRange));
+
+		/* emit the index tuple summarizing the completed range */
+		form_and_insert_tuple(state);
+		/* move on to the next range, starting from a clean summary */
+		state->bs_currRangeStart += state->bs_pagesPerRange;
+		brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
+	}
+
+	/* fold this tuple's values into the summary of its range */
+	(void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
+							   values, isnull);
+}
+
+/*
+ * brinbuild() -- build a new BRIN index.
+ *
+ * Initializes the metapage of the (necessarily empty) index, WAL-logging it
+ * if the index needs WAL, then scans the heap summarizing each page range.
+ * Returns the scan's heap tuple count and the number of index tuples inserted.
+ */
+IndexBuildResult *
+brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
+{
+	IndexBuildResult *result;
+	double		reltuples;
+	double		idxtuples;
+	BrinRevmap *revmap;
+	BrinBuildState *state;
+	Buffer		meta;
+	BlockNumber pagesPerRange;
+
+	/* We expect to be called exactly once for any index relation. */
+	if (RelationGetNumberOfBlocks(index) != 0)
+		elog(ERROR, "index \"%s\" already contains data",
+			 RelationGetRelationName(index));
+
+	/*
+	 * Critical section not required, because on error the creation of the
+	 * whole relation will be rolled back.
+	 */
+
+	meta = ReadBuffer(index, P_NEW);
+	Assert(BufferGetBlockNumber(meta) == BRIN_METAPAGE_BLKNO);
+	LockBuffer(meta, BUFFER_LOCK_EXCLUSIVE);
+
+	brin_metapage_init(BufferGetPage(meta), BrinGetPagesPerRange(index),
+					   BRIN_CURRENT_VERSION);
+	MarkBufferDirty(meta);
+
+	if (RelationNeedsWAL(index))
+	{
+		xl_brin_createidx xlrec;
+		XLogRecPtr	recptr;
+		Page		page;
+
+		xlrec.version = BRIN_CURRENT_VERSION;
+		xlrec.pagesPerRange = BrinGetPagesPerRange(index);
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
+		XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT | REGBUF_STANDARD);
+
+		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
+
+		page = BufferGetPage(meta);
+		PageSetLSN(page, recptr);
+	}
+
+	UnlockReleaseBuffer(meta);
+
+	/*
+	 * Initialize our state, including the deformed tuple state.
+	 */
+	revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
+	state = initialize_brin_buildstate(index, revmap, pagesPerRange);
+
+	/*
+	 * Now scan the relation.  No syncscan allowed here because we want the
+	 * heap blocks in physical order.
+	 */
+	reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
+									   brinbuildCallback, (void *) state, NULL);
+
+	/* insert the summary of the final range, left pending by the callback */
+	form_and_insert_tuple(state);
+
+	/* release resources, saving the index tuple count first */
+	idxtuples = state->bs_numtuples;
+	brinRevmapTerminate(state->bs_rmAccess);
+	terminate_brin_buildstate(state);
+
+	/* Return statistics */
+	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+
+	result->heap_tuples = reltuples;
+	result->index_tuples = idxtuples;
+
+	return result;
+}
+
+/* brinbuildempty() -- build an empty BRIN index in the init fork */
+void
+brinbuildempty(Relation index)
+{
+	Buffer		initbuf;
+
+	/* The only page of an empty BRIN index is its metapage. */
+	initbuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(initbuf, BUFFER_LOCK_EXCLUSIVE);
+
+	/* Fill in the metapage and xlog it, all within a critical section. */
+	START_CRIT_SECTION();
+	brin_metapage_init(BufferGetPage(initbuf), BrinGetPagesPerRange(index),
+					   BRIN_CURRENT_VERSION);
+	MarkBufferDirty(initbuf);
+	log_newpage_buffer(initbuf, true);
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(initbuf);
+}
+
+/*
+ * brinbulkdelete
+ *		BRIN keeps no index tuple per heap tuple, so there is nothing to
+ *		delete here; we only make sure a stats struct exists.
+ *
+ * XXX a range could be flagged "dirty" whenever a heap tuple holding its
+ * minimum or maximum is deleted, to request re-summarization of that range.
+ * That would need an extra flag in brintuples.
+ */
+IndexBulkDeleteResult *
+brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+			   IndexBulkDeleteCallback callback, void *callback_state)
+{
+	/* hand back the caller's struct when there is one */
+	if (stats != NULL)
+		return stats;
+	/* first time through: allocate a zeroed struct */
+	return (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+}
+
+/*
+ * brinvacuumcleanup() -- "vacuum" a BRIN index.  That amounts to cleaning up
+ * its pages and summarizing the ranges that are currently unsummarized.
+ */
+IndexBulkDeleteResult *
+brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+{
+	Relation	idxRel = info->index;
+	Relation	heapRel;
+	Oid			heapOid;
+
+	/* ANALYZE ONLY mode: nothing to do */
+	if (info->analyze_only)
+		return stats;
+
+	/* only num_pages is set here; a newly allocated struct is zeroed */
+	if (stats == NULL)
+		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+	stats->num_pages = RelationGetNumberOfBlocks(idxRel);
+
+	heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
+	heapRel = table_open(heapOid, AccessShareLock);
+	brin_vacuum_scan(idxRel, info->strategy);
+
+	/* new and pre-existing summaries are added up in the same counter */
+	brinsummarize(idxRel, heapRel, BRIN_ALL_BLOCKRANGES, false,
+				  &stats->num_index_tuples, &stats->num_index_tuples);
+	table_close(heapRel, AccessShareLock);
+	return stats;
+}
+
+/*
+ * brinoptions() -- process the reloptions given for a BRIN index
+ */
+bytea *
+brinoptions(Datum reloptions, bool validate)
+{
+	/* each supported option, with the BrinOptions field it is stored in */
+	static const relopt_parse_elt brin_relopts[] = {
+		{"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
+		{"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
+	};
+
+	return (bytea *) build_reloptions(reloptions, validate, RELOPT_KIND_BRIN,
+									  sizeof(BrinOptions),
+									  brin_relopts, lengthof(brin_relopts));
+}
+
+/*
+ * SQL-callable function that summarizes every not-yet-summarized range of
+ * an index; a thin wrapper around brin_summarize_range().
+ */
+Datum
+brin_summarize_new_values(PG_FUNCTION_ARGS)
+{
+	Datum		indexarg = PG_GETARG_DATUM(0);
+	Datum		allranges = Int64GetDatum((int64) BRIN_ALL_BLOCKRANGES);
+
+	/* BRIN_ALL_BLOCKRANGES asks for all unsummarized ranges */
+	return DirectFunctionCall2(brin_summarize_range, indexarg, allranges);
+}
+
+/*
+ * SQL-callable function to summarize the page range containing heap block
+ * arg 1 of BRIN index arg 0, if not already summarized; BRIN_ALL_BLOCKRANGES
+ * means all unsummarized ranges.  Returns the number of ranges summarized.
+ */
+Datum
+brin_summarize_range(PG_FUNCTION_ARGS)
+{
+	Oid			indexoid = PG_GETARG_OID(0);
+	int64		heapBlk64 = PG_GETARG_INT64(1);
+	BlockNumber heapBlk;
+	Oid			heapoid;
+	Relation	indexRel;
+	Relation	heapRel;
+	Oid			save_userid;
+	int			save_sec_context;
+	int			save_nestlevel;
+	double		numSummarized = 0;
+
+	if (RecoveryInProgress())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is in progress"),
+				 errhint("BRIN control functions cannot be executed during recovery.")));
+
+	if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("block number out of range: %lld",
+						(long long) heapBlk64)));
+	heapBlk = (BlockNumber) heapBlk64;
+
+	/*
+	 * We must lock table before index to avoid deadlocks.  However, if the
+	 * passed indexoid isn't an index then IndexGetRelation() will fail.
+	 * Rather than emitting a not-very-helpful error message, postpone
+	 * complaining, expecting that the is-it-an-index test below will fail.
+	 */
+	heapoid = IndexGetRelation(indexoid, true);
+	if (OidIsValid(heapoid))
+	{
+		heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
+
+		/*
+		 * Autovacuum calls us.  For its benefit, switch to the table owner's
+		 * userid, so that any index functions are run as that user.  Also
+		 * lock down security-restricted operations and arrange to make GUC
+		 * variable changes local to this command.  This is harmless, albeit
+		 * unnecessary, when called from SQL, because we fail shortly if the
+		 * user does not own the index.
+		 */
+		GetUserIdAndSecContext(&save_userid, &save_sec_context);
+		SetUserIdAndSecContext(heapRel->rd_rel->relowner,
+							   save_sec_context | SECURITY_RESTRICTED_OPERATION);
+		save_nestlevel = NewGUCNestLevel();
+	}
+	else
+	{
+		heapRel = NULL;
+		/* Set these just to suppress "uninitialized variable" warnings */
+		save_userid = InvalidOid;
+		save_sec_context = -1;
+		save_nestlevel = -1;
+	}
+
+	indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
+
+	/* Must be a BRIN index */
+	if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+		indexRel->rd_rel->relam != BRIN_AM_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a BRIN index",
+						RelationGetRelationName(indexRel))));
+
+	/* Original user must own the index (same privilege as VACUUM needs) */
+	if (heapRel != NULL && !pg_class_ownercheck(indexoid, save_userid))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
+					   RelationGetRelationName(indexRel));
+
+	/*
+	 * Since we did the IndexGetRelation call above without any lock, it's
+	 * barely possible that a race against an index drop/recreation could have
+	 * netted us the wrong table.  Recheck.
+	 */
+	if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 errmsg("could not open parent table of index \"%s\"",
+						RelationGetRelationName(indexRel))));
+
+	/* Only summarize a valid index; see gin_clean_pending_list() */
+	if (indexRel->rd_index->indisvalid)
+		brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
+	else
+		ereport(DEBUG1,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("index \"%s\" is not valid",
+						RelationGetRelationName(indexRel))));
+
+	/* Roll back any GUC changes executed by index functions */
+	AtEOXact_GUC(false, save_nestlevel);
+
+	/* Restore userid and security context */
+	SetUserIdAndSecContext(save_userid, save_sec_context);
+
+	relation_close(indexRel, ShareUpdateExclusiveLock);
+	relation_close(heapRel, ShareUpdateExclusiveLock);
+
+	PG_RETURN_INT32((int32) numSummarized);
+}
+
+/*
+ * SQL-callable interface to mark a heap block's page range as unsummarized
+ */
+Datum
+brin_desummarize_range(PG_FUNCTION_ARGS)
+{
+	Oid			indexoid = PG_GETARG_OID(0);
+	int64		heapBlk64 = PG_GETARG_INT64(1);
+	BlockNumber heapBlk;
+	Oid			heapoid;
+	Relation	heapRel;
+	Relation	indexRel;
+	bool		done;
+
+	if (RecoveryInProgress())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is in progress"),
+				 errhint("BRIN control functions cannot be executed during recovery.")));
+
+	if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("block number out of range: %lld",
+						(long long) heapBlk64)));
+	heapBlk = (BlockNumber) heapBlk64;
+
+	/*
+	 * We must lock table before index to avoid deadlocks.  However, if the
+	 * passed indexoid isn't an index then IndexGetRelation() will fail.
+	 * Rather than emitting a not-very-helpful error message, postpone
+	 * complaining, expecting that the is-it-an-index test below will fail.
+	 *
+	 * Unlike brin_summarize_range(), autovacuum never calls this.  Hence, we
+	 * don't switch userid.
+	 */
+	heapoid = IndexGetRelation(indexoid, true);
+	if (OidIsValid(heapoid))
+		heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
+	else
+		heapRel = NULL;
+
+	indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
+
+	/* Must be a BRIN index */
+	if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+		indexRel->rd_rel->relam != BRIN_AM_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a BRIN index",
+						RelationGetRelationName(indexRel))));
+
+	/* User must own the index (comparable to privileges needed for VACUUM) */
+	if (!pg_class_ownercheck(indexoid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
+					   RelationGetRelationName(indexRel));
+
+	/*
+	 * Since we did the IndexGetRelation call above without any lock, it's
+	 * barely possible that a race against an index drop/recreation could have
+	 * netted us the wrong table.  Recheck.
+	 */
+	if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 errmsg("could not open parent table of index \"%s\"",
+						RelationGetRelationName(indexRel))));
+
+	/* Only touch a valid index; see gin_clean_pending_list() */
+	if (indexRel->rd_index->indisvalid)
+	{
+		/* the revmap does the hard work; retry until it reports done */
+		do
+		{
+			done = brinRevmapDesummarizeRange(indexRel, heapBlk);
+		}
+		while (!done);
+	}
+	else
+		ereport(DEBUG1,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("index \"%s\" is not valid",
+						RelationGetRelationName(indexRel))));
+
+	relation_close(indexRel, ShareUpdateExclusiveLock);
+	relation_close(heapRel, ShareUpdateExclusiveLock);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * brin_build_desc() -- build the BrinDesc used to create or scan a BRIN index
+ */
+BrinDesc *
+brin_build_desc(Relation rel)
+{
+	TupleDesc	tupdesc = RelationGetDescr(rel);
+	int			natts = tupdesc->natts;
+	int			nstored = 0;
+	int			attidx;
+	long		descsize;
+	BrinOpcInfo **infos;
+	BrinDesc   *result;
+	MemoryContext desccxt;
+	MemoryContext callercxt;
+
+	/* Allocations made below, the result included, use a private context. */
+	desccxt = AllocSetContextCreate(CurrentMemoryContext,
+									"brin desc cxt",
+									ALLOCSET_SMALL_SIZES);
+	callercxt = MemoryContextSwitchTo(desccxt);
+
+	/*
+	 * Ask each indexed column's opclass for its BrinOpcInfo, summing up the
+	 * opclass-defined number of stored columns as we go.
+	 */
+	infos = (BrinOpcInfo **) palloc(sizeof(BrinOpcInfo *) * natts);
+	for (attidx = 0; attidx < natts; attidx++)
+	{
+		Form_pg_attribute attr = TupleDescAttr(tupdesc, attidx);
+		FmgrInfo   *opcInfoFn;
+		Datum		info;
+
+		opcInfoFn = index_getprocinfo(rel, attidx + 1, BRIN_PROCNUM_OPCINFO);
+		info = FunctionCall1(opcInfoFn, attr->atttypid);
+		infos[attidx] = (BrinOpcInfo *) DatumGetPointer(info);
+		nstored += infos[attidx]->oi_nstored;
+	}
+
+	/* The struct ends in an array of one BrinOpcInfo pointer per column. */
+	descsize = offsetof(BrinDesc, bd_info) + sizeof(BrinOpcInfo *) * natts;
+
+	result = palloc(descsize);
+	result->bd_context = desccxt;
+	result->bd_index = rel;
+	result->bd_tupdesc = tupdesc;
+	result->bd_disktdesc = NULL; /* generated lazily */
+	result->bd_totalstored = nstored;
+
+	for (attidx = 0; attidx < natts; attidx++)
+		result->bd_info[attidx] = infos[attidx];
+	pfree(infos);
+
+	MemoryContextSwitchTo(callercxt);
+
+	return result;
+}
+
+/* brin_free_desc() -- release a BrinDesc by deleting its memory context */
+void
+brin_free_desc(BrinDesc *bdesc)
+{
+	/* the tupdesc must still be valid, i.e. hold at least one reference */
+	Assert(bdesc->bd_tupdesc->tdrefcount > 0);
+	MemoryContextDelete(bdesc->bd_context); /* so no retail pfree needed */
+}
+
+/*
+ * brinGetStats() -- read the index's metapage and fill in *stats from it
+ */
+void
+brinGetStats(Relation index, BrinStatsData *stats)
+{
+	Buffer		metabuf = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
+	BrinMetaPageData *meta;
+
+	/* a share lock suffices, since the metapage is only read */
+	LockBuffer(metabuf, BUFFER_LOCK_SHARE);
+	meta = (BrinMetaPageData *) PageGetContents(BufferGetPage(metabuf));
+
+	/* copy out the figures while the lock is held */
+	stats->pagesPerRange = meta->pagesPerRange;
+	stats->revmapNumPages = meta->lastRevmapPage - 1;
+
+	/* done with the metapage */
+	UnlockReleaseBuffer(metabuf);
+}
+
+/*
+ * Allocate and set up the BrinBuildState used to create tuples on an index.
+ */
+static BrinBuildState *
+initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
+						   BlockNumber pagesPerRange)
+{
+	BrinBuildState *bs = palloc(sizeof(BrinBuildState));
+
+	bs->bs_irel = idxRel;
+	bs->bs_rmAccess = revmap;
+	bs->bs_pagesPerRange = pagesPerRange;
+	/* nothing inserted yet; start at block 0 with no insert buffer */
+	bs->bs_numtuples = 0;
+	bs->bs_currRangeStart = 0;
+	bs->bs_currentInsertBuf = InvalidBuffer;
+	/* index descriptor, then the deformed tuple built from it */
+	bs->bs_bdesc = brin_build_desc(idxRel);
+	bs->bs_dtuple = brin_new_memtuple(bs->bs_bdesc);
+
+	return bs;
+}
+
+/*
+ * Free a BrinBuildState, releasing whatever it still holds.
+ */
+static void
+terminate_brin_buildstate(BrinBuildState *state)
+{
+	Buffer		lastbuf = state->bs_currentInsertBuf;
+
+	/*
+	 * Let go of the last index buffer used, if any.  While at it, record
+	 * the free space remaining on that page in the FSM, so that it is
+	 * available there too.
+	 */
+	if (!BufferIsInvalid(lastbuf))
+	{
+		BlockNumber lastblk = BufferGetBlockNumber(lastbuf);
+		Size		avail = PageGetFreeSpace(BufferGetPage(lastbuf));
+
+		ReleaseBuffer(lastbuf);
+		RecordPageWithFreeSpace(state->bs_irel, lastblk, avail);
+		FreeSpaceMapVacuumRange(state->bs_irel, lastblk, lastblk + 1);
+	}
+
+	/* finally free the descriptor, the deformed tuple and the state */
+	brin_free_desc(state->bs_bdesc);
+	pfree(state->bs_dtuple);
+	pfree(state);
+}
+
+/*
+ * On the given BRIN index, summarize the heap page range that corresponds
+ * to the heap block number given.
+ *
+ * This routine can run in parallel with insertions into the heap.  To avoid
+ * missing those values from the summary tuple, we first insert a placeholder
+ * index tuple into the index, then execute the heap scan; transactions
+ * concurrent with the scan update the placeholder tuple.  After the scan, we
+ * union the placeholder tuple with the one computed by this routine.  The
+ * update of the index value happens in a loop, so that if somebody updates
+ * the placeholder tuple after we read it, we detect the case and try again.
+ * This ensures that the concurrently inserted tuples are not lost.
+ *
+ * A further corner case is this routine being asked to summarize the partial
+ * range at the end of the table.  heapNumBlks is the (possibly outdated)
+ * table size; if we notice that the requested range extends beyond that size,
+ * we re-compute the table size after inserting the placeholder tuple, to
+ * avoid missing pages that were appended recently.
+ */
+static void
+summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
+				BlockNumber heapBlk, BlockNumber heapNumBlks)
+{
+	Buffer		phbuf;
+	BrinTuple  *phtup;
+	Size		phsz;
+	OffsetNumber offset;
+	BlockNumber scanNumBlks;
+
+	/*
+	 * Insert the placeholder tuple
+	 */
+	phbuf = InvalidBuffer;
+	phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
+	offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
+						   state->bs_rmAccess, &phbuf,
+						   heapBlk, phtup, phsz);
+
+	/*
+	 * Compute range end.  We hold ShareUpdateExclusive lock on table, so it
+	 * cannot shrink concurrently (but it can grow).
+	 */
+	Assert(heapBlk % state->bs_pagesPerRange == 0);
+	if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
+	{
+		/*
+		 * If we're asked to scan what we believe to be the final range on the
+		 * table (i.e. a range that might be partial) we need to recompute our
+		 * idea of what the latest page is after inserting the placeholder
+		 * tuple.  Anyone that grows the table later will update the
+		 * placeholder tuple, so it doesn't matter that we won't scan these
+		 * pages ourselves.  Careful: the table might have been extended
+		 * beyond the current range, so clamp our result.
+		 *
+		 * Fortunately, this should occur infrequently.
+		 */
+		scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
+						  state->bs_pagesPerRange);
+	}
+	else
+	{
+		/* Easy case: range is known to be complete */
+		scanNumBlks = state->bs_pagesPerRange;
+	}
+
+	/*
+	 * Execute the partial heap scan covering the heap blocks in the specified
+	 * page range, summarizing the heap tuples in it.  This scan stops just
+	 * short of brinbuildCallback creating the new index entry.
+	 *
+	 * Note that it is critical we use the "any visible" mode of
+	 * table_index_build_range_scan here: otherwise, we would miss tuples
+	 * inserted by transactions that are still in progress, among other corner
+	 * cases.
+	 */
+	state->bs_currRangeStart = heapBlk;
+	table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
+								 heapBlk, scanNumBlks,
+								 brinbuildCallback, (void *) state, NULL);
+
+	/*
+	 * Now we update the values obtained by the scan with the placeholder
+	 * tuple.  We do this in a loop which only terminates if we're able to
+	 * update the placeholder tuple successfully; if we are not, this means
+	 * somebody else modified the placeholder tuple after we read it.
+	 */
+	for (;;)
+	{
+		BrinTuple  *newtup;
+		Size		newsize;
+		bool		didupdate;
+		bool		samepage;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Form the summary tuple and try to replace the placeholder with it.
+		 */
+		newtup = brin_form_tuple(state->bs_bdesc,
+								 heapBlk, state->bs_dtuple, &newsize);
+		samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
+		didupdate =
+			brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
+						  state->bs_rmAccess, heapBlk, phbuf, offset,
+						  phtup, phsz, newtup, newsize, samepage);
+		brin_free_tuple(phtup);
+		brin_free_tuple(newtup);
+
+		/* If the update succeeded, we're done. */
+		if (didupdate)
+			break;
+
+		/*
+		 * If the update didn't work, it might be because somebody updated the
+		 * placeholder tuple concurrently.  Extract the new version, union it
+		 * with the values we have from the scan, and start over.  (There are
+		 * other reasons for the update to fail, but it's simple to treat them
+		 * the same.)
+		 */
+		phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
+										 &offset, &phsz, BUFFER_LOCK_SHARE,
+										 NULL);
+		/* the placeholder tuple must exist */
+		if (phtup == NULL)
+			elog(ERROR, "missing placeholder tuple");
+		phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
+		LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);
+
+		/* merge it into the tuple from the heap scan */
+		union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
+	}
+
+	ReleaseBuffer(phbuf);
+}
+
+/*
+ * Summarize page ranges that are not already summarized.  If pageRange is
+ * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
+ * page range containing the given heap page number is scanned.
+ * If include_partial is true, then the partial range at the end of the table
+ * is summarized, otherwise not.
+ *
+ * For each new index tuple inserted, *numSummarized (if not NULL) is
+ * incremented; for each existing tuple, *numExisting (if not NULL) is
+ * incremented.
+ */
+static void
+brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
+			  bool include_partial, double *numSummarized, double *numExisting)
+{
+	BrinRevmap *revmap;
+	BrinBuildState *state = NULL;
+	IndexInfo  *indexInfo = NULL;
+	BlockNumber heapNumBlocks;
+	BlockNumber pagesPerRange;
+	Buffer		buf;
+	BlockNumber startBlk;
+
+	revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
+
+	/* determine range of pages to process */
+	heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
+	if (pageRange == BRIN_ALL_BLOCKRANGES)
+		startBlk = 0;
+	else
+	{
+		startBlk = (pageRange / pagesPerRange) * pagesPerRange;	/* range start */
+		heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
+	}
+	if (startBlk > heapNumBlocks)
+	{
+		/* Nothing to do if start point is beyond end of table */
+		brinRevmapTerminate(revmap);
+		return;
+	}
+
+	/*
+	 * Scan the revmap to find unsummarized items.
+	 */
+	buf = InvalidBuffer;
+	for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
+	{
+		BrinTuple  *tup;
+		OffsetNumber off;
+
+		/*
+		 * Unless requested to summarize even a partial range, go away now if
+		 * we think the next range is partial.  Caller would pass true when it
+		 * is typically run once bulk data loading is done
+		 * (brin_summarize_new_values), and false when it is typically the
+		 * result of arbitrarily-scheduled maintenance command (vacuuming).
+		 */
+		if (!include_partial &&
+			(startBlk + pagesPerRange > heapNumBlocks))
+			break;
+
+		CHECK_FOR_INTERRUPTS();
+
+		tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
+									   BUFFER_LOCK_SHARE, NULL);
+		if (tup == NULL)
+		{
+			/* no revmap entry for this heap range. Summarize it. */
+			if (state == NULL)
+			{
+				/* first time through: set up the build state lazily */
+				Assert(!indexInfo);
+				state = initialize_brin_buildstate(index, revmap,
+												   pagesPerRange);
+				indexInfo = BuildIndexInfo(index);
+			}
+			summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
+
+			/* and re-initialize state for the next range */
+			brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
+
+			if (numSummarized)
+				*numSummarized += 1.0;
+		}
+		else
+		{
+			if (numExisting)
+				*numExisting += 1.0;
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);	/* buf released after loop */
+		}
+	}
+
+	if (BufferIsValid(buf))
+		ReleaseBuffer(buf);
+
+	/* free resources */
+	brinRevmapTerminate(revmap);
+	if (state)
+	{
+		terminate_brin_buildstate(state);
+		pfree(indexInfo);
+	}
+}
+
+/*
+ * Turn the build state's deformed tuple into its on-disk form and insert it
+ * into the index for the current range, making the revmap point to it.
+ */
+static void
+form_and_insert_tuple(BrinBuildState *state)
+{
+	BlockNumber rangeStart = state->bs_currRangeStart;
+	Size		disksize;
+	BrinTuple  *disktup;
+
+	disktup = brin_form_tuple(state->bs_bdesc, rangeStart, state->bs_dtuple,
+							  &disksize);
+	brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
+				  &state->bs_currentInsertBuf, rangeStart, disktup, disksize);
+	pfree(disktup);
+
+	state->bs_numtuples += 1;
+}
+
+/*
+ * Given deformed tuple "a" and on-disk tuple "b", adjust "a" so that it's
+ * consistent with the summary values in both.
+ */
+static void
+union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
+{
+	int			keyno;
+	BrinMemTuple *db;
+	MemoryContext cxt;
+	MemoryContext oldcxt;
+
+	/* Use our own memory context to avoid retail pfree */
+	cxt = AllocSetContextCreate(CurrentMemoryContext,
+								"brin union",
+								ALLOCSET_DEFAULT_SIZES);
+	oldcxt = MemoryContextSwitchTo(cxt);
+	db = brin_deform_tuple(bdesc, b, NULL);
+	MemoryContextSwitchTo(oldcxt);
+
+	/*
+	 * Check if the ranges are empty.
+	 *
+	 * If at least one of them is empty, we don't need to call per-key union
+	 * functions at all. If "b" is empty, we just use "a" as the result (it
+	 * might be empty too, but that's fine). If "a" is empty but "b" is not,
+	 * we use "b" as the result (but we have to copy the data into "a" first).
+	 *
+	 * Only when both ranges are non-empty, we actually do the per-key merge.
+	 */
+
+	/* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */
+	if (db->bt_empty_range)
+	{
+		/* skip the per-key merge */
+		MemoryContextDelete(cxt);
+		return;
+	}
+
+	/*
+	 * Now we know "b" is not empty. If "a" is empty, then "b" is the result.
+	 * But we need to copy the data from "b" to "a" first, because that's how
+	 * we pass result out.
+	 *
+	 * We have to copy all the global/per-key flags etc. too.
+	 */
+	if (a->bt_empty_range)
+	{
+		for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
+		{
+			int			i;
+			BrinValues *col_a = &a->bt_columns[keyno];
+			BrinValues *col_b = &db->bt_columns[keyno];
+			BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
+
+			col_a->bv_allnulls = col_b->bv_allnulls;
+			col_a->bv_hasnulls = col_b->bv_hasnulls;
+
+			/* If "b" has no data, we're done. */
+			if (col_b->bv_allnulls)
+				continue;
+
+			for (i = 0; i < opcinfo->oi_nstored; i++)
+				col_a->bv_values[i] =
+					datumCopy(col_b->bv_values[i],
+							  opcinfo->oi_typcache[i]->typbyval,
+							  opcinfo->oi_typcache[i]->typlen);
+		}
+
+		/* "a" started empty, but "b" was not empty, so remember that */
+		a->bt_empty_range = false;
+
+		/* skip the per-key merge */
+		MemoryContextDelete(cxt);
+		return;
+	}
+
+	/* Now we know neither range is empty. */
+	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
+	{
+		FmgrInfo   *unionFn;
+		BrinValues *col_a = &a->bt_columns[keyno];
+		BrinValues *col_b = &db->bt_columns[keyno];
+		BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
+
+		if (opcinfo->oi_regular_nulls)
+		{
+			/* Does the "b" summary represent any NULL values? */
+			bool		b_has_nulls = (col_b->bv_hasnulls || col_b->bv_allnulls);
+
+			/* Adjust "hasnulls". */
+			if (!col_a->bv_allnulls && b_has_nulls)
+				col_a->bv_hasnulls = true;
+
+			/* If there are no values in B, there's nothing left to do. */
+			if (col_b->bv_allnulls)
+				continue;
+
+			/*
+			 * Adjust "allnulls".  If A doesn't have values, just copy the
+			 * values from B into A, and we're done.  We cannot run the
+			 * operators in this case, because values in A might contain
+			 * garbage.  Note we already established that B contains values.
+			 *
+			 * Also adjust "hasnulls" in order not to forget the summary
+			 * represents NULL values. This is not redundant with the earlier
+			 * update, because that only happens when allnulls=false.
+			 */
+			if (col_a->bv_allnulls)
+			{
+				int			i;
+
+				col_a->bv_allnulls = false;
+				col_a->bv_hasnulls = true;
+
+				for (i = 0; i < opcinfo->oi_nstored; i++)
+					col_a->bv_values[i] =
+						datumCopy(col_b->bv_values[i],
+								  opcinfo->oi_typcache[i]->typbyval,
+								  opcinfo->oi_typcache[i]->typlen);
+
+				continue;
+			}
+		}
+
+		unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
+									BRIN_PROCNUM_UNION);
+		FunctionCall3Coll(unionFn,
+						  bdesc->bd_index->rd_indcollation[keyno],
+						  PointerGetDatum(bdesc),
+						  PointerGetDatum(col_a),
+						  PointerGetDatum(col_b));
+	}
+
+	MemoryContextDelete(cxt);
+}
+
+/*
+ * brin_vacuum_scan
+ *		Visit every page of the index once, as part of VACUUM.
+ *
+ * Each block of the main fork is read in physical order and handed to
+ * brin_page_cleanup, so that uncatalogued index pages (i.e. those that might
+ * have been lost due to a crash after index extension and such) are dealt
+ * with.
+ *
+ * idxrel is the index being vacuumed; strategy is the buffer access strategy
+ * passed through to ReadBufferExtended.
+ */
+static void
+brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
+{
+	/* The block count is sampled once, before the walk begins. */
+	BlockNumber total = RelationGetNumberOfBlocks(idxrel);
+	BlockNumber cur = 0;
+
+	while (cur < total)
+	{
+		Buffer		page;
+
+		CHECK_FOR_INTERRUPTS();
+
+		page = ReadBufferExtended(idxrel, MAIN_FORKNUM, cur,
+								  RBM_NORMAL, strategy);
+		brin_page_cleanup(idxrel, page);
+		ReleaseBuffer(page);
+
+		cur++;
+	}
+
+	/*
+	 * Refresh the upper pages of the index's FSM as well.  That propagates
+	 * the leaf-page FSM updates made by brin_page_cleanup, and also repairs
+	 * any pre-existing damage or out-of-dateness.
+	 */
+	FreeSpaceMapVacuum(idxrel);
+}
+
+/*
+ * add_values_to_range
+ *		Fold the key values of one new heap tuple into a deformed BRIN tuple.
+ *
+ * For every index attribute, either the NULL is recorded locally (when the
+ * opclass sets oi_regular_nulls and the incoming value is NULL), or the
+ * opclass's BRIN_PROCNUM_ADDVALUE procedure is called to update the summary.
+ *
+ * idxRel is the index, bdesc its BRIN descriptor, dtup the in-memory summary
+ * being updated, and values/nulls the per-attribute key values of the new
+ * tuple (indexed by attribute number minus one).
+ *
+ * Returns true if dtup was modified, in which case the caller needs to store
+ * the updated tuple.  A range that started out empty always counts as
+ * modified.  On return dtup->bt_empty_range is false.
+ */
+static bool
+add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup,
+					Datum *values, bool *nulls)
+{
+	int			keyno;
+
+	/* If the range starts empty, we're certainly going to modify it. */
+	bool		modified = dtup->bt_empty_range;
+
+	/*
+	 * Compare the key values of the new tuple to the stored index values; our
+	 * deformed tuple will get updated if the new tuple doesn't fit the
+	 * original range (note this means we can't break out of the loop early).
+	 * Make a note of whether this happens, so that we know to insert the
+	 * modified tuple later.
+	 */
+	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
+	{
+		Datum		result;
+		BrinValues *bval;
+		FmgrInfo   *addValue;
+		bool		has_nulls;
+
+		bval = &dtup->bt_columns[keyno];
+
+		/*
+		 * Does the range have actual NULL values? Either of the flags can
+		 * be set, but we ignore the state before adding first row.
+		 *
+		 * We have to remember this, because we'll modify the flags and we
+		 * need to know if the range started as empty.
+		 */
+		has_nulls = ((!dtup->bt_empty_range) &&
+					 (bval->bv_hasnulls || bval->bv_allnulls));
+
+		/*
+		 * If the value we're adding is NULL, handle it locally. Otherwise
+		 * call the BRIN_PROCNUM_ADDVALUE procedure.
+		 */
+		if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno])
+		{
+			/*
+			 * If the new value is null, we record that we saw it if it's the
+			 * first one; otherwise, there's nothing to do.
+			 */
+			if (!bval->bv_hasnulls)
+			{
+				bval->bv_hasnulls = true;
+				modified = true;
+			}
+
+			continue;
+		}
+
+		/* attribute numbers passed to index_getprocinfo are 1-based */
+		addValue = index_getprocinfo(idxRel, keyno + 1,
+									 BRIN_PROCNUM_ADDVALUE);
+		result = FunctionCall4Coll(addValue,
+								   idxRel->rd_indcollation[keyno],
+								   PointerGetDatum(bdesc),
+								   PointerGetDatum(bval),
+								   values[keyno],
+								   nulls[keyno]);
+		/* if that returned true, we need to insert the updated tuple */
+		modified |= DatumGetBool(result);
+
+		/*
+		 * If the range had actual NULL values (i.e. did not start empty),
+		 * make sure we don't forget about the NULL values. Either the allnulls
+		 * flag is still set to true, or (if the opclass cleared it) we need to
+		 * set hasnulls=true.
+		 *
+		 * XXX This can only happen when the opclass modified the tuple, so the
+		 * modified flag should be set.
+		 */
+		if (has_nulls && !(bval->bv_hasnulls || bval->bv_allnulls))
+		{
+			Assert(modified);
+			bval->bv_hasnulls = true;
+		}
+	}
+
+	/*
+	 * After updating summaries for all the keys, mark it as not empty.
+	 *
+	 * If we're actually changing the flag value (i.e. tuple started as empty),
+	 * we should have modified the tuple. So we should not see empty range that
+	 * was not modified.
+	 */
+	Assert(!dtup->bt_empty_range || modified);
+	dtup->bt_empty_range = false;
+
+	return modified;
+}
+
+/*
+ * check_null_keys
+ *		Test one column summary against its IS NULL / IS NOT NULL scan keys.
+ *
+ * bval is the summary of a single attribute; nullkeys/nnullkeys are the scan
+ * keys to check, all of which must refer to that same attribute.
+ *
+ * Returns false as soon as one key proves the range cannot contain a match,
+ * and true when no key rules the range out (including when there are no
+ * keys at all).
+ */
+static bool
+check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
+{
+	int			i;
+
+	for (i = 0; i < nnullkeys; i++)
+	{
+		ScanKey		sk = nullkeys[i];
+
+		Assert(sk->sk_attno == bval->bv_attno);
+
+		/* Only keys flagged SK_ISNULL are evaluated here. */
+		if ((sk->sk_flags & SK_ISNULL) == 0)
+			continue;
+
+		if (sk->sk_flags & SK_SEARCHNULL)
+		{
+			/* IS NULL: the range must be known to hold at least one NULL. */
+			if (!(bval->bv_allnulls || bval->bv_hasnulls))
+				return false;
+
+			continue;
+		}
+
+		/*
+		 * A NULL argument that is neither IS NULL nor IS NOT NULL: assume all
+		 * indexable operators are strict, so nothing can match.
+		 */
+		if (!(sk->sk_flags & SK_SEARCHNOTNULL))
+			return false;
+
+		/* IS NOT NULL: only a range known to hold nothing but NULLs fails. */
+		if (bval->bv_allnulls)
+			return false;
+	}
+
+	return true;
+}
diff --git a/src/backend/access/brin/brin_bloom.c b/src/backend/access/brin/brin_bloom.c
new file mode 100644
index 0000000..6812ca9
--- /dev/null
+++ b/src/backend/access/brin/brin_bloom.c
@@ -0,0 +1,808 @@
+/*
+ * brin_bloom.c
+ *		Implementation of Bloom opclass for BRIN
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * A BRIN opclass summarizing page range into a bloom filter.
+ *
+ * Bloom filters allow efficient testing whether a given page range contains
+ * a particular value. Therefore, if we summarize each page range into a small
+ * bloom filter, we can easily (and cheaply) test whether it contains values
+ * we get later.
+ *
+ * The index only supports equality operators, similarly to hash indexes.
+ * Bloom indexes are however much smaller, and support only bitmap scans.
+ *
+ * Note: Don't confuse this with bloom indexes, implemented in a contrib
+ * module. That extension implements an entirely new AM, building a bloom
+ * filter on multiple columns in a single row. This opclass works with an
+ * existing AM (BRIN) and builds bloom filter on a column.
+ *
+ *
+ * values vs. hashes
+ * -----------------
+ *
+ * The original column values are not used directly, but are first hashed
+ * using the regular type-specific hash function, producing a uint32 hash.
+ * And this hash value is then added to the summary - i.e. it's hashed
+ * again and added to the bloom filter.
+ *
+ * This allows the code to treat all data types (byval/byref/...) the same
+ * way, with only minimal space requirements, because we're working with
+ * hashes and not the original values. Everything is uint32.
+ *
+ * Of course, this assumes the built-in hash function is reasonably good,
+ * without too many collisions etc. But that does seem to be the case, at
+ * least based on past experience. After all, the same hash functions are
+ * used for hash indexes, hash partitioning and so on.
+ *
+ *
+ * hashing scheme
+ * --------------
+ *
+ * Bloom filters require a number of independent hash functions. There are
+ * different schemes how to construct them - for example we might use
+ * hash_uint32_extended with random seeds, but that seems fairly expensive.
+ * We use a scheme requiring only two functions described in this paper:
+ *
+ * Less Hashing, Same Performance: Building a Better Bloom Filter
+ * Adam Kirsch, Michael Mitzenmacher, Harvard School of Engineering and
+ * Applied Sciences, Cambridge, Massachusetts [DOI 10.1002/rsa.20208]
+ *
+ * The two hash functions h1 and h2 are calculated using hard-coded seeds,
+ * and then combined using (h1 + i * h2) to generate the hash functions.
+ *
+ *
+ * sizing the bloom filter
+ * -----------------------
+ *
+ * Size of a bloom filter depends on the number of distinct values we will
+ * store in it, and the desired false positive rate. The higher the number
+ * of distinct values and/or the lower the false positive rate, the larger
+ * the bloom filter. On the other hand, we want to keep the index as small
+ * as possible - that's one of the basic advantages of BRIN indexes.
+ *
+ * Although the number of distinct elements (in a page range) depends on
+ * the data, we can consider it fixed. This simplifies the trade-off to
+ * just false positive rate vs. size.
+ *
+ * At the page range level, false positive rate is a probability the bloom
+ * filter matches a random value. For the whole index (with sufficiently
+ * many page ranges) it represents the fraction of the index ranges (and
+ * thus fraction of the table to be scanned) matching the random value.
+ *
+ * Furthermore, the size of the bloom filter is subject to implementation
+ * limits - it has to fit onto a single index page (8kB by default). As
+ * the bitmap is inherently random (when "full" about half the bits are set
+ * to 1, randomly), compression can't help very much.
+ *
+ * To reduce the size of a filter (to fit to a page), we have to either
+ * accept higher false positive rate (undesirable), or reduce the number
+ * of distinct items to be stored in the filter. We can't alter the input
+ * data, of course, but we may make the BRIN page ranges smaller - instead
+ * of the default 128 pages (1MB) we may build index with 16-page ranges,
+ * or something like that. This should reduce the number of distinct values
+ * in the page range, making the filter smaller (with fixed false positive
+ * rate). Even for random data sets this should help, as the number of rows
+ * per heap page is limited (to ~290 with very narrow tables, likely ~20
+ * in practice).
+ *
+ * Of course, good sizing decisions depend on having the necessary data,
+ * i.e. number of distinct values in a page range (of a given size) and
+ * table size (to estimate cost change due to change in false positive
+ * rate due to having larger index vs. scanning larger indexes). We may
+ * not have that data - for example when building an index on empty table
+ * it's not really possible. And for some data we only have estimates for
+ * the whole table and we can only estimate per-range values (ndistinct).
+ *
+ * Another challenge is that while the bloom filter is per-column, it's
+ * the whole index tuple that has to fit into a page. And for multi-column
+ * indexes that may include pieces we have no control over (not necessarily
+ * bloom filters, the other columns may use other BRIN opclasses). So it's
+ * not entirely clear how to distribute the space between those columns.
+ *
+ * The current logic, implemented in brin_bloom_get_ndistinct, attempts to
+ * make some basic sizing decisions, based on the size of BRIN ranges, and
+ * the maximum number of rows per range.
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_bloom.c
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/brin.h"
+#include "access/brin_internal.h"
+#include "access/brin_page.h"
+#include "access/brin_tuple.h"
+#include "access/hash.h"
+#include "access/htup_details.h"
+#include "access/reloptions.h"
+#include "access/stratnum.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_amop.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+#include <math.h>
+
+/* Strategy number for equality, the only strategy brin_bloom_consistent accepts. */
+#define BloomEqualStrategyNumber	1
+
+/*
+ * Additional SQL level support functions. We only have one, which is
+ * used to calculate hash of the input value.
+ *
+ * Procedure numbers must not use values reserved for BRIN itself; see
+ * brin_internal.h.
+ */
+#define		BLOOM_MAX_PROCNUMS		1	/* maximum support procs we need */
+#define		PROCNUM_HASH			11	/* required */
+
+/*
+ * Subtract this from procnum to obtain index in BloomOpaque arrays
+ * (Must be equal to minimum of private procnums).
+ */
+#define		PROCNUM_BASE			11
+
+/*
+ * Storage type for BRIN's reloptions.
+ *
+ * The fields are meant to be read through BloomGetNDistinctPerRange and
+ * BloomGetFalsePositiveRate, which substitute the built-in default when no
+ * options struct is present or when the stored value is zero.
+ *
+ * A negative nDistinctPerRange is relative: brin_bloom_get_ndistinct
+ * multiplies its absolute value by the maximum number of tuples in a range.
+ */
+typedef struct BloomOptions
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	double		nDistinctPerRange;	/* number of distinct values per range */
+	double		falsePositiveRate;	/* false positive for bloom filter */
+} BloomOptions;
+
+/*
+ * The current min value (16) is somewhat arbitrary, but it's based
+ * on the fact that the filter header is ~20B alone, which is about
+ * the same as the filter bitmap for 16 distinct items with 1% false
+ * positive rate. So by allowing lower values we'd not gain much. In
+ * any case, the min should not be larger than MaxHeapTuplesPerPage
+ * (~290), which is the theoretical maximum for single-page ranges.
+ */
+#define		BLOOM_MIN_NDISTINCT_PER_RANGE		16
+
+/*
+ * Used to determine number of distinct items, based on the number of rows
+ * in a page range. The 10% is somewhat similar to what estimate_num_groups
+ * does, so we use the same factor here.
+ *
+ * The value is negative because negative settings are interpreted as a
+ * fraction of the maximum number of tuples in the page range.
+ */
+#define		BLOOM_DEFAULT_NDISTINCT_PER_RANGE	-0.1	/* 10% of values */
+
+/*
+ * Allowed range and default value for the false positive rate. The exact
+ * values are somewhat arbitrary, but were chosen considering the various
+ * parameters (size of filter vs. page size, etc.).
+ *
+ * The lower the false-positive rate, the more accurate the filter is, but
+ * it also gets larger - at some point this eliminates the main advantage
+ * of BRIN indexes, which is the tiny size. At 0.01% the index is about
+ * 10% of the table (assuming 290 distinct values per 8kB page).
+ *
+ * On the other hand, as the false-positive rate increases, larger part of
+ * the table has to be scanned due to mismatches - at 25% we're probably
+ * close to sequential scan being cheaper.
+ */
+#define		BLOOM_MIN_FALSE_POSITIVE_RATE	0.0001	/* 0.01% fp rate */
+#define		BLOOM_MAX_FALSE_POSITIVE_RATE	0.25	/* 25% fp rate */
+#define		BLOOM_DEFAULT_FALSE_POSITIVE_RATE	0.01	/* 1% fp rate */
+
+/*
+ * Fetch the n_distinct_per_range setting from an opclass options pointer.
+ * Falls back to BLOOM_DEFAULT_NDISTINCT_PER_RANGE when opts is NULL or the
+ * stored value is zero.  Note that opts is evaluated more than once.
+ */
+#define BloomGetNDistinctPerRange(opts) \
+	((opts) && (((BloomOptions *) (opts))->nDistinctPerRange != 0) ? \
+	 (((BloomOptions *) (opts))->nDistinctPerRange) : \
+	 BLOOM_DEFAULT_NDISTINCT_PER_RANGE)
+
+/*
+ * Fetch the false_positive_rate setting from an opclass options pointer.
+ * Falls back to BLOOM_DEFAULT_FALSE_POSITIVE_RATE when opts is NULL or the
+ * stored value is zero.  Note that opts is evaluated more than once.
+ */
+#define BloomGetFalsePositiveRate(opts) \
+	((opts) && (((BloomOptions *) (opts))->falsePositiveRate != 0.0) ? \
+	 (((BloomOptions *) (opts))->falsePositiveRate) : \
+	 BLOOM_DEFAULT_FALSE_POSITIVE_RATE)
+
+/*
+ * An estimate of the largest bloom filter we can fit onto a page: the block
+ * size minus the page header with one line pointer, the BRIN special space
+ * and the BRIN tuple header.  This is not a perfect guarantee, for a couple
+ * of reasons. For example, the row may be larger because the index has
+ * multiple columns.
+ */
+#define BloomMaxFilterSize \
+	MAXALIGN_DOWN(BLCKSZ - \
+				  (MAXALIGN(SizeOfPageHeaderData + \
+							sizeof(ItemIdData)) + \
+				   MAXALIGN(sizeof(BrinSpecialSpace)) + \
+				   SizeOfBrinTuple))
+
+/*
+ * Seeds used to calculate two hash functions h1 and h2, which are then used
+ * to generate k hashes using the (h1 + i * h2) scheme.
+ */
+#define BLOOM_SEED_1	0x71d924af
+#define BLOOM_SEED_2	0xba48b314
+
+/*
+ * Bloom Filter
+ *
+ * Represents a bloom filter, built on hashes of the indexed values. That is,
+ * we compute a uint32 hash of the value, and then store this hash into the
+ * bloom filter (and compute additional hashes on it).
+ *
+ * The struct is a varlena: the fixed-size header below is followed by the
+ * bitmap itself, which bloom_init allocates as nbits / 8 bytes.
+ *
+ * XXX We could implement "sparse" bloom filters, keeping only the bytes that
+ * are not entirely 0. But while indexes don't support TOAST, the varlena can
+ * still be compressed. So this seems unnecessary, because the compression
+ * should do the same job.
+ *
+ * XXX We can also watch the number of bits set in the bloom filter, and then
+ * stop using it (and not store the bitmap, to save space) when the false
+ * positive rate gets too high. But even if the false positive rate exceeds the
+ * desired value, it still can eliminate some page ranges.
+ */
+typedef struct BloomFilter
+{
+	/* varlena header (do not touch directly!) */
+	int32		vl_len_;
+
+	/* space for various flags (unused for now) */
+	uint16		flags;
+
+	/* filter parameters, fixed by bloom_init, plus the set-bit counter */
+	uint8		nhashes;		/* number of hash functions */
+	uint32		nbits;			/* number of bits in the bitmap (size) */
+	uint32		nbits_set;		/* number of bits set to 1 */
+
+	/* data of the bloom filter */
+	char		data[FLEXIBLE_ARRAY_MEMBER];
+} BloomFilter;
+
+
+/*
+ * bloom_init
+ * 		Allocate and initialize an empty Bloom filter.
+ *
+ * The filter is sized for ndistinct expected values at the requested false
+ * positive rate p:
+ *
+ *		nbits   = ceil(-(ndistinct * ln(p)) / (ln(2))^2), rounded up to bytes
+ *		nhashes = round(ln(2) * nbits / ndistinct)
+ *
+ * ndistinct must be positive.  false_positive_rate must lie within
+ * [BLOOM_MIN_FALSE_POSITIVE_RATE, BLOOM_MAX_FALSE_POSITIVE_RATE]; both ends
+ * are valid, because the "false_positive_rate" reloption is registered with
+ * exactly those limits and a user may pick either of them.
+ *
+ * Returns a zeroed, palloc'd filter stored as a varlena.  Raises an error
+ * if the bitmap alone could not fit on an index page.
+ */
+static BloomFilter *
+bloom_init(int ndistinct, double false_positive_rate)
+{
+	Size		len;
+	BloomFilter *filter;
+
+	int			nbits;			/* size of filter / number of bits */
+	int			nbytes;			/* size of filter / number of bytes */
+
+	double		k;				/* number of hash functions */
+
+	Assert(ndistinct > 0);
+
+	/* the upper bound is inclusive, matching what the reloption accepts */
+	Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) &&
+		   (false_positive_rate <= BLOOM_MAX_FALSE_POSITIVE_RATE));
+
+	/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
+	nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
+
+	/* round m to whole bytes */
+	nbytes = ((nbits + 7) / 8);
+	nbits = nbytes * 8;
+
+	/*
+	 * Reject filters that are obviously too large to store on a page.
+	 *
+	 * Initially the bloom filter is just zeroes and so very compressible, but
+	 * as we add values it gets more and more random, and so less and less
+	 * compressible. So initially everything fits on the page, but we might
+	 * get surprising failures later - we want to prevent that, so we reject
+	 * bloom filter that are obviously too large.
+	 *
+	 * XXX It's not uncommon to oversize the bloom filter a bit, to defend
+	 * against unexpected data anomalies (parts of table with more distinct
+	 * values per range etc.). But we still need to make sure even the
+	 * oversized filter fits on page, if such need arises.
+	 *
+	 * XXX This check is not perfect, because the index may have multiple
+	 * filters that are small individually, but too large when combined.
+	 */
+	if (nbytes > BloomMaxFilterSize)
+		elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
+			 BloomMaxFilterSize);
+
+	/*
+	 * round(log(2.0) * m / ndistinct), but assume round() may not be
+	 * available on Windows
+	 */
+	k = log(2.0) * nbits / ndistinct;
+	k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
+
+	/*
+	 * We allocate the whole filter. Most of it is going to be 0 bits, so the
+	 * varlena is easy to compress.
+	 */
+	len = offsetof(BloomFilter, data) + nbytes;
+
+	/* palloc0 also leaves nbits_set at zero and the bitmap all-clear */
+	filter = (BloomFilter *) palloc0(len);
+
+	filter->flags = 0;
+	filter->nhashes = (int) k;
+	filter->nbits = nbits;
+
+	SET_VARSIZE(filter, len);
+
+	return filter;
+}
+
+
+/*
+ * bloom_add_value
+ * 		Set the bits for one hashed value in the bloom filter.
+ *
+ * value is the uint32 hash of the indexed datum.  Two seeded hashes of it,
+ * each reduced modulo the bitmap size, give a base position and a step;
+ * probe number i then lands on bit (base + i * step) % nbits.
+ *
+ * Each bit that was previously clear is set and counted in nbits_set.  If
+ * at least one such bit is found and "updated" is not NULL, *updated is set
+ * to true; it is never reset to false here.  The filter is modified in
+ * place and the same pointer is returned.
+ */
+static BloomFilter *
+bloom_add_value(BloomFilter *filter, uint32 value, bool *updated)
+{
+	uint64		base = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
+	uint64		step = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
+	int			probe = 0;
+
+	while (probe < filter->nhashes)
+	{
+		/* position of this probe: base + probe * step, within the bitmap */
+		uint32		pos = (base + probe * step) % filter->nbits;
+		char	   *cell = &filter->data[pos / 8];
+		int			mask = 0x01 << (pos % 8);
+
+		probe++;
+
+		/* already set by an earlier value (or an earlier probe)? */
+		if (*cell & mask)
+			continue;
+
+		*cell |= mask;
+		filter->nbits_set++;
+
+		if (updated)
+			*updated = true;
+	}
+
+	return filter;
+}
+
+
+/*
+ * bloom_contains_value
+ * 		Test whether the bloom filter may contain one hashed value.
+ *
+ * value is the uint32 hash of the datum being looked up.  The probed bit
+ * positions are derived exactly as in bloom_add_value: bit
+ * (base + i * step) % nbits for each of the nhashes probes.
+ *
+ * Returns false as soon as a probed bit is found clear, and true when all
+ * of them are set.
+ */
+static bool
+bloom_contains_value(BloomFilter *filter, uint32 value)
+{
+	uint64		base = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
+	uint64		step = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
+	int			probe = 0;
+
+	while (probe < filter->nhashes)
+	{
+		uint32		pos = (base + probe * step) % filter->nbits;
+		int			mask = 0x01 << (pos % 8);
+
+		/* one clear bit is enough to rule the value out */
+		if ((filter->data[pos / 8] & mask) == 0)
+			return false;
+
+		probe++;
+	}
+
+	/* every probed bit is set */
+	return true;
+}
+
+/*
+ * Per-attribute opclass state, reachable through BrinOpcInfo.oi_opaque.
+ *
+ * Both arrays are indexed by (procnum - PROCNUM_BASE).
+ */
+typedef struct BloomOpaque
+{
+	/*
+	 * XXX At this point we only need a single proc (to compute the hash), but
+	 * let's keep the array just like inclusion and minmax opclasses, for
+	 * consistency. We may need additional procs in the future.
+	 */
+	FmgrInfo	extra_procinfos[BLOOM_MAX_PROCNUMS];
+	bool		extra_proc_missing[BLOOM_MAX_PROCNUMS];
+} BloomOpaque;
+
+/* Look up (and cache in BloomOpaque) an opclass-specific support procedure. */
+static FmgrInfo *bloom_get_procinfo(BrinDesc *bdesc, uint16 attno,
+									uint16 procnum);
+
+
+/*
+ * brin_bloom_opcinfo
+ *		Build the BrinOpcInfo describing the bloom opclass.
+ *
+ * The result and its BloomOpaque area come from a single zeroed allocation;
+ * oi_opaque points at the MAXALIGN'd position just past the BrinOpcInfo.
+ */
+Datum
+brin_bloom_opcinfo(PG_FUNCTION_ARGS)
+{
+	BrinOpcInfo *result;
+
+	/*
+	 * opaque->extra_procinfos is initialized lazily (by bloom_get_procinfo);
+	 * here it is set to all-uninitialized by palloc0 which sets fn_oid to
+	 * InvalidOid.
+	 *
+	 * bloom indexes only store the filter, as a single value per range
+	 */
+
+	result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
+					 sizeof(BloomOpaque));
+	result->oi_nstored = 1;
+	/* NULL handling is left to the BRIN core, not to this opclass */
+	result->oi_regular_nulls = true;
+	result->oi_opaque = (BloomOpaque *)
+		MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
+	result->oi_typcache[0] = lookup_type_cache(PG_BRIN_BLOOM_SUMMARYOID, 0);
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * brin_bloom_get_ndistinct
+ *		Work out the ndistinct value used to size a bloom filter.
+ *
+ * The configured n_distinct_per_range (or its default) is adjusted using
+ * the index's pagesPerRange:
+ *
+ * - a negative setting is relative to the maximum number of tuples in the
+ *   range, assuming every page holds MaxHeapTuplesPerPage tuples (which is
+ *   likely a significant over-estimate);
+ * - the result is raised to at least BLOOM_MIN_NDISTINCT_PER_RANGE, to
+ *   avoid unreasonably small filters;
+ * - the result is capped at that maximum tuple count, since a larger filter
+ *   would be entirely wasteful.
+ *
+ * XXX We can only do this when the pagesPerRange value was supplied.
+ * If it wasn't, it has to be a read-only access to the index, in which
+ * case we don't really care. But perhaps we should fall-back to the
+ * default pagesPerRange value?
+ *
+ * XXX We might also fetch info about ndistinct estimate for the column,
+ * and compute the expected number of distinct values in a range. But
+ * that may be tricky due to data being sorted in various ways, so it
+ * seems better to rely on the upper estimate.
+ *
+ * XXX We might also calculate a better estimate of rows per BRIN range,
+ * instead of using MaxHeapTuplesPerPage (which probably produces values
+ * much higher than reality).
+ */
+static int
+brin_bloom_get_ndistinct(BrinDesc *bdesc, BloomOptions *opts)
+{
+	BlockNumber rangePages = BrinGetPagesPerRange(bdesc->bd_index);
+	double		estimate = BloomGetNDistinctPerRange(opts);
+	double		ceiling;
+
+	Assert(BlockNumberIsValid(rangePages));
+
+	/* the most tuples a single page range could possibly hold */
+	ceiling = MaxHeapTuplesPerPage * rangePages;
+
+	/* like n_distinct, a negative value is a fraction of that maximum */
+	if (estimate < 0)
+		estimate = (-estimate) * ceiling;
+
+	/*
+	 * Clamp into [BLOOM_MIN_NDISTINCT_PER_RANGE, ceiling], lower bound first.
+	 * The tests are written as negated comparisons so that they pick the
+	 * same operand as Max() and Min() would for every input.
+	 */
+	if (!(estimate > BLOOM_MIN_NDISTINCT_PER_RANGE))
+		estimate = BLOOM_MIN_NDISTINCT_PER_RANGE;
+
+	if (!(estimate < ceiling))
+		estimate = ceiling;
+
+	return (int) estimate;
+}
+
+/*
+ * brin_bloom_add_value
+ *		"Add value" support procedure of the bloom opclass.
+ *
+ * Examine the given index tuple (which contains partial status of a certain
+ * page range) by comparing it to the given value that comes from another heap
+ * tuple.  If the new value is outside the bloom filter specified by the
+ * existing tuple values, update the index tuple and return true.  Otherwise,
+ * return false and do not modify in this case.
+ *
+ * Arguments:
+ *	0 - BrinDesc of the index
+ *	1 - BrinValues of the column being summarized
+ *	2 - the new value
+ *	3 - whether the new value is NULL; must be false, since this opclass
+ *		sets oi_regular_nulls and leaves NULL handling to the caller
+ */
+Datum
+brin_bloom_add_value(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	Datum		newval = PG_GETARG_DATUM(2);
+	bool		isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
+	BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS();
+	Oid			colloid = PG_GET_COLLATION();
+	FmgrInfo   *hashFn;
+	uint32		hashValue;
+	bool		updated = false;
+	AttrNumber	attno;
+	BloomFilter *filter;
+
+	Assert(!isnull);
+
+	attno = column->bv_attno;
+
+	/*
+	 * If this is the first non-null value, we need to initialize the bloom
+	 * filter. Otherwise just extract the existing bloom filter from
+	 * BrinValues.
+	 */
+	if (column->bv_allnulls)
+	{
+		filter = bloom_init(brin_bloom_get_ndistinct(bdesc, opts),
+							BloomGetFalsePositiveRate(opts));
+		column->bv_values[0] = PointerGetDatum(filter);
+		column->bv_allnulls = false;
+
+		/* creating the filter is itself a change to the summary */
+		updated = true;
+	}
+	else
+		filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]);
+
+	/*
+	 * Compute the hash of the new value, using the supplied hash function,
+	 * and then add the hash value to the bloom filter.
+	 */
+	hashFn = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
+
+	hashValue = DatumGetUInt32(FunctionCall1Coll(hashFn, colloid, newval));
+
+	filter = bloom_add_value(filter, hashValue, &updated);
+
+	/*
+	 * Store the pointer back: detoasting may have handed us a copy rather
+	 * than the datum that was in bv_values[0].
+	 */
+	column->bv_values[0] = PointerGetDatum(filter);
+
+	PG_RETURN_BOOL(updated);
+}
+
+/*
+ * brin_bloom_consistent
+ *		"Consistent" support procedure of the bloom opclass.
+ *
+ * Given an index tuple corresponding to a certain page range and a scan key,
+ * return whether the scan key is consistent with the index tuple's bloom
+ * filter.  Return true if so, false otherwise.
+ *
+ * Arguments:
+ *	0 - BrinDesc of the index
+ *	1 - BrinValues of the column, holding the bloom filter
+ *	2 - array of scan keys for this column
+ *	3 - number of scan keys in that array
+ *
+ * All keys must match for the range to be considered consistent.  Only the
+ * equality strategy is supported; any other strategy raises an error.
+ */
+Datum
+brin_bloom_consistent(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
+	int			nkeys = PG_GETARG_INT32(3);
+	Oid			colloid = PG_GET_COLLATION();
+	AttrNumber	attno;
+	Datum		value;
+	bool		matches;
+	FmgrInfo   *finfo;
+	uint32		hashValue;
+	BloomFilter *filter;
+	int			keyno;
+
+	filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]);
+
+	Assert(filter);
+
+	/* with no keys at all, the range trivially matches */
+	matches = true;
+
+	for (keyno = 0; keyno < nkeys; keyno++)
+	{
+		ScanKey		key = keys[keyno];
+
+		/* NULL keys are handled and filtered-out in bringetbitmap */
+		Assert(!(key->sk_flags & SK_ISNULL));
+
+		attno = key->sk_attno;
+		value = key->sk_argument;
+
+		switch (key->sk_strategy)
+		{
+			case BloomEqualStrategyNumber:
+
+				/*
+				 * In the equality case (WHERE col = someval), hash the scan
+				 * key the same way indexed values are hashed, and return the
+				 * current page range only if the bloom filter may contain
+				 * that hash.
+				 */
+				finfo = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
+
+				hashValue = DatumGetUInt32(FunctionCall1Coll(finfo, colloid, value));
+				if (!bloom_contains_value(filter, hashValue))
+					matches = false;
+
+				break;
+			default:
+				/* shouldn't happen */
+				elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+				matches = false;	/* keep compiler quiet */
+				break;
+		}
+
+		/* one failed key is enough to exclude the range */
+		if (!matches)
+			break;
+	}
+
+	PG_RETURN_BOOL(matches);
+}
+
+/*
+ * brin_bloom_union
+ *		"Union" support procedure of the bloom opclass.
+ *
+ * Given two BrinValues, update the first of them as a union of the summary
+ * values contained in both.  The second one is untouched.
+ *
+ * Arguments 1 and 2 are the BrinValues to merge into ("a") and to merge
+ * from ("b"); neither may be all-nulls.  The bitmaps are ORed together and
+ * the number of set bits in "a" is recomputed from the merged bitmap.
+ *
+ * XXX We assume the bloom filters have the same parameters for now. In the
+ * future we should have 'can union' function, to decide if we can combine
+ * two particular bloom filters.
+ */
+Datum
+brin_bloom_union(PG_FUNCTION_ARGS)
+{
+	int			i;
+	int			nbytes;
+	uint32		nbits_set;
+	BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
+	BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
+	BloomFilter *filter_a;
+	BloomFilter *filter_b;
+
+	Assert(col_a->bv_attno == col_b->bv_attno);
+	Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
+
+	filter_a = (BloomFilter *) PG_DETOAST_DATUM(col_a->bv_values[0]);
+	filter_b = (BloomFilter *) PG_DETOAST_DATUM(col_b->bv_values[0]);
+
+	/* make sure the filters use the same parameters */
+	Assert(filter_a && filter_b);
+	Assert(filter_a->nbits == filter_b->nbits);
+	Assert(filter_a->nhashes == filter_b->nhashes);
+	Assert((filter_a->nbits > 0) && (filter_a->nbits % 8 == 0));
+
+	nbytes = (filter_a->nbits) / 8;
+
+	/*
+	 * OR the bitmaps, and recount the set bits as we go.  Bits present in
+	 * both filters must be counted once, so the two counters can't simply
+	 * be added up.
+	 */
+	nbits_set = 0;
+	for (i = 0; i < nbytes; i++)
+	{
+		unsigned char merged;
+
+		filter_a->data[i] |= filter_b->data[i];
+
+		/* count the bits of this byte, clearing the lowest set bit each turn */
+		merged = (unsigned char) filter_a->data[i];
+		while (merged != 0)
+		{
+			merged &= (unsigned char) (merged - 1);
+			nbits_set++;
+		}
+	}
+
+	filter_a->nbits_set = nbits_set;
+
+	/*
+	 * Detoasting returns a new copy when the stored datum was compressed or
+	 * had a short header.  In that case the merge above modified the copy,
+	 * so it has to replace the stored summary or the result would be lost.
+	 */
+	if (PointerGetDatum(filter_a) != col_a->bv_values[0])
+	{
+		pfree(DatumGetPointer(col_a->bv_values[0]));
+		col_a->bv_values[0] = PointerGetDatum(filter_a);
+	}
+
+	/* likewise release a detoasted copy of "b"; col_b itself is untouched */
+	if (PointerGetDatum(filter_b) != col_b->bv_values[0])
+		pfree(filter_b);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * bloom_get_procinfo
+ *		Cache and return a bloom opclass support procedure.
+ *
+ * Returns the FmgrInfo of the support procedure with number procnum for
+ * index attribute attno (1-based).  The lookup result is cached in the
+ * attribute's BloomOpaque, at index (procnum - PROCNUM_BASE).
+ *
+ * The callers in this file invoke the returned function without checking
+ * for NULL, so a missing procedure is reported as an error here instead of
+ * being handed back as a NULL pointer.  The extra_proc_missing flags of
+ * BloomOpaque are therefore not consulted.
+ */
+static FmgrInfo *
+bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
+{
+	BloomOpaque *opaque;
+	uint16		basenum = procnum - PROCNUM_BASE;
+
+	/*
+	 * We cache these in the opaque struct, to avoid repetitive syscache
+	 * lookups.
+	 */
+	opaque = (BloomOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
+
+	/* fn_oid is still InvalidOid until the first successful lookup */
+	if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
+	{
+		if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
+												procnum)))
+		{
+			fmgr_info_copy(&opaque->extra_procinfos[basenum],
+						   index_getprocinfo(bdesc->bd_index, attno, procnum),
+						   bdesc->bd_context);
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg_internal("invalid opclass definition"),
+					 errdetail_internal("The operator class is missing support function %d for column %d.",
+										procnum, attno)));
+		}
+	}
+
+	return &opaque->extra_procinfos[basenum];
+}
+
+/*
+ * brin_bloom_options
+ *		Register the opclass-level options of the bloom opclass.
+ *
+ * Argument 0 is the local_relopts to fill in.  Two real-valued options are
+ * added, each stored in the corresponding BloomOptions field:
+ *
+ *	n_distinct_per_range - default BLOOM_DEFAULT_NDISTINCT_PER_RANGE,
+ *		limits -1.0 and INT_MAX
+ *	false_positive_rate - default BLOOM_DEFAULT_FALSE_POSITIVE_RATE,
+ *		limits BLOOM_MIN_FALSE_POSITIVE_RATE and
+ *		BLOOM_MAX_FALSE_POSITIVE_RATE
+ */
+Datum
+brin_bloom_options(PG_FUNCTION_ARGS)
+{
+	local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
+
+	init_local_reloptions(relopts, sizeof(BloomOptions));
+
+	add_local_real_reloption(relopts, "n_distinct_per_range",
+							 "number of distinct items expected in a BRIN page range",
+							 BLOOM_DEFAULT_NDISTINCT_PER_RANGE,
+							 -1.0, INT_MAX, offsetof(BloomOptions, nDistinctPerRange));
+
+	add_local_real_reloption(relopts, "false_positive_rate",
+							 "desired false-positive rate for the bloom filters",
+							 BLOOM_DEFAULT_FALSE_POSITIVE_RATE,
+							 BLOOM_MIN_FALSE_POSITIVE_RATE,
+							 BLOOM_MAX_FALSE_POSITIVE_RATE,
+							 offsetof(BloomOptions, falsePositiveRate));
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * brin_bloom_summary_in
+ *		- input routine for type pg_brin_bloom_summary.
+ *
+ * pg_brin_bloom_summary is only used internally to represent summaries
+ * in BRIN bloom indexes, so it has no operations of its own, and we
+ * disallow input too.  Calling this function always raises an error.
+ */
+Datum
+brin_bloom_summary_in(PG_FUNCTION_ARGS)
+{
+	/*
+	 * pg_brin_bloom_summary stores the data in binary form and parsing text
+	 * input is not needed, so disallow this.
+	 */
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+
+/*
+ * brin_bloom_summary_out
+ *		- output routine for type pg_brin_bloom_summary.
+ *
+ * BRIN bloom summaries are serialized into a bytea value, but we want
+ * to output something nicer humans can understand.
+ *
+ * Argument 0 is the summary value.  The result is a cstring of the form
+ * "{mode: hashed  nhashes: N  nbits: N  nbits_set: N}"; the bitmap itself
+ * is not printed.
+ */
+Datum
+brin_bloom_summary_out(PG_FUNCTION_ARGS)
+{
+	BloomFilter *filter;
+	StringInfoData str;
+
+	/*
+	 * Detoast the data to get value with a full 4B header.  PG_DETOAST_DATUM
+	 * expects a Datum, so fetch the argument as one rather than as an
+	 * already-converted pointer.
+	 */
+	filter = (BloomFilter *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+
+	initStringInfo(&str);
+	appendStringInfoChar(&str, '{');
+
+	appendStringInfo(&str, "mode: hashed  nhashes: %u  nbits: %u  nbits_set: %u",
+					 filter->nhashes, filter->nbits, filter->nbits_set);
+
+	appendStringInfoChar(&str, '}');
+
+	PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * brin_bloom_summary_recv
+ *		- binary input routine for type pg_brin_bloom_summary.
+ *
+ * Binary input is rejected just like text input (see
+ * brin_bloom_summary_in): calling this function always raises an error.
+ */
+Datum
+brin_bloom_summary_recv(PG_FUNCTION_ARGS)
+{
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+/*
+ * brin_bloom_summary_send
+ *		- binary output routine for type pg_brin_bloom_summary.
+ *
+ * BRIN bloom summaries are serialized in a bytea value (although the
+ * type is named differently), so let's just send that.
+ */
+Datum
+brin_bloom_summary_send(PG_FUNCTION_ARGS)
+{
+	/* hand our own call info straight to the bytea send routine */
+	return byteasend(fcinfo);
+}
diff --git a/src/backend/access/brin/brin_inclusion.c b/src/backend/access/brin/brin_inclusion.c
new file mode 100644
index 0000000..4b02d37
--- /dev/null
+++ b/src/backend/access/brin/brin_inclusion.c
@@ -0,0 +1,657 @@
+/*
+ * brin_inclusion.c
+ *		Implementation of inclusion opclasses for BRIN
+ *
+ * This module provides framework BRIN support functions for the "inclusion"
+ * operator classes.  A few SQL-level support functions are also required for
+ * each opclass.
+ *
+ * The "inclusion" BRIN strategy is useful for types that support R-Tree
+ * operations.  This implementation is a straight mapping of those operations
+ * to the block-range nature of BRIN, with two exceptions: (a) we explicitly
+ * support "empty" elements: at least with range types, we need to consider
+ * emptiness separately from regular R-Tree strategies; and (b) we need to
+ * consider "unmergeable" elements, that is, a set of elements for whose union
+ * no representation exists.  The only case where that happens as of this
+ * writing is the INET type, where IPv6 values cannot be merged with IPv4
+ * values.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_inclusion.c
+ */
+#include "postgres.h"
+
+#include "access/brin_internal.h"
+#include "access/brin_tuple.h"
+#include "access/genam.h"
+#include "access/skey.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * Additional SQL level support functions
+ *
+ * Procedure numbers must not use values reserved for BRIN itself; see
+ * brin_internal.h.
+ */
+#define		INCLUSION_MAX_PROCNUMS	4	/* maximum support procs we need */
+#define		PROCNUM_MERGE			11	/* merge two values; required */
+#define		PROCNUM_MERGEABLE		12	/* can two values be merged? optional */
+#define		PROCNUM_CONTAINS		13	/* does union contain value? optional */
+#define		PROCNUM_EMPTY			14	/* is the value empty? optional */
+
+
+/*
+ * Subtract this from procnum to obtain the index in the InclusionOpaque
+ * arrays (must be equal to the smallest private procnum, PROCNUM_MERGE).
+ */
+#define		PROCNUM_BASE			11
+
+/*-
+ * The values stored in the bv_values arrays correspond to:
+ *
+ * INCLUSION_UNION
+ *		the union of the values in the block range
+ * INCLUSION_UNMERGEABLE
+ *		whether the values in the block range cannot be merged
+ *		(e.g. an IPv6 address amidst IPv4 addresses)
+ * INCLUSION_CONTAINS_EMPTY
+ *		whether an empty value is present in any tuple
+ *		in the block range
+ */
+#define INCLUSION_UNION				0	/* typed as opcinfo's type argument */
+#define INCLUSION_UNMERGEABLE		1	/* typed as bool */
+#define INCLUSION_CONTAINS_EMPTY	2	/* typed as bool */
+
+
+typedef struct InclusionOpaque
+{
+	FmgrInfo	extra_procinfos[INCLUSION_MAX_PROCNUMS];	/* by procnum - PROCNUM_BASE */
+	bool		extra_proc_missing[INCLUSION_MAX_PROCNUMS]; /* lookup found no proc */
+	Oid			cached_subtype; /* subtype strategy_procinfos was filled for */
+	FmgrInfo	strategy_procinfos[RTMaxStrategyNumber];	/* by strategy number - 1 */
+} InclusionOpaque;
+
+static FmgrInfo *inclusion_get_procinfo(BrinDesc *bdesc, uint16 attno,
+										uint16 procnum);
+static FmgrInfo *inclusion_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno,
+												 Oid subtype, uint16 strategynum);
+
+
+/*
+ * BRIN inclusion OpcInfo function
+ */
+Datum
+brin_inclusion_opcinfo(PG_FUNCTION_ARGS)
+{
+	Oid			elemtype = PG_GETARG_OID(0);
+	TypeCacheEntry *boolentry = lookup_type_cache(BOOLOID, 0);
+	Size		header_len = SizeofBrinOpcInfo(3);
+	BrinOpcInfo *opcinfo;
+
+	/*
+	 * The opaque struct lives in the same allocation, right after the
+	 * maxaligned BrinOpcInfo.  palloc0 zeroes all of it, which is exactly the
+	 * "nothing cached yet" state: every FmgrInfo has fn_oid InvalidOid and
+	 * every extra_proc_missing flag is false.  Entries are filled in lazily;
+	 * strategy_procinfos entries are reset (fn_oid zeroed) when cached_subtype
+	 * changes, while a failed extra_procinfos lookup is remembered in
+	 * extra_proc_missing so that it is not retried.
+	 */
+	opcinfo = palloc0(MAXALIGN(header_len) + sizeof(InclusionOpaque));
+	opcinfo->oi_opaque = (InclusionOpaque *)
+		MAXALIGN((char *) opcinfo + header_len);
+	opcinfo->oi_regular_nulls = true;
+	opcinfo->oi_nstored = 3;
+
+	/* slot 0: the union, typed as the type given as argument */
+	opcinfo->oi_typcache[INCLUSION_UNION] = lookup_type_cache(elemtype, 0);
+
+	/* slot 1: flag for "range holds elements that are not mergeable" */
+	opcinfo->oi_typcache[INCLUSION_UNMERGEABLE] = boolentry;
+
+	/* slot 2: flag for "range holds the empty element" */
+	opcinfo->oi_typcache[INCLUSION_CONTAINS_EMPTY] = boolentry;
+
+	PG_RETURN_POINTER(opcinfo);
+}
+
+/*
+ * BRIN inclusion add value function
+ *
+ * Examine the given index tuple (which contains partial status of a certain
+ * page range) by comparing it to the given value that comes from another heap
+ * tuple.  If the new value is outside the union specified by the existing
+ * tuple values, update the index tuple and return true.  Otherwise, return
+ * false and leave the index tuple unmodified.
+ */
+Datum
+brin_inclusion_add_value(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	Datum		newval = PG_GETARG_DATUM(2);
+	bool		isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
+	Oid			colloid = PG_GET_COLLATION();
+	FmgrInfo   *finfo;
+	Datum		result;
+	bool		new = false;	/* summary was initialized by this call */
+	AttrNumber	attno;
+	Form_pg_attribute attr;
+
+	Assert(!isnull);
+
+	attno = column->bv_attno;
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+
+	/*
+	 * If the summary is all-nulls, initialize it from the new value (which we
+	 * know to be not null); the emptiness check below still has to run.
+	 */
+	if (column->bv_allnulls)
+	{
+		column->bv_values[INCLUSION_UNION] =
+			datumCopy(newval, attr->attbyval, attr->attlen);
+		column->bv_values[INCLUSION_UNMERGEABLE] = BoolGetDatum(false);
+		column->bv_values[INCLUSION_CONTAINS_EMPTY] = BoolGetDatum(false);
+		column->bv_allnulls = false;
+		new = true;
+	}
+
+	/*
+	 * No need for further processing if the block range is marked as
+	 * containing unmergeable values.
+	 */
+	if (DatumGetBool(column->bv_values[INCLUSION_UNMERGEABLE]))
+		PG_RETURN_BOOL(false);
+
+	/*
+	 * If the opclass supports the concept of empty values, test the passed
+	 * new value for emptiness; if it returns true, we need to set the
+	 * "contains empty" flag in the element (unless already set).
+	 */
+	finfo = inclusion_get_procinfo(bdesc, attno, PROCNUM_EMPTY);
+	if (finfo != NULL && DatumGetBool(FunctionCall1Coll(finfo, colloid, newval)))
+	{
+		if (!DatumGetBool(column->bv_values[INCLUSION_CONTAINS_EMPTY]))
+		{
+			column->bv_values[INCLUSION_CONTAINS_EMPTY] = BoolGetDatum(true);
+			PG_RETURN_BOOL(true);
+		}
+
+		PG_RETURN_BOOL(false);
+	}
+	/* A summary that was initialized above has been modified; report that. */
+	if (new)
+		PG_RETURN_BOOL(true);
+
+	/* Check if the new value is already contained. */
+	finfo = inclusion_get_procinfo(bdesc, attno, PROCNUM_CONTAINS);
+	if (finfo != NULL &&
+		DatumGetBool(FunctionCall2Coll(finfo, colloid,
+									   column->bv_values[INCLUSION_UNION],
+									   newval)))
+		PG_RETURN_BOOL(false);
+
+	/*
+	 * Check if the new value is mergeable to the existing union.  If it is
+	 * not, mark the value as containing unmergeable elements and get out.
+	 *
+	 * Note: at this point we could remove the value from the union, since
+	 * it's not going to be used any longer.  However, the BRIN framework
+	 * doesn't allow for the value not being present.  Improve someday.
+	 */
+	finfo = inclusion_get_procinfo(bdesc, attno, PROCNUM_MERGEABLE);
+	if (finfo != NULL &&
+		!DatumGetBool(FunctionCall2Coll(finfo, colloid,
+										column->bv_values[INCLUSION_UNION],
+										newval)))
+	{
+		column->bv_values[INCLUSION_UNMERGEABLE] = BoolGetDatum(true);
+		PG_RETURN_BOOL(true);
+	}
+
+	/* Finally, merge the new value to the existing union. */
+	finfo = inclusion_get_procinfo(bdesc, attno, PROCNUM_MERGE);
+	Assert(finfo != NULL);
+	result = FunctionCall2Coll(finfo, colloid,
+							   column->bv_values[INCLUSION_UNION], newval);
+	if (!attr->attbyval &&
+		DatumGetPointer(result) != DatumGetPointer(column->bv_values[INCLUSION_UNION]))
+	{
+		pfree(DatumGetPointer(column->bv_values[INCLUSION_UNION]));
+		/* if the merge proc handed back newval itself, store a copy of it */
+		if (result == newval)
+			result = datumCopy(result, attr->attbyval, attr->attlen);
+	}
+	column->bv_values[INCLUSION_UNION] = result;
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * BRIN inclusion consistent function
+ *
+ * We're no longer dealing with NULL keys in the consistent function, that is
+ * now handled by the AM code. That means we should not get any all-NULL ranges
+ * either, because those can't be consistent with regular (not [IS] NULL) keys.
+ *
+ * All of the strategies are optional.
+ */
+Datum
+brin_inclusion_consistent(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	ScanKey		key = (ScanKey) PG_GETARG_POINTER(2);
+	Oid			colloid = PG_GET_COLLATION(),
+				subtype;
+	Datum		unionval;
+	AttrNumber	attno;
+	Datum		query;
+	FmgrInfo   *finfo;
+	Datum		result;
+
+	/* This opclass uses the old signature with only three arguments. */
+	Assert(PG_NARGS() == 3);
+
+	/* Should not be dealing with all-NULL ranges. */
+	Assert(!column->bv_allnulls);
+
+	/* A range holding unmergeable elements is always reported as matching. */
+	if (DatumGetBool(column->bv_values[INCLUSION_UNMERGEABLE]))
+		PG_RETURN_BOOL(true);
+
+	attno = key->sk_attno;
+	subtype = key->sk_subtype;
+	query = key->sk_argument;
+	unionval = column->bv_values[INCLUSION_UNION];
+	switch (key->sk_strategy)
+	{
+			/*
+			 * Placement strategies
+			 *
+			 * These are implemented by logically negating the result of the
+			 * converse placement operator; for this to work, the converse
+			 * operator must be part of the opclass.  An error will be thrown
+			 * by inclusion_get_strategy_procinfo() if the required strategy
+			 * is not part of the opclass.
+			 *
+			 * These all return false if either argument is empty, so there is
+			 * no need to check for empty elements.
+			 */
+
+		case RTLeftStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTOverRightStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTOverLeftStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTRightStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTOverRightStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTLeftStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTRightStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTOverLeftStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTBelowStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTOverAboveStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTOverBelowStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTAboveStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTOverAboveStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTBelowStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		case RTAboveStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTOverBelowStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+			/*
+			 * Overlap and contains strategies
+			 *
+			 * These strategies are simple enough that we can simply call the
+			 * operator and return its result.  Empty elements don't change
+			 * the result.
+			 */
+
+		case RTOverlapStrategyNumber:
+		case RTContainsStrategyNumber:
+		case RTContainsElemStrategyNumber:
+		case RTSubStrategyNumber:
+		case RTSubEqualStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													key->sk_strategy);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_DATUM(result);
+
+			/*
+			 * Contained by strategies
+			 *
+			 * We cannot just call the original operator for the contained by
+			 * strategies because some elements can be contained even though
+			 * the union is not; instead we use the overlap operator.
+			 *
+			 * We check for empty elements separately as they are not merged
+			 * to the union but contained by everything.
+			 */
+
+		case RTContainedByStrategyNumber:
+		case RTSuperStrategyNumber:
+		case RTSuperEqualStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTOverlapStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			if (DatumGetBool(result))
+				PG_RETURN_BOOL(true);
+
+			PG_RETURN_DATUM(column->bv_values[INCLUSION_CONTAINS_EMPTY]);
+
+			/*
+			 * Adjacent strategy
+			 *
+			 * We test for overlap first but to be safe we need to call the
+			 * actual adjacent operator also.
+			 *
+			 * An empty element cannot be adjacent to any other, so there is
+			 * no need to check for it.
+			 */
+
+		case RTAdjacentStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTOverlapStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			if (DatumGetBool(result))
+				PG_RETURN_BOOL(true);
+
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTAdjacentStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_DATUM(result);
+
+			/*
+			 * Basic comparison strategies
+			 *
+			 * It is straightforward to support the equality strategies with
+			 * the contains operator.  Generally, inequality strategies do not
+			 * make much sense for the types which will be used with the
+			 * inclusion BRIN family of opclasses, but it is possible to
+			 * implement them with logical negation of the left-of and
+			 * right-of operators.
+			 *
+			 * NB: These strategies cannot be used with geometric datatypes
+			 * that use comparison of areas!  The only exception is the "same"
+			 * strategy.
+			 *
+			 * Empty elements are considered to be less than the others.  We
+			 * cannot use the empty support function to check the query is an
+			 * empty element, because the query can be another data type than
+			 * the empty support function argument.  So we will return true,
+			 * if there is a possibility that empty elements will change the
+			 * result.
+			 */
+
+		case RTLessStrategyNumber:
+		case RTLessEqualStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTRightStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			if (!DatumGetBool(result))
+				PG_RETURN_BOOL(true);
+
+			PG_RETURN_DATUM(column->bv_values[INCLUSION_CONTAINS_EMPTY]);
+
+		case RTSameStrategyNumber:
+		case RTEqualStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTContainsStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			if (DatumGetBool(result))
+				PG_RETURN_BOOL(true);
+
+			PG_RETURN_DATUM(column->bv_values[INCLUSION_CONTAINS_EMPTY]);
+
+		case RTGreaterEqualStrategyNumber:
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTLeftStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			if (!DatumGetBool(result))
+				PG_RETURN_BOOL(true);
+
+			PG_RETURN_DATUM(column->bv_values[INCLUSION_CONTAINS_EMPTY]);
+
+		case RTGreaterStrategyNumber:
+			/* no need to check for empty elements */
+			finfo = inclusion_get_strategy_procinfo(bdesc, attno, subtype,
+													RTLeftStrategyNumber);
+			result = FunctionCall2Coll(finfo, colloid, unionval, query);
+			PG_RETURN_BOOL(!DatumGetBool(result));
+
+		default:
+			/* shouldn't happen */
+			elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+			PG_RETURN_BOOL(false);	/* keep compiler quiet */
+	}
+}
+
+/*
+ * BRIN inclusion union function
+ *
+ * Given two BrinValues, update the first of them as a union of the summary
+ * values contained in both.  The second one is untouched.
+ */
+Datum
+brin_inclusion_union(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
+	BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
+	Oid			colloid = PG_GET_COLLATION();
+	Datum	   *a_vals = col_a->bv_values;
+	Datum	   *b_vals = col_b->bv_values;
+	AttrNumber	attno;
+	Form_pg_attribute attr;
+	FmgrInfo   *proc;
+	Datum		merged;
+
+	Assert(col_a->bv_attno == col_b->bv_attno);
+	Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
+
+	attno = col_a->bv_attno;
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+
+	/* Carry B's "contains empty" flag over to A, unless A has it already. */
+	if (DatumGetBool(b_vals[INCLUSION_CONTAINS_EMPTY]) &&
+		!DatumGetBool(a_vals[INCLUSION_CONTAINS_EMPTY]))
+		a_vals[INCLUSION_CONTAINS_EMPTY] = BoolGetDatum(true);
+
+	/* If A is already marked unmergeable, its union is left alone. */
+	if (DatumGetBool(a_vals[INCLUSION_UNMERGEABLE]))
+		PG_RETURN_VOID();
+
+	/* B being unmergeable makes A unmergeable too. */
+	if (DatumGetBool(b_vals[INCLUSION_UNMERGEABLE]))
+	{
+		a_vals[INCLUSION_UNMERGEABLE] = BoolGetDatum(true);
+		PG_RETURN_VOID();
+	}
+
+	/* Likewise if the (optional) mergeable proc rejects the two unions. */
+	proc = inclusion_get_procinfo(bdesc, attno, PROCNUM_MERGEABLE);
+	if (proc != NULL &&
+		!DatumGetBool(FunctionCall2Coll(proc, colloid, a_vals[INCLUSION_UNION],
+										b_vals[INCLUSION_UNION])))
+	{
+		a_vals[INCLUSION_UNMERGEABLE] = BoolGetDatum(true);
+		PG_RETURN_VOID();
+	}
+
+	/* Nothing stands in the way: fold B's union into A's. */
+	proc = inclusion_get_procinfo(bdesc, attno, PROCNUM_MERGE);
+	Assert(proc != NULL);
+	merged = FunctionCall2Coll(proc, colloid,
+							   a_vals[INCLUSION_UNION],
+							   b_vals[INCLUSION_UNION]);
+	if (!attr->attbyval &&
+		DatumGetPointer(merged) != DatumGetPointer(a_vals[INCLUSION_UNION]))
+	{
+		pfree(DatumGetPointer(a_vals[INCLUSION_UNION]));
+		if (merged == b_vals[INCLUSION_UNION])
+			merged = datumCopy(merged, attr->attbyval, attr->attlen);
+	}
+	a_vals[INCLUSION_UNION] = merged;
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Cache and return inclusion opclass support procedure
+ *
+ * Return the procedure corresponding to the given function support number
+ * or NULL if it does not exist.
+ */
+static FmgrInfo *
+inclusion_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
+{
+	uint16		slot = procnum - PROCNUM_BASE;
+	InclusionOpaque *cache;
+	FmgrInfo   *entry;
+
+	/*
+	 * Lookup results are kept in the attribute's opaque struct, so that
+	 * repeated calls do not have to repeat the lookup.
+	 */
+	cache = (InclusionOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
+	entry = &cache->extra_procinfos[slot];
+
+	/* A previous lookup came up empty: don't bother searching again. */
+	if (cache->extra_proc_missing[slot])
+		return NULL;
+
+	/* Already resolved by an earlier call. */
+	if (entry->fn_oid != InvalidOid)
+		return entry;
+
+	/* First request for this procnum; remember it if the index lacks it. */
+	if (!RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
+											 procnum)))
+	{
+		cache->extra_proc_missing[slot] = true;
+		return NULL;
+	}
+
+	/* Copy the index's FmgrInfo for this proc into our cache entry. */
+	fmgr_info_copy(entry,
+				   index_getprocinfo(bdesc->bd_index, attno, procnum),
+				   bdesc->bd_context);
+
+	return entry;
+}
+
+/*
+ * Cache and return the procedure of the given strategy
+ *
+ * Return the procedure corresponding to the given sub-type and strategy
+ * number.  The data type of the index will be used as the left hand side of
+ * the operator and the given sub-type will be used as the right hand side.
+ * Throws an error if the pg_amop row does not exist, but that should not
+ * happen with a properly configured opclass.
+ *
+ * It always throws an error when the data type of the opclass is different
+ * from the data type of the column or the expression.  That happens when the
+ * column data type has implicit cast to the opclass data type.  We don't
+ * bother casting types, because this situation can easily be avoided by
+ * setting storage data type to that of the opclass.  The same problem does not
+ * apply to the data type of the right hand side, because the type in the
+ * ScanKey always matches the opclass' one.
+ *
+ * Note: this function mirrors minmax_get_strategy_procinfo; if changes are
+ * made here, see that function too.
+ */
+static FmgrInfo *
+inclusion_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype,
+								uint16 strategynum)
+{
+	InclusionOpaque *cache;
+	FmgrInfo   *entry;
+
+	Assert(strategynum >= 1 &&
+		   strategynum <= RTMaxStrategyNumber);
+
+	cache = (InclusionOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
+	entry = &cache->strategy_procinfos[strategynum - 1];
+
+	/*
+	 * Only the procedures for the most recently used sub-type are cached.
+	 * A different sub-type means every cached entry is stale, so mark them
+	 * all as not looked up yet.
+	 */
+	if (cache->cached_subtype != subtype)
+	{
+		int			idx;
+
+		for (idx = 0; idx < RTMaxStrategyNumber; idx++)
+			cache->strategy_procinfos[idx].fn_oid = InvalidOid;
+		cache->cached_subtype = subtype;
+	}
+
+	/* Cache miss: find the operator in pg_amop and set up its function. */
+	if (entry->fn_oid == InvalidOid)
+	{
+		Oid			opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
+		Form_pg_attribute attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+		HeapTuple	amoptup;
+		Datum		oprdatum;
+		Oid			oprid;
+		bool		isNull;
+
+		amoptup = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
+								  ObjectIdGetDatum(attr->atttypid),
+								  ObjectIdGetDatum(subtype),
+								  Int16GetDatum(strategynum));
+		if (!HeapTupleIsValid(amoptup))
+			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+				 strategynum, attr->atttypid, subtype, opfamily);
+
+		oprdatum = SysCacheGetAttr(AMOPSTRATEGY, amoptup,
+								   Anum_pg_amop_amopopr, &isNull);
+		oprid = DatumGetObjectId(oprdatum);
+		ReleaseSysCache(amoptup);
+		Assert(!isNull && RegProcedureIsValid(oprid));
+
+		fmgr_info_cxt(get_opcode(oprid), entry, bdesc->bd_context);
+	}
+
+	return entry;
+}
diff --git a/src/backend/access/brin/brin_minmax.c b/src/backend/access/brin/brin_minmax.c
new file mode 100644
index 0000000..9e8a8e0
--- /dev/null
+++ b/src/backend/access/brin/brin_minmax.c
@@ -0,0 +1,317 @@
+/*
+ * brin_minmax.c
+ *		Implementation of Min/Max opclass for BRIN
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_minmax.c
+ */
+#include "postgres.h"
+
+#include "access/brin_internal.h"
+#include "access/brin_tuple.h"
+#include "access/genam.h"
+#include "access/stratnum.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+typedef struct MinmaxOpaque
+{
+	Oid			cached_subtype; /* subtype strategy_procinfos was filled for */
+	FmgrInfo	strategy_procinfos[BTMaxStrategyNumber];	/* by strategy number - 1 */
+} MinmaxOpaque;
+
+static FmgrInfo *minmax_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno,
+											  Oid subtype, uint16 strategynum);
+
+
+Datum
+brin_minmax_opcinfo(PG_FUNCTION_ARGS)
+{
+	Oid			elemtype = PG_GETARG_OID(0);
+	Size		header_len = SizeofBrinOpcInfo(2);
+	BrinOpcInfo *opcinfo;
+
+	/*
+	 * palloc0 leaves every FmgrInfo in the trailing MinmaxOpaque with fn_oid
+	 * == InvalidOid, i.e. "not looked up yet"; they are filled in lazily.
+	 */
+	opcinfo = palloc0(MAXALIGN(header_len) + sizeof(MinmaxOpaque));
+	opcinfo->oi_opaque = (MinmaxOpaque *)
+		MAXALIGN((char *) opcinfo + header_len);
+	opcinfo->oi_regular_nulls = true;
+	opcinfo->oi_nstored = 2;
+	/* slot 0 is the minimum, slot 1 the maximum; same type for both */
+	opcinfo->oi_typcache[0] = lookup_type_cache(elemtype, 0);
+	opcinfo->oi_typcache[1] = opcinfo->oi_typcache[0];
+
+	PG_RETURN_POINTER(opcinfo);
+}
+
+/*
+ * Examine the given index tuple (which contains partial status of a certain
+ * page range) by comparing it to the given value that comes from another heap
+ * tuple.  If the new value is outside the min/max range specified by the
+ * existing tuple values, update the index tuple and return true.  Otherwise,
+ * return false and leave the index tuple unmodified.
+ */
+Datum
+brin_minmax_add_value(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	Datum		newval = PG_GETARG_DATUM(2);
+	bool		isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
+	Oid			colloid = PG_GET_COLLATION();
+	FmgrInfo   *cmpFn;
+	Datum		compar;
+	bool		updated = false;
+	Form_pg_attribute attr;
+	AttrNumber	attno;
+	/* the null flag is a bool argument; it is only looked at by the Assert */
+	Assert(!isnull);
+
+	attno = column->bv_attno;
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+
+	/*
+	 * If the recorded value is null, store the new value (which we know to be
+	 * not null) as both minimum and maximum, and we're done.
+	 */
+	if (column->bv_allnulls)
+	{
+		column->bv_values[0] = datumCopy(newval, attr->attbyval, attr->attlen);
+		column->bv_values[1] = datumCopy(newval, attr->attbyval, attr->attlen);
+		column->bv_allnulls = false;
+		PG_RETURN_BOOL(true);
+	}
+
+	/*
+	 * Otherwise, need to compare the new value with the existing boundaries
+	 * and update them accordingly.  First check if it's less than the
+	 * existing minimum.
+	 */
+	cmpFn = minmax_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+										 BTLessStrategyNumber);
+	compar = FunctionCall2Coll(cmpFn, colloid, newval, column->bv_values[0]);
+	if (DatumGetBool(compar))
+	{
+		if (!attr->attbyval)
+			pfree(DatumGetPointer(column->bv_values[0]));
+		column->bv_values[0] = datumCopy(newval, attr->attbyval, attr->attlen);
+		updated = true;
+	}
+
+	/*
+	 * And now compare it to the existing maximum.
+	 */
+	cmpFn = minmax_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+										 BTGreaterStrategyNumber);
+	compar = FunctionCall2Coll(cmpFn, colloid, newval, column->bv_values[1]);
+	if (DatumGetBool(compar))
+	{
+		if (!attr->attbyval)
+			pfree(DatumGetPointer(column->bv_values[1]));
+		column->bv_values[1] = datumCopy(newval, attr->attbyval, attr->attlen);
+		updated = true;
+	}
+
+	PG_RETURN_BOOL(updated);
+}
+
+/*
+ * Given an index tuple corresponding to a certain page range and a scan key,
+ * return whether the scan key is consistent with the index tuple's min/max
+ * values.  Return true if so, false otherwise.
+ *
+ * We're no longer dealing with NULL keys in the consistent function, that is
+ * now handled by the AM code. That means we should not get any all-NULL ranges
+ * either, because those can't be consistent with regular (not [IS] NULL) keys.
+ */
+Datum
+brin_minmax_consistent(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	ScanKey		key = (ScanKey) PG_GETARG_POINTER(2);
+	Oid			colloid = PG_GET_COLLATION();
+	Oid			subtype = key->sk_subtype;
+	AttrNumber	attno = key->sk_attno;
+	Datum		scanval = key->sk_argument;
+	Datum		minval;
+	Datum		maxval;
+	Datum		matches;
+	FmgrInfo   *cmp;
+
+	/* This opclass uses the old signature with only three arguments. */
+	Assert(PG_NARGS() == 3);
+
+	/* Should not be dealing with all-NULL ranges. */
+	Assert(!column->bv_allnulls);
+
+	/* bv_values[0] is the range's minimum, bv_values[1] its maximum */
+	minval = column->bv_values[0];
+	maxval = column->bv_values[1];
+
+	switch (key->sk_strategy)
+	{
+		case BTLessStrategyNumber:
+		case BTLessEqualStrategyNumber:
+			/* apply the scan key's own operator to min() and the key */
+			cmp = minmax_get_strategy_procinfo(bdesc, attno, subtype,
+											   key->sk_strategy);
+			matches = FunctionCall2Coll(cmp, colloid, minval, scanval);
+			break;
+
+		case BTEqualStrategyNumber:
+			/* col = key needs min() <= key ... */
+			cmp = minmax_get_strategy_procinfo(bdesc, attno, subtype,
+											   BTLessEqualStrategyNumber);
+			matches = FunctionCall2Coll(cmp, colloid, minval, scanval);
+			if (DatumGetBool(matches))
+			{
+				/* ... and also max() >= key */
+				cmp = minmax_get_strategy_procinfo(bdesc, attno, subtype,
+												   BTGreaterEqualStrategyNumber);
+				matches = FunctionCall2Coll(cmp, colloid, maxval, scanval);
+			}
+			break;
+
+		case BTGreaterEqualStrategyNumber:
+		case BTGreaterStrategyNumber:
+			/* apply the scan key's own operator to max() and the key */
+			cmp = minmax_get_strategy_procinfo(bdesc, attno, subtype,
+											   key->sk_strategy);
+			matches = FunctionCall2Coll(cmp, colloid, maxval, scanval);
+			break;
+
+		default:
+			/* shouldn't happen */
+			elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+			matches = 0;
+			break;
+	}
+
+	PG_RETURN_DATUM(matches);
+}
+
+/*
+ * Given two BrinValues, update the first of them as a union of the summary
+ * values contained in both.  The second one is untouched.
+ */
+Datum
+brin_minmax_union(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
+	BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
+	Oid			colloid = PG_GET_COLLATION();
+	AttrNumber	attno;
+	Form_pg_attribute attr;
+	FmgrInfo   *finfo;
+
+	/* Both inputs must be summaries of the same column, neither all-NULL. */
+	Assert(col_a->bv_attno == col_b->bv_attno);
+	Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
+
+	attno = col_a->bv_attno;
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+
+	/* Adjust minimum, if B's min is less than A's min */
+	finfo = minmax_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+										 BTLessStrategyNumber);
+	if (DatumGetBool(FunctionCall2Coll(finfo, colloid,
+									   col_b->bv_values[0],
+									   col_a->bv_values[0])))
+	{
+		if (!attr->attbyval)
+			pfree(DatumGetPointer(col_a->bv_values[0]));
+		col_a->bv_values[0] = datumCopy(col_b->bv_values[0],
+										attr->attbyval, attr->attlen);
+	}
+
+	/* Adjust maximum, if B's max is greater than A's max */
+	finfo = minmax_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+										 BTGreaterStrategyNumber);
+	if (DatumGetBool(FunctionCall2Coll(finfo, colloid,
+									   col_b->bv_values[1],
+									   col_a->bv_values[1])))
+	{
+		if (!attr->attbyval)
+			pfree(DatumGetPointer(col_a->bv_values[1]));
+		col_a->bv_values[1] = datumCopy(col_b->bv_values[1],
+										attr->attbyval, attr->attlen);
+	}
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Cache and return the procedure for the given strategy.
+ *
+ * Note: this function mirrors inclusion_get_strategy_procinfo; see notes
+ * there.  If changes are made here, see that function too.
+ */
+static FmgrInfo *
+minmax_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype,
+							 uint16 strategynum)
+{
+	MinmaxOpaque *cache;
+	FmgrInfo   *entry;
+
+	Assert(strategynum >= 1 &&
+		   strategynum <= BTMaxStrategyNumber);
+
+	cache = (MinmaxOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
+	entry = &cache->strategy_procinfos[strategynum - 1];
+
+	/*
+	 * Only the procedures for the most recently used subtype are cached.  A
+	 * different subtype means every cached entry is stale, so mark them all
+	 * as not looked up yet.
+	 */
+	if (cache->cached_subtype != subtype)
+	{
+		int			idx;
+
+		for (idx = 0; idx < BTMaxStrategyNumber; idx++)
+			cache->strategy_procinfos[idx].fn_oid = InvalidOid;
+		cache->cached_subtype = subtype;
+	}
+
+	/* Cache miss: find the operator in pg_amop and set up its function. */
+	if (entry->fn_oid == InvalidOid)
+	{
+		Oid			opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
+		Form_pg_attribute attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+		HeapTuple	amoptup;
+		Datum		oprdatum;
+		Oid			oprid;
+		bool		isNull;
+
+		amoptup = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
+								  ObjectIdGetDatum(attr->atttypid),
+								  ObjectIdGetDatum(subtype),
+								  Int16GetDatum(strategynum));
+		if (!HeapTupleIsValid(amoptup))
+			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+				 strategynum, attr->atttypid, subtype, opfamily);
+
+		oprdatum = SysCacheGetAttr(AMOPSTRATEGY, amoptup,
+								   Anum_pg_amop_amopopr, &isNull);
+		oprid = DatumGetObjectId(oprdatum);
+		ReleaseSysCache(amoptup);
+		Assert(!isNull && RegProcedureIsValid(oprid));
+
+		fmgr_info_cxt(get_opcode(oprid), entry, bdesc->bd_context);
+	}
+
+	return entry;
+}
diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c
new file mode 100644
index 0000000..9e29a08
--- /dev/null
+++ b/src/backend/access/brin/brin_minmax_multi.c
@@ -0,0 +1,3145 @@
+/*
+ * brin_minmax_multi.c
+ *		Implementation of Multi Min/Max opclass for BRIN
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * Implements a variant of minmax opclass, where the summary is composed of
+ * multiple smaller intervals. This allows us to handle outliers, which
+ * usually make the simple minmax opclass inefficient.
+ *
+ * Consider for example page range with simple minmax interval [1000,2000],
+ * and assume a new row gets inserted into the range with value 1000000.
+ * Due to that the interval gets [1000,1000000]. I.e. the minmax interval
+ * got 1000x wider and won't be useful to eliminate scan keys between 2001
+ * and 1000000.
+ *
+ * With minmax-multi opclass, we may have [1000,2000] interval initially,
+ * but after adding the new row we start tracking it as two intervals:
+ *
+ *   [1000,2000] and [1000000,1000000]
+ *
+ * This allows us to still eliminate the page range when the scan keys hit
+ * the gap between 2000 and 1000000, making it useful in cases when the
+ * simple minmax opclass gets inefficient.
+ *
+ * The number of intervals tracked per page range is somewhat flexible.
+ * What is restricted is the number of values per page range, and the limit
+ * is 32 by default (see values_per_range reloption). Collapsed intervals
+ * (with equal minimum and maximum value) are stored as a single value,
+ * while regular intervals require two values.
+ *
+ * When the number of values gets too high (by adding new values to the
+ * summary), we merge some of the intervals to free space for more values.
+ * This is done in a greedy way - we simply pick the two closest intervals,
+ * merge them, and repeat this until the number of values to store gets
+ * sufficiently low (below 50% of maximum values, but that is a mostly
+ * arbitrary threshold and may be changed easily).
+ *
+ * To pick the closest intervals we use the "distance" support procedure,
+ * which measures the space between two ranges (i.e. the length of the gap
+ * separating them). The computed value may be an approximation - in the
+ * worst case we will merge two ranges that are slightly less optimal at
+ * that step, but the index should still produce correct results.
+ *
+ * Compaction (reducing the number of values) is fairly expensive, as it
+ * requires calling the distance functions, sorting etc. So when building
+ * the summary, we use a significantly larger buffer, and only enforce the
+ * exact limit at the very end. This improves performance, and it also helps
+ * with building better ranges (due to the greedy approach).
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_minmax_multi.c
+ */
+#include "postgres.h"
+
+/* needed for PGSQL_AF_INET */
+#include <sys/socket.h>
+
+#include "access/genam.h"
+#include "access/brin.h"
+#include "access/brin_internal.h"
+#include "access/brin_tuple.h"
+#include "access/reloptions.h"
+#include "access/stratnum.h"
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_amop.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datum.h"
+#include "utils/float.h"
+#include "utils/inet.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/numeric.h"
+#include "utils/pg_lsn.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+#include "utils/uuid.h"
+
+/*
+ * Additional SQL level support functions
+ *
+ * Procedure numbers must not use values reserved for BRIN itself; see
+ * brin_internal.h.
+ */
+#define		MINMAX_MAX_PROCNUMS		1	/* maximum support procs we need */
+#define		PROCNUM_DISTANCE		11	/* required, distance between values */
+
+/*
+ * Subtract this from procnum to obtain the index in MinmaxMultiOpaque arrays
+ * (must be equal to the smallest of the private procnums above).
+ */
+#define		PROCNUM_BASE			11
+
+/*
+ * Sizing the insert buffer - we use 10x the number of values specified in
+ * the reloption, capped at 8192 so it does not get too large (and presumably
+ * at least 256). When the buffer gets full, we reduce the values by half.
+ */
+#define		MINMAX_BUFFER_FACTOR			10
+#define		MINMAX_BUFFER_MIN				256
+#define		MINMAX_BUFFER_MAX				8192
+#define		MINMAX_BUFFER_LOAD_FACTOR		0.5
+
+typedef struct MinmaxMultiOpaque
+{
+	FmgrInfo	extra_procinfos[MINMAX_MAX_PROCNUMS];	/* by procnum - PROCNUM_BASE */
+	bool		extra_proc_missing[MINMAX_MAX_PROCNUMS];	/* same indexing */
+	Oid			cached_subtype; /* presumably subtype of the cached strategies */
+	FmgrInfo	strategy_procinfos[BTMaxStrategyNumber];	/* one per btree strategy */
+} MinmaxMultiOpaque;
+
+/*
+ * Storage type for BRIN's minmax-multi reloptions
+ */
+typedef struct MinMaxMultiOptions
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	int			valuesPerRange; /* values per range; 0 selects the default */
+} MinMaxMultiOptions;
+
+#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE		32	/* default valuesPerRange */
+/* Yields valuesPerRange, or the default when opts is NULL or the value is 0. */
+#define MinMaxMultiGetValuesPerRange(opts) \
+		((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
+		 ((MinMaxMultiOptions *) (opts))->valuesPerRange : \
+		 MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE)
+/* True when a and b are both negative or both non-negative. */
+#define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))
+
+/*
+ * The summary of minmax-multi indexes has two representations - Ranges for
+ * convenient processing, and SerializedRanges for storage in bytea value.
+ *
+ * The Ranges struct stores the boundary values in a single array, but we
+ * treat regular and single-point ranges differently to save space. For
+ * regular ranges (with different boundary values) we have to store both
+ * the lower and upper bound of the range, while for "single-point ranges"
+ * we only need to store a single value.
+ *
+ * The 'values' array stores boundary values for regular ranges first (there
+ * are 2*nranges values to store), and then the nvalues boundary values for
+ * single-point ranges. That is, we have (2*nranges + nvalues) boundary
+ * values in the array.
+ *
+ * +-------------------------+----------------------------------+
+ * | ranges (2 * nranges of) | single point values (nvalues of) |
+ * +-------------------------+----------------------------------+
+ *
+ * This allows us to quickly add new values, and store outliers without
+ * having to widen any of the existing range values.
+ *
+ * 'nsorted' denotes how many of 'nvalues' in the values[] array are sorted.
+ * When nsorted == nvalues, all single point values are sorted.
+ *
+ * In memory we never hold more than maxvalues values; on disk the limit is
+ * target_maxvalues (values_per_range). If needed we merge some of the ranges.
+ *
+ * To minimize palloc overhead, we always allocate the full array with
+ * space for maxvalues elements. This should be fine as long as the
+ * maxvalues is reasonably small (64 seems fine), which is the case
+ * thanks to values_per_range reloption being limited to 256.
+ */
+typedef struct Ranges
+{
+	/* Cache information that we need quite often. */
+	Oid			typid;			/* type of the stored values */
+	Oid			colloid;		/* collation passed to cmp */
+	AttrNumber	attno;
+	FmgrInfo   *cmp;			/* boolean "less than" procedure */
+
+	/* (2*nranges + nvalues) <= maxvalues */
+	int			nranges;		/* number of ranges in the values[] array */
+	int			nsorted;		/* leading point values known to be sorted */
+	int			nvalues;		/* number of point values in values[] array */
+	int			maxvalues;		/* allocated length of the values[] array */
+
+	/*
+	 * We simply add the values into a large buffer, without any expensive
+	 * steps (sorting, deduplication, ...). The buffer is a multiple of the
+	 * target number of values, so the compaction happens less often,
+	 * amortizing the costs. We keep the actual target and compact to the
+	 * requested number of values at the very end, before serializing to
+	 * on-disk representation.
+	 */
+	/* requested number of values */
+	int			target_maxvalues;
+
+	/* boundary values of the ranges first, then the point values */
+	Datum		values[FLEXIBLE_ARRAY_MEMBER];
+} Ranges;
+
+/*
+ * On-disk the summary is stored as a bytea value, with a simple header
+ * with basic metadata, followed by the boundary values. It has a varlena
+ * header, so can be treated as varlena directly.
+ *
+ * See brin_range_serialize/brin_range_deserialize for serialization details.
+ */
+typedef struct SerializedRanges
+{
+	/* varlena header (do not touch directly!) */
+	int32		vl_len_;
+
+	/* type of values stored in the data array */
+	Oid			typid;
+
+	/* (2*nranges + nvalues) <= maxvalues */
+	int			nranges;		/* number of ranges in the array (stored) */
+	int			nvalues;		/* number of point values in the data array */
+	int			maxvalues;		/* maximum number of values (reloption) */
+
+	/* the (2*nranges + nvalues) values, packed without alignment padding */
+	char		data[FLEXIBLE_ARRAY_MEMBER];
+} SerializedRanges;
+
+static SerializedRanges *brin_range_serialize(Ranges *range);
+
+static Ranges *brin_range_deserialize(int maxvalues, SerializedRanges *range);
+
+
+/*
+ * Used to represent ranges expanded to make merging and combining easier.
+ *
+ * Each expanded range is essentially an interval, represented by min/max
+ * values, along with a flag indicating whether it's a collapsed range (in
+ * which case the min and max values are equal). We have the flag to handle
+ * by-ref data types - we can't simply compare the datums, and this saves
+ * some calls to the type-specific comparator function.
+ */
+typedef struct ExpandedRange
+{
+	Datum		minval;			/* lower boundary */
+	Datum		maxval;			/* upper boundary */
+	bool		collapsed;		/* true if minval==maxval */
+} ExpandedRange;
+
+/*
+ * Represents a distance between two ranges (identified by index into
+ * an array of expanded ranges).
+ */
+typedef struct DistanceValue
+{
+	int			index;			/* index into the expanded ranges array */
+	double		value;			/* the distance itself */
+} DistanceValue;
+
+
+/* Cache for support and strategy procedures. */
+
+static FmgrInfo *minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno,
+										   uint16 procnum);
+
+static FmgrInfo *minmax_multi_get_strategy_procinfo(BrinDesc *bdesc,
+													uint16 attno, Oid subtype,
+													uint16 strategynum);
+
+typedef struct compare_context
+{
+	FmgrInfo   *cmpFn;			/* boolean "less than" procedure */
+	Oid			colloid;		/* collation to call cmpFn with */
+} compare_context;
+
+static int	compare_values(const void *a, const void *b, void *arg);
+
+
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Check that the array values are strictly increasing, using the cmp
+ * function (which should be the BTLessStrategyNumber operator's procedure).
+ */
+static void
+AssertArrayOrder(FmgrInfo *cmp, Oid colloid, Datum *values, int nvalues)
+{
+	int			i;
+	Datum		lt;
+
+	for (i = 0; i < (nvalues - 1); i++)
+	{
+		lt = FunctionCall2Coll(cmp, colloid, values[i], values[i + 1]);
+		Assert(DatumGetBool(lt));
+	}
+}
+#endif
+
+/*
+ * Comprehensive check of the Ranges structure (no-op without assertions).
+ */
+static void
+AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
+{
+#ifdef USE_ASSERT_CHECKING
+	int			i;
+
+	/* some basic sanity checks */
+	Assert(ranges->nranges >= 0);
+	Assert(ranges->nsorted >= 0);
+	Assert(ranges->nvalues >= ranges->nsorted);
+	Assert(ranges->maxvalues >= 2 * ranges->nranges + ranges->nvalues);
+	Assert(ranges->typid != InvalidOid);
+
+	/*
+	 * First the ranges - there are 2*nranges boundary values, and the values
+	 * have to be strictly ordered (equal values would mean the range is
+	 * collapsed, and should be stored as a point). This also guarantees that
+	 * the ranges do not overlap.
+	 */
+	AssertArrayOrder(cmpFn, colloid, ranges->values, 2 * ranges->nranges);
+
+	/* then the sorted part of the single-point values (nsorted of them) */
+	AssertArrayOrder(cmpFn, colloid, &ranges->values[2 * ranges->nranges],
+					 ranges->nsorted);
+
+	/*
+	 * Check that none of the point values (both sorted and unsorted) is
+	 * covered by any of the ranges.
+	 */
+	if (ranges->nranges > 0)
+	{
+		for (i = 0; i < ranges->nvalues; i++)
+		{
+			Datum		compar;
+			int			start,
+						end;
+			Datum		minvalue = ranges->values[0];
+			Datum		maxvalue = ranges->values[2 * ranges->nranges - 1];
+			Datum		value = ranges->values[2 * ranges->nranges + i];
+
+			compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
+
+			/*
+			 * If the value is smaller than the lower bound in the first range
+			 * then it cannot possibly be in any of the ranges.
+			 */
+			if (DatumGetBool(compar))
+				continue;
+
+			compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
+
+			/*
+			 * Likewise, if the value is larger than the upper bound of the
+			 * final range, then it cannot possibly be inside any of the
+			 * ranges.
+			 */
+			if (DatumGetBool(compar))
+				continue;
+
+			/* bsearch the ranges to see if 'value' fits within any of them */
+			start = 0;			/* first range */
+			end = ranges->nranges - 1;	/* last range */
+			while (true)
+			{
+				int			midpoint = (start + end) / 2;
+
+				/* this means we ran out of ranges in the last step */
+				if (start > end)
+					break;
+
+				/* copy the min/max values from the ranges */
+				minvalue = ranges->values[2 * midpoint];
+				maxvalue = ranges->values[2 * midpoint + 1];
+
+				/*
+				 * Is the value smaller than the minval? If yes, continue the
+				 * search in the left half of the range array.
+				 */
+				compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
+
+				/* smaller than the smallest value in this range */
+				if (DatumGetBool(compar))
+				{
+					end = (midpoint - 1);
+					continue;
+				}
+
+				/*
+				 * Is the value greater than the maxval? If yes, continue the
+				 * search in the right half of the range array.
+				 */
+				compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
+
+				/* larger than the largest value in this range */
+				if (DatumGetBool(compar))
+				{
+					start = (midpoint + 1);
+					continue;
+				}
+
+				/* the value falls into this range, which must not happen */
+				Assert(false);
+			}
+		}
+	}
+
+	/* and values in the unsorted part must not be in the sorted part */
+	if (ranges->nsorted > 0)
+	{
+		compare_context cxt;
+
+		cxt.colloid = ranges->colloid;
+		cxt.cmpFn = ranges->cmp;
+
+		for (i = ranges->nsorted; i < ranges->nvalues; i++)
+		{
+			Datum		value = ranges->values[2 * ranges->nranges + i];
+
+			Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges],
+							   ranges->nsorted, sizeof(Datum),
+							   compare_values, (void *) &cxt) == NULL);
+		}
+	}
+#endif
+}
+
+/*
+ * Check that the expanded ranges (built when reducing the number of ranges
+ * by combining some of them) are correctly sorted and do not overlap.
+ */
+static void
+AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno,
+						  Form_pg_attribute attr, ExpandedRange *ranges,
+						  int nranges)
+{
+#ifdef USE_ASSERT_CHECKING
+	int			i;
+	FmgrInfo   *eq;
+	FmgrInfo   *lt;
+
+	eq = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+											BTEqualStrategyNumber);
+
+	lt = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+											BTLessStrategyNumber);
+
+	/*
+	 * Each range has to be valid on its own, i.e. its boundary values must
+	 * satisfy (lower <= upper), with equality only for collapsed ranges.
+	 */
+	for (i = 0; i < nranges; i++)
+	{
+		Datum		r;
+		Datum		minval = ranges[i].minval;
+		Datum		maxval = ranges[i].maxval;
+
+		if (ranges[i].collapsed)	/* collapsed: minval == maxval */
+			r = FunctionCall2Coll(eq, colloid, minval, maxval);
+		else					/* non-collapsed: minval < maxval */
+			r = FunctionCall2Coll(lt, colloid, minval, maxval);
+
+		Assert(DatumGetBool(r));
+	}
+
+	/*
+	 * And the ranges should be ordered and must not overlap, i.e. the upper
+	 * boundary of each range is < the lower boundary of the next one.
+	 */
+	for (i = 0; i < nranges - 1; i++)
+	{
+		Datum		r;
+		Datum		maxval = ranges[i].maxval;
+		Datum		minval = ranges[i + 1].minval;
+
+		r = FunctionCall2Coll(lt, colloid, maxval, minval);
+
+		Assert(DatumGetBool(r));
+	}
+#endif
+}
+
+
+/*
+ * minmax_multi_init
+ *		Allocate an empty in-memory Ranges with room for maxvalues Datums.
+ *
+ * This is only the in-memory representation, so we allocate space for the
+ * maximum number of values up front (to avoid repallocs as it grows). The
+ * struct is zeroed, and only the maxvalues field is filled in here.
+ */
+static Ranges *
+minmax_multi_init(int maxvalues)
+{
+	Size		len;
+	Ranges	   *ranges;
+
+	Assert(maxvalues > 0);
+
+	len = offsetof(Ranges, values); /* fixed header */
+	len += maxvalues * sizeof(Datum);	/* Datum values */
+
+	ranges = (Ranges *) palloc0(len);
+
+	ranges->maxvalues = maxvalues;
+
+	return ranges;
+}
+
+
+/*
+ * range_deduplicate_values
+ *		Sort and deduplicate the single-point values.
+ *
+ * This is meant to be a cheaper way of reducing the size of the ranges. It
+ * does not touch the ranges, and only sorts the other values - it does not
+ * call the distance functions, which may be quite expensive, etc.
+ *
+ * We do know the values are not duplicate with the ranges, because we check
+ * that before adding a new value. Same for the sorted part of values.
+ */
+static void
+range_deduplicate_values(Ranges *range)
+{
+	int			i,
+				n;
+	int			start;
+	compare_context cxt;
+
+	/*
+	 * If there are no unsorted values, we're done (this probably can't
+	 * happen, as we're adding values to unsorted part).
+	 */
+	if (range->nsorted == range->nvalues)
+		return;
+
+	/* sort the values */
+	cxt.colloid = range->colloid;
+	cxt.cmpFn = range->cmp;
+
+	/* the values start right after the ranges (which are always sorted) */
+	start = 2 * range->nranges;
+
+	/*
+	 * XXX This might do a merge sort, to leverage that the first part of the
+	 * array is already sorted. If the sorted part is large, it might be quite
+	 * a bit faster.
+	 */
+	qsort_arg(&range->values[start],
+			  range->nvalues, sizeof(Datum),
+			  compare_values, (void *) &cxt);
+
+	n = 1;
+	for (i = 1; i < range->nvalues; i++)
+	{
+		/* same as preceding value, so skip it */
+		if (compare_values(&range->values[start + i - 1],
+						   &range->values[start + i],
+						   (void *) &cxt) == 0)
+			continue;
+
+		range->values[start + n] = range->values[start + i];
+
+		n++;
+	}
+
+	/* now all the values are sorted and unique */
+	range->nvalues = n;
+	range->nsorted = n;
+
+	AssertCheckRanges(range, range->cmp, range->colloid);
+}
+
+
+/*
+ * brin_range_serialize
+ *	  Serialize the in-memory representation into a compact varlena value.
+ *
+ * Simply copy the header and then also the individual values, as stored
+ * in the in-memory value array.
+ *
+ * The point values are sorted and deduplicated first, and the summary must
+ * already be compacted to target_maxvalues. The result is palloc'd, with
+ * the values packed back to back (no alignment padding between them).
+ */
+static SerializedRanges *
+brin_range_serialize(Ranges *range)
+{
+	Size		len;
+	int			nvalues;
+	SerializedRanges *serialized;
+	Oid			typid;
+	int			typlen;
+	bool		typbyval;
+
+	int			i;
+	char	   *ptr;
+
+	/* simple sanity checks */
+	Assert(range->nranges >= 0);
+	Assert(range->nsorted >= 0);
+	Assert(range->nvalues >= 0);
+	Assert(range->maxvalues > 0);
+	Assert(range->target_maxvalues > 0);
+
+	/* at this point the range should be compacted to the target size */
+	Assert(2 * range->nranges + range->nvalues <= range->target_maxvalues);
+
+	Assert(range->target_maxvalues <= range->maxvalues);
+
+	/* the sorted part can't be larger than all the point values */
+	Assert(range->nvalues >= range->nsorted);
+
+	/* deduplicate values, if there's unsorted part */
+	range_deduplicate_values(range);
+
+	/* see how many Datum values we actually have */
+	nvalues = 2 * range->nranges + range->nvalues;
+
+	typid = range->typid;
+	typbyval = get_typbyval(typid);
+	typlen = get_typlen(typid);
+
+	/* header is always needed */
+	len = offsetof(SerializedRanges, data);
+
+	/*
+	 * The space needed depends on data type - for fixed-length data types
+	 * (by-value and some by-reference) it's pretty simple, just multiply
+	 * (attlen * nvalues) and we're done. For variable-length by-reference
+	 * types we need to actually walk all the values and sum the lengths.
+	 */
+	if (typlen == -1)			/* varlena */
+	{
+		for (i = 0; i < nvalues; i++)
+		{
+			len += VARSIZE_ANY(DatumGetPointer(range->values[i]));
+		}
+	}
+	else if (typlen == -2)		/* cstring */
+	{
+		for (i = 0; i < nvalues; i++)
+		{
+			/* don't forget to include the null terminator ;-) */
+			len += strlen(DatumGetCString(range->values[i])) + 1;
+		}
+	}
+	else						/* fixed-length types (even by-reference) */
+	{
+		Assert(typlen > 0);
+		len += nvalues * typlen;
+	}
+
+	/*
+	 * Allocate the serialized object, copy the basic information. The
+	 * serialized object is a varlena, so update the header.
+	 */
+	serialized = (SerializedRanges *) palloc0(len);
+	SET_VARSIZE(serialized, len);
+
+	serialized->typid = typid;
+	serialized->nranges = range->nranges;
+	serialized->nvalues = range->nvalues;
+	serialized->maxvalues = range->target_maxvalues;
+
+	/*
+	 * And now copy also the boundary values (like the length calculation this
+	 * depends on the particular data type).
+	 */
+	ptr = serialized->data;		/* start of the serialized data */
+
+	for (i = 0; i < nvalues; i++)
+	{
+		if (typbyval)			/* simple by-value data types */
+		{
+			Datum		tmp;
+
+			/*
+			 * For byval types, we need to copy just the significant bytes -
+			 * we can't use memcpy directly, as that assumes little-endian
+			 * behavior.  store_att_byval does almost what we need, but it
+			 * requires a properly aligned buffer - the output buffer does not
+			 * guarantee that. So we simply use a local Datum variable (which
+			 * guarantees proper alignment), and then copy the value from it.
+			 */
+			store_att_byval(&tmp, range->values[i], typlen);
+
+			memcpy(ptr, &tmp, typlen);
+			ptr += typlen;
+		}
+		else if (typlen > 0)	/* fixed-length by-ref types */
+		{
+			memcpy(ptr, DatumGetPointer(range->values[i]), typlen);
+			ptr += typlen;
+		}
+		else if (typlen == -1)	/* varlena */
+		{
+			int			tmp = VARSIZE_ANY(DatumGetPointer(range->values[i]));
+
+			memcpy(ptr, DatumGetPointer(range->values[i]), tmp);
+			ptr += tmp;
+		}
+		else if (typlen == -2)	/* cstring */
+		{
+			int			tmp = strlen(DatumGetCString(range->values[i])) + 1;
+
+			memcpy(ptr, DatumGetCString(range->values[i]), tmp);
+			ptr += tmp;
+		}
+
+		/* make sure we haven't overflown the buffer end */
+		Assert(ptr <= ((char *) serialized + len));
+	}
+
+	/* exact size */
+	Assert(ptr == ((char *) serialized + len));
+
+	return serialized;
+}
+
+/*
+ * brin_range_deserialize
+ *	  Deserialize the compact varlena value into the in-memory representation.
+ *
+ * Allocate a Ranges with room for maxvalues Datums and unpack the values;
+ * by-reference values are copied into a single, separately palloc'd chunk.
+ */
+static Ranges *
+brin_range_deserialize(int maxvalues, SerializedRanges *serialized)
+{
+	int			i,
+				nvalues;
+	char	   *ptr,
+			   *dataptr;
+	bool		typbyval;
+	int			typlen;
+	Size		datalen;
+
+	Ranges	   *range;
+
+	Assert(serialized->nranges >= 0);
+	Assert(serialized->nvalues >= 0);
+	Assert(serialized->maxvalues > 0);
+
+	nvalues = 2 * serialized->nranges + serialized->nvalues;
+
+	Assert(nvalues <= serialized->maxvalues);
+	Assert(serialized->maxvalues <= maxvalues);
+
+	range = minmax_multi_init(maxvalues);
+
+	/* copy the header info (serialized point values are always sorted) */
+	range->nranges = serialized->nranges;
+	range->nvalues = serialized->nvalues;
+	range->nsorted = serialized->nvalues;
+	range->maxvalues = maxvalues;
+	range->target_maxvalues = serialized->maxvalues;
+
+	range->typid = serialized->typid;
+
+	typbyval = get_typbyval(serialized->typid);
+	typlen = get_typlen(serialized->typid);
+
+	/*
+	 * And now deconstruct the values into Datum array. We have to copy the
+	 * data because the serialized representation ignores alignment, and we
+	 * don't want to rely on it being kept around anyway.
+	 */
+	ptr = serialized->data;
+
+	/*
+	 * We don't want to allocate many pieces, so we just allocate everything
+	 * in one chunk. How much space will we need?
+	 *
+	 * XXX We don't need to copy simple by-value data types.
+	 */
+	datalen = 0;
+	dataptr = NULL;
+	for (i = 0; (i < nvalues) && (!typbyval); i++)
+	{
+		if (typlen > 0)			/* fixed-length by-ref types */
+			datalen += MAXALIGN(typlen);
+		else if (typlen == -1)	/* varlena */
+		{
+			datalen += MAXALIGN(VARSIZE_ANY(ptr));
+			ptr += VARSIZE_ANY(ptr);
+		}
+		else if (typlen == -2)	/* cstring */
+		{
+			Size		slen = strlen(ptr) + 1;
+
+			datalen += MAXALIGN(slen);
+			ptr += slen;
+		}
+	}
+
+	if (datalen > 0)
+		dataptr = palloc(datalen);
+
+	/*
+	 * Restore the source pointer (might have been modified when calculating
+	 * the space we need to allocate).
+	 */
+	ptr = serialized->data;
+
+	for (i = 0; i < nvalues; i++)
+	{
+		if (typbyval)			/* simple by-value data types */
+		{
+			Datum		v = 0;
+
+			memcpy(&v, ptr, typlen);
+
+			range->values[i] = fetch_att(&v, true, typlen);
+			ptr += typlen;
+		}
+		else if (typlen > 0)	/* fixed-length by-ref types */
+		{
+			range->values[i] = PointerGetDatum(dataptr);
+
+			memcpy(dataptr, ptr, typlen);
+			dataptr += MAXALIGN(typlen);
+
+			ptr += typlen;
+		}
+		else if (typlen == -1)	/* varlena */
+		{
+			range->values[i] = PointerGetDatum(dataptr);
+
+			memcpy(dataptr, ptr, VARSIZE_ANY(ptr));
+			dataptr += MAXALIGN(VARSIZE_ANY(ptr));
+			ptr += VARSIZE_ANY(ptr);
+		}
+		else if (typlen == -2)	/* cstring */
+		{
+			Size		slen = strlen(ptr) + 1;
+
+			range->values[i] = PointerGetDatum(dataptr);
+
+			memcpy(dataptr, ptr, slen);
+			dataptr += MAXALIGN(slen);
+			ptr += slen;
+		}
+
+		/* make sure we haven't overflown the buffer end */
+		Assert(ptr <= ((char *) serialized + VARSIZE_ANY(serialized)));
+	}
+
+	/* should have consumed the whole input value exactly */
+	Assert(ptr == ((char *) serialized + VARSIZE_ANY(serialized)));
+
+	/* return the deserialized value */
+	return range;
+}
+
+/*
+ * compare_expanded_ranges
+ *	  Compare the expanded ranges - first by minimum, then by maximum.
+ *
+ * We do guarantee that ranges in a single Ranges object do not overlap, so it
+ * may seem strange that we don't order just by minimum. But when merging two
+ * Ranges (which happens in the union function), the ranges may in fact
+ * overlap. So we do compare both. 'arg' points to a compare_context.
+ */
+static int
+compare_expanded_ranges(const void *a, const void *b, void *arg)
+{
+	ExpandedRange *ra = (ExpandedRange *) a;
+	ExpandedRange *rb = (ExpandedRange *) b;
+	Datum		r;
+
+	compare_context *cxt = (compare_context *) arg;
+
+	/* first compare minvals (cmpFn is called in both directions) */
+	r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->minval, rb->minval);
+
+	if (DatumGetBool(r))
+		return -1;
+
+	r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->minval, ra->minval);
+
+	if (DatumGetBool(r))
+		return 1;
+
+	/* minvals are equal, so compare maxvals */
+	r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->maxval, rb->maxval);
+
+	if (DatumGetBool(r))
+		return -1;
+
+	r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->maxval, ra->maxval);
+
+	if (DatumGetBool(r))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * compare_values
+ *	  Three-way comparison of two Datums; 'arg' points to a compare_context.
+ */
+static int
+compare_values(const void *a, const void *b, void *arg)
+{
+	Datum	   *da = (Datum *) a;
+	Datum	   *db = (Datum *) b;
+	Datum		r;
+
+	compare_context *cxt = (compare_context *) arg;
+
+	r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *da, *db);
+
+	if (DatumGetBool(r))
+		return -1;
+
+	r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *db, *da);
+
+	if (DatumGetBool(r))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Check if the new value falls into one of the existing (regular) ranges.
+ */
+static bool
+has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges,
+				   Datum newval, AttrNumber attno, Oid typid)
+{
+	Datum		compar;
+
+	Datum		minvalue;
+	Datum		maxvalue;
+
+	FmgrInfo   *cmpLessFn;
+	FmgrInfo   *cmpGreaterFn;
+
+	/* binary search on ranges */
+	int			start,
+				end;
+
+	if (ranges->nranges == 0)
+		return false;
+
+	minvalue = ranges->values[0];
+	maxvalue = ranges->values[2 * ranges->nranges - 1];
+
+	/*
+	 * Otherwise, need to compare the new value with boundaries of all the
+	 * ranges. First check if it's less than the absolute minimum, which is
+	 * the first value in the array.
+	 */
+	cmpLessFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
+												   BTLessStrategyNumber);
+	compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
+
+	/* smaller than the smallest value in the range list */
+	if (DatumGetBool(compar))
+		return false;
+
+	/*
+	 * And now compare it to the existing maximum (the last of the range
+	 * boundary values). We only get here if the minvalue check did not
+	 * already rule out a possible match.
+	 */
+	cmpGreaterFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
+													  BTGreaterStrategyNumber);
+	compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
+
+	if (DatumGetBool(compar))
+		return false;
+
+	/*
+	 * So we know it's in the general min/max, the question is whether it
+	 * falls in one of the ranges or gaps. We'll do a binary search on
+	 * individual ranges - for each range we check whether the value falls
+	 * into it, and if not we continue with the ranges either above or below
+	 * the current range.
+	 */
+	start = 0;					/* first range */
+	end = (ranges->nranges - 1);	/* last range */
+	while (true)
+	{
+		int			midpoint = (start + end) / 2;
+
+		/* this means we ran out of ranges in the last step */
+		if (start > end)
+			return false;
+
+		/* copy the min/max values from the ranges */
+		minvalue = ranges->values[2 * midpoint];
+		maxvalue = ranges->values[2 * midpoint + 1];
+
+		/*
+		 * Is the value smaller than the minval? If yes, continue the search
+		 * in the left half of the range array.
+		 */
+		compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
+
+		/* smaller than the smallest value in this range */
+		if (DatumGetBool(compar))
+		{
+			end = (midpoint - 1);
+			continue;
+		}
+
+		/*
+		 * Is the value greater than the maxval? If yes, continue the search
+		 * in the right half of the range array.
+		 */
+		compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
+
+		/* larger than the largest value in this range */
+		if (DatumGetBool(compar))
+		{
+			start = (midpoint + 1);
+			continue;
+		}
+
+		/* hey, we found a matching range */
+		return true;
+	}
+
+	return false;
+}
+
+
+/*
+ * range_contains_value
+ * 		See if the new value is already contained in the range list.
+ *
+ * We first inspect the list of intervals. We use a small trick - we check
+ * the value against min/max of the whole range (min of the first interval,
+ * max of the last one) first, and only inspect the individual intervals if
+ * this passes.
+ *
+ * If the value matches none of the intervals, we check the exact values,
+ * using a linear scan or (for 16 or more sorted values) a binary search.
+ *
+ * The last parameter (full) determines whether we need to search all the
+ * values, including the unsorted part. With full=false, the unsorted part
+ * is not searched, which may produce false negatives and duplicate values
+ * (in the unsorted part only), but when we're building the range that's
+ * fine - we'll deduplicate before serialization, and it can only happen
+ * if there already are unsorted values (so it was already modified).
+ *
+ * Serialized ranges don't have any unsorted values, so this can't cause
+ * false negatives during querying.
+ */
+static bool
+range_contains_value(BrinDesc *bdesc, Oid colloid,
+					 AttrNumber attno, Form_pg_attribute attr,
+					 Ranges *ranges, Datum newval, bool full)
+{
+	int			i;
+	FmgrInfo   *cmpEqualFn;
+	Oid			typid = attr->atttypid;
+
+	/*
+	 * First inspect the ranges, if there are any. We first check the whole
+	 * range, and only when there's still a chance of getting a match we
+	 * inspect the individual ranges.
+	 */
+	if (has_matching_range(bdesc, colloid, ranges, newval, attno, typid))
+		return true;
+
+	cmpEqualFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
+													BTEqualStrategyNumber);
+
+	/*
+	 * There is no matching range, so let's inspect the sorted values.
+	 *
+	 * We do a sequential search for small numbers of values, and binary
+	 * search once we have 16 or more values. This threshold is somewhat
+	 * arbitrary, as it depends on how expensive the comparison function is.
+	 *
+	 * XXX If we use the threshold here, maybe we should do the same thing in
+	 * has_matching_range? Or maybe we should do the bin search all the time?
+	 *
+	 * XXX We could use the same optimization as for ranges, to check if the
+	 * value is between min/max, to maybe rule out all sorted values without
+	 * having to inspect all of them.
+	 */
+	if (ranges->nsorted >= 16)
+	{
+		compare_context cxt;
+
+		cxt.colloid = ranges->colloid;
+		cxt.cmpFn = ranges->cmp;
+
+		if (bsearch_arg(&newval, &ranges->values[2 * ranges->nranges],
+						ranges->nsorted, sizeof(Datum),
+						compare_values, (void *) &cxt) != NULL)
+			return true;
+	}
+	else
+	{
+		for (i = 2 * ranges->nranges; i < 2 * ranges->nranges + ranges->nsorted; i++)
+		{
+			Datum		compar;
+
+			compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
+
+			/* found an exact match */
+			if (DatumGetBool(compar))
+				return true;
+		}
+	}
+
+	/* If not asked to inspect the unsorted part, we're done. */
+	if (!full)
+		return false;
+
+	/* Inspect the unsorted part (always a linear scan). */
+	for (i = 2 * ranges->nranges + ranges->nsorted; i < 2 * ranges->nranges + ranges->nvalues; i++)
+	{
+		Datum		compar;
+
+		compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
+
+		/* found an exact match */
+		if (DatumGetBool(compar))
+			return true;
+	}
+
+	/* the value is not covered by this BRIN tuple */
+	return false;
+}
+
+/*
+ * Copy the contents of a Ranges struct into a caller-allocated array of
+ * ExpandedRange elements. The array has to hold (nranges + nvalues)
+ * entries, and neranges is expected to match that.
+ *
+ * The regular ranges are expanded first, and that part is sorted. The
+ * single-point values follow as collapsed ranges, and that part may be
+ * unsorted, so the array as a whole is in no particular order.
+ */
+static void
+fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges)
+{
+	int			nfilled = 0;
+	int			k;
+	Datum	   *points = &ranges->values[2 * ranges->nranges];
+
+	/* The caller must have sized the output array exactly. */
+	Assert(neranges == (ranges->nranges + ranges->nvalues));
+
+	/* each regular range is a (min, max) pair of consecutive values */
+	for (k = 0; k < ranges->nranges; k++)
+	{
+		ExpandedRange *dst = &eranges[nfilled++];
+
+		dst->minval = ranges->values[2 * k];
+		dst->maxval = ranges->values[2 * k + 1];
+		dst->collapsed = false;
+		Assert(nfilled <= neranges);
+	}
+
+	/* each point becomes a collapsed range with min == max */
+	for (k = 0; k < ranges->nvalues; k++)
+	{
+		ExpandedRange *dst = &eranges[nfilled++];
+
+		dst->minval = points[k];
+		dst->maxval = points[k];
+		dst->collapsed = true;
+		Assert(nfilled <= neranges);
+	}
+
+	/* Every slot of the output array should now be populated. */
+	Assert(nfilled == neranges);
+}
+
+/*
+ * Sort the expanded ranges and squeeze out duplicates.
+ *
+ * Values get appended to the summary without checking for duplicates, so
+ * the array may contain equal entries. Removing them may shrink the array
+ * enough that we won't have to compute the distances etc.
+ *
+ * Returns the number of expanded ranges left after deduplication.
+ */
+static int
+sort_expanded_ranges(FmgrInfo *cmp, Oid colloid,
+					 ExpandedRange *eranges, int neranges)
+{
+	int			nkept;
+	int			pos;
+	compare_context cmpcxt;
+
+	Assert(neranges > 0);
+
+	/* the comparator needs the operator and the collation */
+	cmpcxt.cmpFn = cmp;
+	cmpcxt.colloid = colloid;
+
+	/*
+	 * XXX All elements are sorted with qsort, although part of the input is
+	 * sorted already (all the ranges and maybe some of the points), so a
+	 * merge sort could be used instead.
+	 */
+	qsort_arg(eranges, neranges, sizeof(ExpandedRange),
+			  compare_expanded_ranges, (void *) &cmpcxt);
+
+	/*
+	 * Walk the sorted array, keeping only the elements that differ from the
+	 * element right before them. The first element is always kept.
+	 */
+	nkept = 1;
+	for (pos = 1; pos < neranges; pos++)
+	{
+		ExpandedRange *curr = &eranges[pos];
+
+		/* comparator returns zero for equal ranges, which are skipped */
+		if (compare_expanded_ranges(&eranges[pos - 1], curr, (void *) &cmpcxt) != 0)
+		{
+			if (pos != nkept)
+				memcpy(&eranges[nkept], curr, sizeof(ExpandedRange));
+			nkept++;
+		}
+	}
+
+	Assert((nkept > 0) && (nkept <= neranges));
+
+	return nkept;
+}
+
+/*
+ * When combining multiple Range values (in union function), some of the
+ * ranges may overlap. Merge them in place; returns the new range count.
+ *
+ * XXX This assumes the expanded ranges were previously sorted (by minval
+ * and then maxval). We leverage this when detecting overlap.
+ */
+static int
+merge_overlapping_ranges(FmgrInfo *cmp, Oid colloid,
+						 ExpandedRange *eranges, int neranges)
+{
+	int			idx;
+
+	/* Merge ranges (idx) and (idx+1) if they overlap. */
+	idx = 0;
+	while (idx < (neranges - 1))
+	{
+		Datum		r;
+
+		/*
+		 * comparing [?,maxval] vs. [minval,?] - the ranges overlap unless
+		 * (maxval < minval), so ranges that merely touch are merged too
+		 */
+		r = FunctionCall2Coll(cmp, colloid,
+							  eranges[idx].maxval,
+							  eranges[idx + 1].minval);
+
+		/*
+		 * Nope, maxval < minval, so no overlap. And we know the ranges are
+		 * ordered, so there are no more overlaps, because all the remaining
+		 * ranges have greater or equal minval.
+		 */
+		if (DatumGetBool(r))
+		{
+			/* proceed to the next range */
+			idx += 1;
+			continue;
+		}
+
+		/*
+		 * So ranges 'idx' and 'idx+1' do overlap, but we don't know if
+		 * 'idx+1' is contained in 'idx', or if they overlap only partially.
+		 * So compare the upper bounds and keep the larger one.
+		 */
+		r = FunctionCall2Coll(cmp, colloid,
+							  eranges[idx].maxval,
+							  eranges[idx + 1].maxval);
+
+		if (DatumGetBool(r))
+			eranges[idx].maxval = eranges[idx + 1].maxval;
+
+		/*
+		 * The range certainly is no longer collapsed (irrespectively of the
+		 * previous state).
+		 */
+		eranges[idx].collapsed = false;
+
+		/*
+		 * Now get rid of the (idx+1) range entirely by shifting the remaining
+		 * ranges by 1. There are neranges elements, and we need to move
+		 * elements from (idx+2). That means the number of elements to move is
+		 * [neranges - (idx+2)].
+		 */
+		memmove(&eranges[idx + 1], &eranges[idx + 2],
+				(neranges - (idx + 2)) * sizeof(ExpandedRange));
+
+		/*
+		 * Decrease the number of ranges, and repeat (with the same range, as
+		 * it might overlap with additional ranges thanks to the merge).
+		 */
+		neranges--;
+	}
+
+	return neranges;
+}
+
+/*
+ * qsort comparator for DistanceValue entries, ordering them by the double
+ * distance in descending order, so that the longest gaps sort first.
+ */
+static int
+compare_distances(const void *a, const void *b)
+{
+	double		lhs = ((DistanceValue *) a)->value;
+	double		rhs = ((DistanceValue *) b)->value;
+
+	/* inverted on purpose: the larger distance comes first */
+	if (lhs > rhs)
+		return -1;
+	if (lhs < rhs)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Compute the gaps between consecutive expanded ranges, i.e. for each pair
+ * of neighbours the distance from the maxval of the first range to the
+ * minval of the second one. For neranges ranges there are (neranges-1)
+ * gaps; with just a single range there are none and NULL is returned.
+ *
+ * The "distance" function may be fairly expensive, so it's called only
+ * once for each gap. The result is sorted with the longest gaps first.
+ *
+ * See reduce_expanded_ranges for details.
+ */
+static DistanceValue *
+build_distances(FmgrInfo *distanceFn, Oid colloid,
+				ExpandedRange *eranges, int neranges)
+{
+	int			gap;
+	int			ngaps;
+	DistanceValue *result;
+
+	Assert(neranges > 0);
+
+	/* A single range has no neighbour, hence no gap to measure. */
+	if (neranges == 1)
+		return NULL;
+
+	/* there's one gap between each pair of consecutive ranges */
+	ngaps = neranges - 1;
+
+	result = (DistanceValue *) palloc0(sizeof(DistanceValue) * ngaps);
+
+	/*
+	 * Make a single pass over the neighbouring pairs, so that each distance
+	 * is computed just once and the array can be sorted afterwards.
+	 */
+	for (gap = 0; gap < ngaps; gap++)
+	{
+		Datum		prevmax = eranges[gap].maxval;
+		Datum		nextmin = eranges[gap + 1].minval;
+		Datum		dist;
+
+		/* length of the gap between the two neighbouring ranges */
+		dist = FunctionCall2Coll(distanceFn, colloid, prevmax, nextmin);
+
+		/* remember which gap this is, the sort below moves entries around */
+		result[gap].index = gap;
+		result[gap].value = DatumGetFloat8(dist);
+	}
+
+	/*
+	 * Order the gaps by length, descending, so that the longest ones are at
+	 * the front of the array.
+	 */
+	pg_qsort(result, ngaps, sizeof(DistanceValue), compare_distances);
+
+	return result;
+}
+
+/*
+ * Convert a Ranges summary into a freshly allocated array of expanded
+ * ranges, in which regular ranges and single points are represented the
+ * same way. That makes the subsequent processing a bit easier.
+ *
+ * The result is sorted and deduplicated - that's necessary because the
+ * points may be unsorted, and because the ranges and the points are each
+ * ordered only on their own. The number of elements in the result is
+ * passed back through *nranges.
+ */
+static ExpandedRange *
+build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges,
+					  int *nranges)
+{
+	ExpandedRange *result;
+	int			nexpanded;
+
+	/* one element per range, plus one element per point */
+	nexpanded = ranges->nranges + ranges->nvalues;
+
+	result = (ExpandedRange *) palloc0(nexpanded * sizeof(ExpandedRange));
+
+	/* copy the ranges and the points into the expanded form */
+	fill_expanded_ranges(result, nexpanded, ranges);
+
+	/* order the elements and drop duplicates, which may shrink the array */
+	nexpanded = sort_expanded_ranges(cmp, colloid, result, nexpanded);
+
+	/* tell the caller how many elements are left */
+	*nranges = nexpanded;
+
+	return result;
+}
+
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Compute how many boundary values it takes to store the given expanded
+ * ranges: a collapsed (single-point) range is stored as one value, while
+ * a regular range needs two.
+ *
+ * Compiled only in assert-enabled builds.
+ */
+static int
+count_values(ExpandedRange *cranges, int ncranges)
+{
+	int			total;
+	int			pos;
+
+	total = 0;
+	for (pos = 0; pos < ncranges; pos++)
+	{
+		/* a single point takes one value, a regular range takes two */
+		total += (cranges[pos].collapsed ? 1 : 2);
+	}
+
+	return total;
+}
+#endif
+
+/*
+ * reduce_expanded_ranges
+ *		reduce the ranges until the number of values is low enough
+ *
+ * Combines ranges until the number of boundary values does not exceed
+ * max_values. This happens by merging the ranges separated by the
+ * shortest gaps (the distances array is expected in descending order).
+ *
+ * Returns the number of result ranges.
+ *
+ * We simply use the global min/max and then add boundaries for enough
+ * largest gaps. Each gap adds 2 values, so we simply use (max_values/2-1)
+ * distances. Then we simply sort all the values - each two values are
+ * a boundary of a range (possibly collapsed).
+ *
+ * XXX Some of the ranges may be collapsed (i.e. the min/max values are
+ * equal), but we ignore that for now. We could repeat the process,
+ * adding a couple more gaps recursively.
+ *
+ * XXX The ranges to merge are selected solely using the distance. But
+ * that may not be the best strategy, for example when multiple gaps
+ * are of equal (or very similar) length.
+ *
+ * Consider for example points 1, 2, 3, .., 64, which have gaps of the
+ * same length 1 of course. In that case, we tend to pick the first
+ * gap of that length, which leads to this:
+ *
+ *    step 1:  [1, 2], 3, 4, 5, .., 64
+ *    step 2:  [1, 3], 4, 5,    .., 64
+ *    step 3:  [1, 4], 5,       .., 64
+ *    ...
+ *
+ * So in the end we'll have one "large" range and multiple small points.
+ * That may be fine, but it seems a bit strange and non-optimal. Maybe
+ * we should consider other things when picking ranges to merge - e.g.
+ * length of the ranges? Or perhaps randomize the choice of ranges, with
+ * probability inversely proportional to the distance (the gap lengths
+ * may be very close, but not exactly the same).
+ *
+ * XXX Or maybe we could just handle this by using random value as a
+ * tie-break, or by adding random noise to the actual distance.
+ */
+static int
+reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
+					   DistanceValue *distances, int max_values,
+					   FmgrInfo *cmp, Oid colloid)
+{
+	int			i;
+	int			nvalues;
+	Datum	   *values;
+
+	compare_context cxt;
+
+	/* total number of gaps between ranges */
+	int			ndistances = (neranges - 1);
+
+	/* number of gaps to keep */
+	int			keep = (max_values / 2 - 1);
+
+	/*
+	 * Maybe we have a sufficiently low number of ranges already?
+	 *
+	 * XXX This should happen before we actually do the expensive stuff like
+	 * sorting, so maybe this should be just an assert.
+	 */
+	if (keep >= ndistances)
+		return neranges;
+
+	/* context for the comparator (used when sorting the values below) */
+	cxt.colloid = colloid;
+	cxt.cmpFn = cmp;
+
+	/* allocate space for the boundary values */
+	nvalues = 0;
+	values = (Datum *) palloc(sizeof(Datum) * max_values);
+
+	/* add the global min/max values, from the first/last range */
+	values[nvalues++] = eranges[0].minval;
+	values[nvalues++] = eranges[neranges - 1].maxval;
+
+	/* add boundaries of the first 'keep' gaps, expected to be the longest */
+	for (i = 0; i < keep; i++)
+	{
+		/* index of the gap between (index) and (index+1) ranges */
+		int			index = distances[i].index;
+
+		Assert((index >= 0) && ((index + 1) < neranges));
+
+		/* add max from the preceding range, minval from the next one */
+		values[nvalues++] = eranges[index].maxval;
+		values[nvalues++] = eranges[index + 1].minval;
+
+		Assert(nvalues <= max_values);
+	}
+
+	/* We should have an even number of range values. */
+	Assert(nvalues % 2 == 0);
+
+	/*
+	 * Sort the values using the comparator function, and form ranges from the
+	 * sorted result.
+	 */
+	qsort_arg(values, nvalues, sizeof(Datum),
+			  compare_values, (void *) &cxt);
+
+	/* We have nvalues boundary values, which means nvalues/2 ranges. */
+	for (i = 0; i < (nvalues / 2); i++)
+	{
+		eranges[i].minval = values[2 * i];
+		eranges[i].maxval = values[2 * i + 1];
+
+		/* if the boundary values are the same, it's a collapsed range */
+		eranges[i].collapsed = (compare_values(&values[2 * i],
+											   &values[2 * i + 1],
+											   &cxt) == 0);
+	}
+
+	return (nvalues / 2);
+}
+
+/*
+ * Write the expanded ranges back into 'ranges', using one value for each
+ * collapsed range and two values (min and max) for each regular one.
+ */
+static void
+store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges)
+{
+	int			pos;
+	int			nstored = 0;
+	int			nregular = 0;
+	int			npoints = 0;
+
+	/* the boundaries of regular ranges go to the front of the array */
+	for (pos = 0; pos < neranges; pos++)
+	{
+		if (eranges[pos].collapsed)
+			continue;
+		ranges->values[nstored++] = eranges[pos].minval;
+		ranges->values[nstored++] = eranges[pos].maxval;
+		nregular++;
+	}
+
+	/* the single points follow, in a second pass over the input */
+	for (pos = 0; pos < neranges; pos++)
+	{
+		if (!eranges[pos].collapsed)
+			continue;
+		ranges->values[nstored++] = eranges[pos].minval;
+		npoints++;
+	}
+
+	ranges->nranges = nregular;
+	ranges->nvalues = npoints;
+	/* every stored point is considered sorted */
+	ranges->nsorted = npoints;
+
+	Assert(count_values(eranges, neranges) == 2 * ranges->nranges + ranges->nvalues);
+	Assert(2 * ranges->nranges + ranges->nvalues <= ranges->maxvalues);
+}
+
+
+/*
+ * Consider freeing space in the ranges. Checks if there's space for at least
+ * one new value, and performs compaction if needed.
+ *
+ * Returns true if the range was modified (deduplicated or compacted).
+ */
+static bool
+ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid,
+							AttrNumber attno, Form_pg_attribute attr,
+							Ranges *range)
+{
+	MemoryContext ctx;
+	MemoryContext oldctx;
+
+	FmgrInfo   *cmpFn,
+			   *distanceFn;
+
+	/* expanded ranges */
+	ExpandedRange *eranges;
+	int			neranges;
+	DistanceValue *distances;
+
+	/*
+	 * If there is free space in the buffer, we're done without having to
+	 * modify anything.
+	 */
+	if (2 * range->nranges + range->nvalues < range->maxvalues)
+		return false;
+
+	/* we'll certainly need the comparator, so just look it up now */
+	cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+											   BTLessStrategyNumber);
+
+	/* deduplicate values, if there's an unsorted part */
+	range_deduplicate_values(range);
+
+	/*
+	 * Did the deduplication alone free enough space?
+	 *
+	 * We don't simply check against range->maxvalues again. The deduplication
+	 * might have freed very little space (e.g. just one value), forcing us to
+	 * do deduplication very often. In that case, it's better to do the
+	 * compaction and free more space.
+	 */
+	if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
+		return true;
+
+	/*
+	 * We need to combine some of the existing ranges, to reduce the number of
+	 * values we have to store.
+	 *
+	 * The distanceFn calls (which may internally call e.g. numeric_le) may
+	 * allocate quite a bit of memory, and we must not leak it (we might have
+	 * to do this repeatedly, even for a single BRIN page range). Otherwise
+	 * we'd have problems e.g. when building new indexes. So we use a memory
+	 * context and make sure we free the memory at the end (so if we call the
+	 * distance function many times, it might be an issue, but meh).
+	 */
+	ctx = AllocSetContextCreate(CurrentMemoryContext,
+								"minmax-multi context",
+								ALLOCSET_DEFAULT_SIZES);
+
+	oldctx = MemoryContextSwitchTo(ctx);
+
+	/* build the expanded ranges */
+	eranges = build_expanded_ranges(cmpFn, colloid, range, &neranges);
+
+	/* and we'll also need the 'distance' procedure */
+	distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
+
+	/* build array of gap distances and sort them in descending order */
+	distances = build_distances(distanceFn, colloid, eranges, neranges);
+
+	/*
+	 * Combine ranges until we release at least 50% of the space. This
+	 * threshold is somewhat arbitrary, perhaps needs tuning. We must not use
+	 * too low or high value.
+	 */
+	neranges = reduce_expanded_ranges(eranges, neranges, distances,
+									  range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR,
+									  cmpFn, colloid);
+
+	/* Make sure we've sufficiently reduced the number of ranges. */
+	Assert(count_values(eranges, neranges) <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR);
+
+	/* decompose the expanded ranges into regular ranges and single values */
+	store_expanded_ranges(range, eranges, neranges);
+
+	MemoryContextSwitchTo(oldctx);
+	MemoryContextDelete(ctx);
+
+	/* Did we break the ranges somehow? */
+	AssertCheckRanges(range, cmpFn, colloid);
+
+	return true;
+}
+
+/*
+ * range_add_value
+ * 		Add the new value to the range; returns true if it was modified.
+ */
+static bool
+range_add_value(BrinDesc *bdesc, Oid colloid,
+				AttrNumber attno, Form_pg_attribute attr,
+				Ranges *ranges, Datum newval)
+{
+	FmgrInfo   *cmpFn;
+	bool		modified = false;
+
+	/* we'll certainly need the comparator, so just look it up now */
+	cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+											   BTLessStrategyNumber);
+
+	/* comprehensive checks of the input ranges */
+	AssertCheckRanges(ranges, cmpFn, colloid);
+
+	/*
+	 * Make sure there's enough free space in the buffer. We only trigger this
+	 * when the buffer is full, which means it had to be modified as we size
+	 * it to be larger than what is stored on disk.
+	 *
+	 * This needs to happen before we check if the value is contained in the
+	 * range, because the value might be in the unsorted part, and we don't
+	 * check that in range_contains_value. The deduplication would then move
+	 * it to the sorted part, and we'd add the value too, which violates the
+	 * rule that we never have duplicates with the ranges or sorted values.
+	 *
+	 * We might also deduplicate and recheck if the value is contained, but
+	 * that seems like overkill. We'd need to deduplicate anyway, so why not
+	 * do it now.
+	 */
+	modified = ensure_free_space_in_buffer(bdesc, colloid,
+										   attno, attr, ranges);
+
+	/*
+	 * Bail out if the value already is covered by the range.
+	 *
+	 * We could also add values until we hit values_per_range, and then do the
+	 * deduplication in a batch, hoping for better efficiency. But that would
+	 * mean we actually modify the range every time, which means having to
+	 * serialize the value, which does palloc, walks the values, copies them,
+	 * etc. Not exactly cheap.
+	 *
+	 * So instead we do the check, which should be fairly cheap - assuming the
+	 * comparator function is not very expensive.
+	 *
+	 * This also implies there are no duplicates in ranges and sorted values.
+	 */
+	if (range_contains_value(bdesc, colloid, attno, attr, ranges, newval, false))
+		return modified;
+
+	/* Make a copy of the value, if needed. */
+	newval = datumCopy(newval, attr->attbyval, attr->attlen);
+
+	/*
+	 * ensure_free_space_in_buffer made room for at least one more value, so
+	 * simply append the new value at the end of the values array.
+	 *
+	 * The value lands in the unsorted part (unless it's the very first one,
+	 * see below). No sorting happens here - that's deferred until the buffer
+	 * fills up and ensure_free_space_in_buffer deduplicates or compacts it.
+	 */
+	ranges->values[2 * ranges->nranges + ranges->nvalues] = newval;
+	ranges->nvalues++;
+
+	/* If we added the first value, we can consider it as sorted. */
+	if (ranges->nvalues == 1)
+		ranges->nsorted = 1;
+
+	/*
+	 * Check we haven't broken the ordering of boundary values (checks both
+	 * parts, but that doesn't hurt).
+	 */
+	AssertCheckRanges(ranges, cmpFn, colloid);
+
+	/* Check the range contains the value we just added. */
+	Assert(range_contains_value(bdesc, colloid, attno, attr, ranges, newval, true));
+
+	/* yep, we've modified the range */
+	return true;
+}
+
+/*
+ * Generate range representation of data collected during "batch mode".
+ * This is similar to reduce_expanded_ranges, except that we can't assume
+ * the values are sorted and there may be duplicate values.
+ */
+static void
+compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
+{
+	FmgrInfo   *cmpFn,
+			   *distanceFn;
+
+	/* expanded ranges */
+	ExpandedRange *eranges;
+	int			neranges;
+	DistanceValue *distances;
+
+	MemoryContext ctx;
+	MemoryContext oldctx;
+
+	/*
+	 * Do we need to actually compactify anything?
+	 *
+	 * There are two reasons why compaction may be needed - firstly, there may
+	 * be too many values, or some of the values may be unsorted.
+	 */
+	if ((ranges->nranges * 2 + ranges->nvalues <= max_values) &&
+		(ranges->nsorted == ranges->nvalues))
+		return;
+
+	/* we'll certainly need the comparator, so just look it up now */
+	cmpFn = minmax_multi_get_strategy_procinfo(bdesc, ranges->attno, ranges->typid,
+											   BTLessStrategyNumber);
+
+	/* and we'll also need the 'distance' procedure */
+	distanceFn = minmax_multi_get_procinfo(bdesc, ranges->attno, PROCNUM_DISTANCE);
+
+	/*
+	 * The distanceFn calls (which may internally call e.g. numeric_le) may
+	 * allocate quite a bit of memory, and we must not leak it. Otherwise,
+	 * we'd have problems e.g. when building indexes. So we create a local
+	 * memory context and make sure we free the memory before leaving this
+	 * function (not after every call).
+	 */
+	ctx = AllocSetContextCreate(CurrentMemoryContext,
+								"minmax-multi context",
+								ALLOCSET_DEFAULT_SIZES);
+
+	oldctx = MemoryContextSwitchTo(ctx);
+
+	/* build the expanded ranges */
+	eranges = build_expanded_ranges(cmpFn, ranges->colloid, ranges, &neranges);
+
+	/* build array of gap distances and sort them in descending order */
+	distances = build_distances(distanceFn, ranges->colloid,
+								eranges, neranges);
+
+	/*
+	 * Combine ranges until we get below max_values. We don't use any scale
+	 * factor, because this is used during serialization, and we don't expect
+	 * more tuples to be inserted anytime soon.
+	 */
+	neranges = reduce_expanded_ranges(eranges, neranges, distances,
+									  max_values, cmpFn, ranges->colloid);
+
+	Assert(count_values(eranges, neranges) <= max_values);
+
+	/* transform back into regular ranges and single values */
+	store_expanded_ranges(ranges, eranges, neranges);
+
+	/* check all the range invariants */
+	AssertCheckRanges(ranges, cmpFn, ranges->colloid);
+
+	MemoryContextSwitchTo(oldctx);
+	MemoryContextDelete(ctx);
+}
+
+Datum
+brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS)
+{
+	BrinOpcInfo *result;
+
+	/*
+	 * The opaque data shares the allocation with BrinOpcInfo; palloc0 leaves
+	 * its lazily initialized strategy_procinfos with fn_oid = InvalidOid.
+	 */
+
+	result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
+					 sizeof(MinmaxMultiOpaque));
+	result->oi_nstored = 1;
+	result->oi_regular_nulls = true;
+	result->oi_opaque = (MinmaxMultiOpaque *)
+		MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
+	result->oi_typcache[0] = lookup_type_cache(PG_BRIN_MINMAX_MULTI_SUMMARYOID, 0);
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Distance between two float4 values, computed in double precision.
+ */
+Datum
+brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
+{
+	float		lo = PG_GETARG_FLOAT4(0);
+	float		hi = PG_GETARG_FLOAT4(1);
+
+	/*
+	 * NaN handling: a pair of NaNs is treated as the same value (zero
+	 * distance), while a NaN and a regular value are infinitely far apart.
+	 */
+	if (isnan(lo) || isnan(hi))
+	{
+		float8		nandist = (isnan(lo) && isnan(hi)) ? 0.0 : get_float8_infinity();
+
+		PG_RETURN_FLOAT8(nandist);
+	}
+
+	/* the boundaries arrive in order, and are equal for a collapsed range */
+	Assert(lo <= hi);
+
+	PG_RETURN_FLOAT8((double) hi - (double) lo);
+}
+
+/*
+ * Distance between two float8 values (plain subtraction).
+ *
+ * NaN values need special care, as subtraction can't deal with them.
+ */
+Datum
+brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
+{
+	double		lo = PG_GETARG_FLOAT8(0);
+	double		hi = PG_GETARG_FLOAT8(1);
+
+	/*
+	 * Two NaNs are considered the same value (zero distance), while a NaN
+	 * and a regular value are considered to be infinitely far apart.
+	 */
+	if (isnan(lo))
+		PG_RETURN_FLOAT8(isnan(hi) ? 0.0 : get_float8_infinity());
+	if (isnan(hi))
+		PG_RETURN_FLOAT8(get_float8_infinity());
+
+	/* the boundaries arrive in order, and are equal for a collapsed range */
+	Assert(lo <= hi);
+
+	PG_RETURN_FLOAT8(hi - lo);
+}
+
+/*
+ * Distance between two int2 values, returned as a float8.
+ */
+Datum
+brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
+{
+	int16		lo = PG_GETARG_INT16(0);
+	int16		hi = PG_GETARG_INT16(1);
+	float8		gap;
+
+	/* the boundaries arrive in order, and are equal for a collapsed range */
+	Assert(lo <= hi);
+
+	gap = (double) hi - (double) lo;
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Distance between two int4 values, returned as a float8.
+ *
+ * Both values are converted to double before the subtraction.
+ */
+Datum
+brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
+{
+	int32		lower = PG_GETARG_INT32(0);
+	int32		upper = PG_GETARG_INT32(1);
+
+	/* ordered boundaries are expected; equal ones mean a single point */
+	Assert(lower <= upper);
+
+	/* in double arithmetic the difference can't overflow */
+	PG_RETURN_FLOAT8((double) upper - (double) lower);
+}
+
+/*
+ * Distance between two int8 values, returned as a float8.
+ *
+ * Both values are converted to double before the subtraction.
+ */
+Datum
+brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
+{
+	int64		minval = PG_GETARG_INT64(0);
+	int64		maxval = PG_GETARG_INT64(1);
+
+	/* ordered boundaries are expected; equal ones mean a single point */
+	Assert(minval <= maxval);
+
+	/* double arithmetic can't overflow, at worst it's a bit imprecise */
+	PG_RETURN_FLOAT8((double) maxval - (double) minval);
+}
+
+/*
+ * Compute the distance between two tid values (by mapping them to float8 and
+ * then subtracting them).
+ */
+Datum
+brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
+{
+	double		da1,
+				da2;
+
+	ItemPointer pa1 = (ItemPointer) PG_GETARG_DATUM(0);
+	ItemPointer pa2 = (ItemPointer) PG_GETARG_DATUM(1);
+
+	/*
+	 * We know the values are range boundaries, but the range may be collapsed
+	 * (i.e. single points), with equal values.
+	 */
+	Assert(ItemPointerCompare(pa1, pa2) <= 0);
+
+	/*
+	 * No-check variants, as user-supplied values may have (ip_posid == 0),
+	 * see ItemPointerCompare. Multiply as double, uint32 math could wrap.
+	 */
+	da1 = (double) ItemPointerGetBlockNumberNoCheck(pa1) * MaxHeapTuplesPerPage +
+		ItemPointerGetOffsetNumberNoCheck(pa1);
+
+	da2 = (double) ItemPointerGetBlockNumberNoCheck(pa2) * MaxHeapTuplesPerPage +
+		ItemPointerGetOffsetNumberNoCheck(pa2);
+
+	PG_RETURN_FLOAT8(da2 - da1);
+}
+
+/*
+ * Compute the distance between two numeric values (plain subtraction).
+ */
+Datum
+brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
+{
+	Datum		d;
+	Datum		a1 = PG_GETARG_DATUM(0);
+	Datum		a2 = PG_GETARG_DATUM(1);
+
+	/*
+	 * We know the values are range boundaries, but the range may be collapsed
+	 * (i.e. single points), with equal values.
+	 */
+	Assert(DatumGetBool(DirectFunctionCall2(numeric_le, a1, a2)));
+
+	/* a2 - a1; numeric_float8 then yields a float8 Datum, returned as is */
+	d = DirectFunctionCall2(numeric_sub, a2, a1);
+	PG_RETURN_DATUM(DirectFunctionCall1(numeric_float8, d));
+}
+
+/*
+ * Compute the approximate distance between two UUID values.
+ *
+ * XXX An exact delta would need 128-bit integers. We don't need that much
+ * accuracy, so the delta is folded into a 64-bit float instead. Small
+ * inaccuracies don't matter in practice, the worst case is that we merge
+ * ranges that are not the closest ones.
+ */
+Datum
+brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS)
+{
+	Datum		lo = PG_GETARG_DATUM(0);
+	Datum		hi = PG_GETARG_DATUM(1);
+	pg_uuid_t  *lo_uuid = DatumGetUUIDP(lo);
+	pg_uuid_t  *hi_uuid = DatumGetUUIDP(hi);
+	float8		delta = 0;
+	int			pos;
+
+	/* the boundaries arrive in order, and are equal for a collapsed range */
+	Assert(DatumGetBool(DirectFunctionCall2(uuid_le, lo, hi)));
+
+	/*
+	 * Walk the bytes from the last one to the first one, scaling the
+	 * accumulated delta down by 256 after adding each byte difference.
+	 */
+	for (pos = UUID_LEN; pos > 0; pos--)
+	{
+		int			bytediff = (int) hi_uuid->data[pos - 1] - (int) lo_uuid->data[pos - 1];
+
+		delta += bytediff;
+		delta /= 256;
+	}
+
+	Assert(delta >= 0);
+
+	PG_RETURN_FLOAT8(delta);
+}
+
+/*
+ * Distance between two dates: the difference of DateADT values, as float8.
+ */
+Datum
+brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
+{
+	DateADT		lo = PG_GETARG_DATEADT(0);
+	DateADT		hi = PG_GETARG_DATEADT(1);
+	float8		gap;
+
+	gap = (float8) hi - (float8) lo;
+
+	Assert(gap >= 0);
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Compute the approximate distance between two time (without tz) values.
+ *
+ * The TimeADT values are subtracted directly, and the result is then
+ * converted to float8.
+ */
+Datum
+brin_minmax_multi_distance_time(PG_FUNCTION_ARGS)
+{
+	TimeADT		lo = PG_GETARG_TIMEADT(0);
+	TimeADT		hi = PG_GETARG_TIMEADT(1);
+	float8		gap;
+
+	gap = (hi - lo);
+
+	Assert(gap >= 0);
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Compute the approximate distance between two timetz values.
+ *
+ * Combines the difference of the time fields with the difference of the
+ * zone fields, the latter multiplied by USECS_PER_SEC.
+ */
+Datum
+brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS)
+{
+	TimeTzADT  *lo = PG_GETARG_TIMETZADT_P(0);
+	TimeTzADT  *hi = PG_GETARG_TIMETZADT_P(1);
+	float8		gap;
+
+	gap = (hi->time - lo->time) + (hi->zone - lo->zone) * USECS_PER_SEC;
+
+	Assert(gap >= 0);
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Distance between two timestamp values, subtracted as float8.
+ */
+Datum
+brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
+{
+	Timestamp	lo = PG_GETARG_TIMESTAMP(0);
+	Timestamp	hi = PG_GETARG_TIMESTAMP(1);
+	float8		gap;
+
+	gap = (float8) hi - (float8) lo;
+
+	/* the boundaries are expected in order, so the gap is not negative */
+	Assert(gap >= 0);
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Distance between two interval values, as a (fractional) number of days.
+ */
+Datum
+brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
+{
+	Interval   *lo = PG_GETARG_INTERVAL_P(0);
+	Interval   *hi = PG_GETARG_INTERVAL_P(1);
+	int64		wholedays;
+	int64		partusecs;
+	float8		gap;
+
+	/*
+	 * Months are counted as 30 days, for consistency with
+	 * interval_cmp_internal. The result doesn't have to be exact (worst case
+	 * the ranges will be a bit less efficient), but it should not contradict
+	 * interval_cmp.
+	 */
+	wholedays = ((int64) hi->month - (int64) lo->month) * INT64CONST(30);
+	wholedays += (int64) hi->day - (int64) lo->day;
+	wholedays += (hi->time / USECS_PER_DAY) - (lo->time / USECS_PER_DAY);
+
+	/* what's left of the time fields after taking out the whole days */
+	partusecs = (hi->time % USECS_PER_DAY) - (lo->time % USECS_PER_DAY);
+
+	/* combine both parts in double precision */
+	gap = (double) wholedays + partusecs / (double) USECS_PER_DAY;
+
+	Assert(gap >= 0);
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Compute the distance between two pg_lsn values.
+ *
+ * The LSN values are subtracted directly (as XLogRecPtr), and only the
+ * result is converted to float8.
+ */
+Datum
+brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS)
+{
+	XLogRecPtr	lo = PG_GETARG_LSN(0);
+	XLogRecPtr	hi = PG_GETARG_LSN(1);
+	float8		gap;
+
+	gap = (hi - lo);
+
+	/* the boundaries are expected in order */
+	Assert(gap >= 0);
+
+	PG_RETURN_FLOAT8(gap);
+}
+
+/*
+ * Distance between two macaddr values.
+ *
+ * A MAC address is treated as a sequence of 6 unsigned bytes (the same
+ * approach used for UUID values).  The per-byte differences are accumulated
+ * starting with the last byte (f) and ending with the first one (a),
+ * dividing by 256 after each step.
+ */
+Datum
+brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS)
+{
+	macaddr    *first = PG_GETARG_MACADDR_P(0);
+	macaddr    *second = PG_GETARG_MACADDR_P(1);
+
+	/* bytes ordered from the last field (f) to the first field (a) */
+	unsigned char lo[] = {first->f, first->e, first->d,
+						  first->c, first->b, first->a};
+	unsigned char hi[] = {second->f, second->e, second->d,
+						  second->c, second->b, second->a};
+	float8		distance = 0;
+	int			pos;
+
+	for (pos = 0; pos < (int) lengthof(lo); pos++)
+	{
+		distance += ((float8) hi[pos] - (float8) lo[pos]);
+		distance /= 256;
+	}
+
+	Assert(distance >= 0);
+
+	PG_RETURN_FLOAT8(distance);
+}
+
+/*
+ * Distance between two macaddr8 values.
+ *
+ * A macaddr8 address is treated as a sequence of 8 unsigned bytes (the same
+ * approach used for UUID values).  The per-byte differences are accumulated
+ * starting with the last byte (h) and ending with the first one (a),
+ * dividing by 256 after each step.
+ */
+Datum
+brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS)
+{
+	macaddr8   *first = PG_GETARG_MACADDR8_P(0);
+	macaddr8   *second = PG_GETARG_MACADDR8_P(1);
+
+	/* bytes ordered from the last field (h) to the first field (a) */
+	unsigned char lo[] = {first->h, first->g, first->f, first->e,
+						  first->d, first->c, first->b, first->a};
+	unsigned char hi[] = {second->h, second->g, second->f, second->e,
+						  second->d, second->c, second->b, second->a};
+	float8		distance = 0;
+	int			pos;
+
+	for (pos = 0; pos < (int) lengthof(lo); pos++)
+	{
+		distance += ((float8) hi[pos] - (float8) lo[pos]);
+		distance /= 256;
+	}
+
+	Assert(distance >= 0);
+
+	PG_RETURN_FLOAT8(distance);
+}
+
+/*
+ * Distance between two inet values.
+ *
+ * Addresses of different families are considered to be at the maximum
+ * distance, which is 1.0.  For addresses of the same family, the address
+ * bytes are subtracted (after clearing the bits that lie beyond each
+ * address's own mask length) and the result is scaled by 256 per byte.
+ */
+Datum
+brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
+{
+	inet	   *ipa = PG_GETARG_INET_PP(0);
+	inet	   *ipb = PG_GETARG_INET_PP(1);
+	unsigned char *bytes_a;
+	unsigned char *bytes_b;
+	int			masklen_a;
+	int			masklen_b;
+	int			nbytes;
+	int			pos;
+	float8		distance = 0;
+
+	/* different families: maximal possible distance */
+	if (ip_family(ipa) != ip_family(ipb))
+		PG_RETURN_FLOAT8(1.0);
+
+	/* work on private copies, as the masking below modifies the bytes */
+	bytes_a = (unsigned char *) palloc(ip_addrsize(ipa));
+	memcpy(bytes_a, ip_addr(ipa), ip_addrsize(ipa));
+
+	bytes_b = (unsigned char *) palloc(ip_addrsize(ipb));
+	memcpy(bytes_b, ip_addr(ipb), ip_addrsize(ipb));
+
+	masklen_a = ip_bits(ipa);
+	masklen_b = ip_bits(ipb);
+	nbytes = ip_addrsize(ipa);
+
+	/*
+	 * Apply the network mask to both addresses.  Addresses sort by the first
+	 * address in the range, so A.B.C.D/24 < A.B.C.1 (the range starts at
+	 * A.B.C.0, which is before A.B.C.1).  Clearing the bits past the mask
+	 * length avoids producing a negative delta in such cases.
+	 */
+	for (pos = 0; pos < nbytes; pos++)
+	{
+		/* number of mask bits still applying to this byte (never negative) */
+		int			keep_a = Max(0, masklen_a - (pos * 8));
+		int			keep_b = Max(0, masklen_b - (pos * 8));
+
+		if (keep_a < 8)
+		{
+			unsigned char mask = (0xFF << (8 - keep_a));
+
+			bytes_a[pos] = (bytes_a[pos] & mask);
+		}
+
+		if (keep_b < 8)
+		{
+			unsigned char mask = (0xFF << (8 - keep_b));
+
+			bytes_b[pos] = (bytes_b[pos] & mask);
+		}
+	}
+
+	/* Accumulate the byte differences, walking from the last byte back. */
+	pos = nbytes;
+	while (pos-- > 0)
+	{
+		distance += (float8) bytes_b[pos] - (float8) bytes_a[pos];
+		distance /= 256;
+	}
+
+	Assert((distance >= 0) && (distance <= 1));
+
+	pfree(bytes_a);
+	pfree(bytes_b);
+
+	PG_RETURN_FLOAT8(distance);
+}
+
+/*
+ * Serialization callback (installed as bv_serialize by the add_value
+ * routine): turn the in-memory Ranges passed in src into its serialized
+ * form and store it in dst[0].
+ */
+static void
+brin_minmax_multi_serialize(BrinDesc *bdesc, Datum src, Datum *dst)
+{
+	Ranges	   *accumulated = (Ranges *) DatumGetPointer(src);
+
+	/*
+	 * The in-memory buffer may hold more values than requested (batch mode),
+	 * so first compress it down to the target number of values/ranges.
+	 */
+	compactify_ranges(bdesc, accumulated, accumulated->target_maxvalues);
+
+	/* After compaction everything has to be fully sorted. */
+	Assert(accumulated->nsorted == accumulated->nvalues);
+
+	dst[0] = PointerGetDatum(brin_range_serialize(accumulated));
+}
+
+/*
+ * Return the values-per-range setting obtained from the opclass options via
+ * MinMaxMultiGetValuesPerRange.  The bdesc argument is not used.
+ */
+static int
+brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts)
+{
+	int			values_per_range = MinMaxMultiGetValuesPerRange(opts);
+
+	return values_per_range;
+}
+
+/*
+ * Determine the size of the in-memory insert buffer for a summary whose
+ * serialized form is limited to target_maxvalues values.
+ *
+ * The buffer is MINMAX_BUFFER_FACTOR times the target, capped to the maximum
+ * number of values in the heap range (MaxHeapTuplesPerPage for each page of
+ * the range), but never less than the target itself.  The result is finally
+ * clamped to the [MINMAX_BUFFER_MIN, MINMAX_BUFFER_MAX] interval.  This is
+ * more than enough, considering the actual number of rows per page is likely
+ * much lower.
+ */
+static int
+minmax_multi_insert_buffer_size(BrinDesc *bdesc, int target_maxvalues)
+{
+	int			maxvalues;
+	BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
+
+	maxvalues = Min(target_maxvalues * MINMAX_BUFFER_FACTOR,
+					MaxHeapTuplesPerPage * pagesPerRange);
+
+	/* but always at least the original value */
+	maxvalues = Max(maxvalues, target_maxvalues);
+
+	/* always cap by MIN/MAX */
+	maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
+	maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
+
+	return maxvalues;
+}
+
+/*
+ * Examine the given index tuple (which contains the partial status of a
+ * certain page range) by comparing it to the given value that comes from
+ * another heap tuple.  If the new value is outside the min/max range
+ * specified by the existing tuple values, update the index tuple and return
+ * true.  Otherwise, return false and do not modify in this case.
+ *
+ * Arguments:
+ *	0 - BrinDesc of the index
+ *	1 - BrinValues of the column being summarized
+ *	2 - the new value
+ *	3 - isnull flag for the new value (must be false)
+ *
+ * The deserialized Ranges are kept in column->bv_mem_value and are written
+ * back later through the bv_serialize callback installed here.
+ */
+Datum
+brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	Datum		newval = PG_GETARG_DATUM(2);
+	bool		isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
+	MinMaxMultiOptions *opts = (MinMaxMultiOptions *) PG_GET_OPCLASS_OPTIONS();
+	Oid			colloid = PG_GET_COLLATION();
+	bool		modified = false;
+	Form_pg_attribute attr;
+	AttrNumber	attno;
+	Ranges	   *ranges;
+
+	Assert(!isnull);
+
+	attno = column->bv_attno;
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+
+	/* use the already deserialized value, if possible */
+	ranges = (Ranges *) DatumGetPointer(column->bv_mem_value);
+
+	/*
+	 * If this is the first non-null value, we need to initialize the range
+	 * list. Otherwise, just extract the existing range list from BrinValues.
+	 *
+	 * When starting with an empty range, we assume this is a batch mode and
+	 * we use a larger buffer. The buffer size is derived from the BRIN range
+	 * size, number of rows per page, with some sensible min/max values. A
+	 * small buffer would be bad for performance, but a large buffer might
+	 * require a lot of memory (because of keeping all the values).
+	 */
+	if (column->bv_allnulls)
+	{
+		MemoryContext oldctx;
+		int			target_maxvalues;
+		int			maxvalues;
+
+		/* what was specified as a reloption? */
+		target_maxvalues = brin_minmax_multi_get_values(bdesc, opts);
+
+		/* size of the (larger) in-memory insert buffer */
+		maxvalues = minmax_multi_insert_buffer_size(bdesc, target_maxvalues);
+
+		/* the summary has to live in the per-column memory context */
+		oldctx = MemoryContextSwitchTo(column->bv_context);
+		ranges = minmax_multi_init(maxvalues);
+		ranges->attno = attno;
+		ranges->colloid = colloid;
+		ranges->typid = attr->atttypid;
+		ranges->target_maxvalues = target_maxvalues;
+
+		/* we'll certainly need the comparator, so just look it up now */
+		ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+														 BTLessStrategyNumber);
+
+		MemoryContextSwitchTo(oldctx);
+
+		column->bv_allnulls = false;
+		modified = true;
+
+		column->bv_mem_value = PointerGetDatum(ranges);
+		column->bv_serialize = brin_minmax_multi_serialize;
+	}
+	else if (!ranges)
+	{
+		MemoryContext oldctx;
+		int			maxvalues;
+		SerializedRanges *serialized;
+
+		/* detoast and deserialize in the per-column memory context */
+		oldctx = MemoryContextSwitchTo(column->bv_context);
+
+		serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
+
+		/* size of the (larger) in-memory insert buffer */
+		maxvalues = minmax_multi_insert_buffer_size(bdesc,
+													serialized->maxvalues);
+
+		ranges = brin_range_deserialize(maxvalues, serialized);
+
+		ranges->attno = attno;
+		ranges->colloid = colloid;
+		ranges->typid = attr->atttypid;
+
+		/* we'll certainly need the comparator, so just look it up now */
+		ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+														 BTLessStrategyNumber);
+
+		column->bv_mem_value = PointerGetDatum(ranges);
+		column->bv_serialize = brin_minmax_multi_serialize;
+
+		MemoryContextSwitchTo(oldctx);
+	}
+
+	/*
+	 * Try to add the new value to the range. We need to update the modified
+	 * flag, so that we serialize the updated summary later.
+	 */
+	modified |= range_add_value(bdesc, colloid, attno, attr, ranges, newval);
+
+	PG_RETURN_BOOL(modified);
+}
+
+/*
+ * Given an index tuple corresponding to a certain page range and a scan key,
+ * return whether the scan key is consistent with the index tuple's min/max
+ * values.  Return true if so, false otherwise.
+ *
+ * Arguments:
+ *	0 - BrinDesc of the index
+ *	1 - BrinValues holding the serialized summary in bv_values[0]
+ *	2 - array of scan keys
+ *	3 - number of scan keys
+ *
+ * The summary consists of a number of ranges (min/max pairs) followed by a
+ * number of individual values.  The page range is consistent if at least one
+ * of those ranges or values satisfies all the scan keys.
+ */
+Datum
+brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
+	ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
+	int			nkeys = PG_GETARG_INT32(3);
+
+	Oid			colloid = PG_GET_COLLATION(),
+				subtype;
+	AttrNumber	attno;
+	Datum		value;
+	FmgrInfo   *finfo;
+	SerializedRanges *serialized;
+	Ranges	   *ranges;
+	int			keyno;
+	int			rangeno;
+	int			i;
+
+	/* note: attno is overwritten from each scan key in the loops below */
+	attno = column->bv_attno;
+
+	/* expand the on-disk summary, without any extra buffer space */
+	serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
+	ranges = brin_range_deserialize(serialized->maxvalues, serialized);
+
+	/* inspect the ranges, and for each one evaluate the scan keys */
+	for (rangeno = 0; rangeno < ranges->nranges; rangeno++)
+	{
+		/* boundaries of a range are stored as two consecutive values */
+		Datum		minval = ranges->values[2 * rangeno];
+		Datum		maxval = ranges->values[2 * rangeno + 1];
+
+		/* assume the range is matching, and we'll try to prove otherwise */
+		bool		matching = true;
+
+		for (keyno = 0; keyno < nkeys; keyno++)
+		{
+			Datum		matches;
+			ScanKey		key = keys[keyno];
+
+			/* NULL keys are handled and filtered-out in bringetbitmap */
+			Assert(!(key->sk_flags & SK_ISNULL));
+
+			attno = key->sk_attno;
+			subtype = key->sk_subtype;
+			value = key->sk_argument;
+			switch (key->sk_strategy)
+			{
+				case BTLessStrategyNumber:
+				case BTLessEqualStrategyNumber:
+					finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
+															   key->sk_strategy);
+					/* compare the range minimum against the key */
+					matches = FunctionCall2Coll(finfo, colloid, minval, value);
+					break;
+
+				case BTEqualStrategyNumber:
+					{
+						Datum		compar;
+						FmgrInfo   *cmpFn;
+
+						/* by default this range does not match */
+						matches = false;
+
+						/*
+						 * The key can only be in this range if it lies
+						 * between the range boundaries. First check if it's
+						 * less than the minimum of the range (that is, if
+						 * the minimum is greater than the key).
+						 */
+						cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
+																   BTGreaterStrategyNumber);
+						compar = FunctionCall2Coll(cmpFn, colloid, minval, value);
+
+						/* smaller than the smallest value in this range */
+						if (DatumGetBool(compar))
+							break;
+
+						/* then check if the range maximum is less than the key */
+						cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
+																   BTLessStrategyNumber);
+						compar = FunctionCall2Coll(cmpFn, colloid, maxval, value);
+
+						/* larger than the largest value in this range */
+						if (DatumGetBool(compar))
+							break;
+
+						/*
+						 * We haven't managed to eliminate this range, so
+						 * consider it matching.
+						 */
+						matches = true;
+
+						break;
+					}
+				case BTGreaterEqualStrategyNumber:
+				case BTGreaterStrategyNumber:
+					finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
+															   key->sk_strategy);
+					/* compare the range maximum against the key */
+					matches = FunctionCall2Coll(finfo, colloid, maxval, value);
+					break;
+
+				default:
+					/* shouldn't happen */
+					elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+					matches = 0;
+					break;
+			}
+
+			/* the range has to match all the scan keys */
+			matching &= DatumGetBool(matches);
+
+			/* once we find a non-matching key, we're done */
+			if (!matching)
+				break;
+		}
+
+		/*
+		 * have we found a range matching all scan keys? if yes, we're done
+		 */
+		if (matching)
+			PG_RETURN_DATUM(BoolGetDatum(true));
+	}
+
+	/*
+	 * And now inspect the values. We don't bother with doing a binary search
+	 * here, because we're dealing with serialized / fully compacted ranges,
+	 * so there should be only very few values.
+	 */
+	for (i = 0; i < ranges->nvalues; i++)
+	{
+		/* individual values are stored after all the range boundaries */
+		Datum		val = ranges->values[2 * ranges->nranges + i];
+
+		/* assume the value is matching, and we'll try to prove otherwise */
+		bool		matching = true;
+
+		for (keyno = 0; keyno < nkeys; keyno++)
+		{
+			Datum		matches;
+			ScanKey		key = keys[keyno];
+
+			/*
+			 * Skip NULL keys.  (The loop over ranges above asserts that no
+			 * NULL keys get here, so this is merely defensive.)
+			 */
+			if (key->sk_flags & SK_ISNULL)
+				continue;
+
+			attno = key->sk_attno;
+			subtype = key->sk_subtype;
+			value = key->sk_argument;
+			switch (key->sk_strategy)
+			{
+				case BTLessStrategyNumber:
+				case BTLessEqualStrategyNumber:
+				case BTEqualStrategyNumber:
+				case BTGreaterEqualStrategyNumber:
+				case BTGreaterStrategyNumber:
+
+					/* a single value is simply compared using the operator */
+					finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
+															   key->sk_strategy);
+					matches = FunctionCall2Coll(finfo, colloid, val, value);
+					break;
+
+				default:
+					/* shouldn't happen */
+					elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+					matches = 0;
+					break;
+			}
+
+			/* the value has to match all the scan keys */
+			matching &= DatumGetBool(matches);
+
+			/* once we find a non-matching key, we're done */
+			if (!matching)
+				break;
+		}
+
+		/* have we found a value matching all scan keys? if yes, we're done */
+		if (matching)
+			PG_RETURN_DATUM(BoolGetDatum(true));
+	}
+
+	/* neither a range nor a value matched all the scan keys */
+	PG_RETURN_DATUM(BoolGetDatum(false));
+}
+
+/*
+ * Given two BrinValues, update the first of them as a union of the summary
+ * values contained in both.  The second one is untouched.
+ *
+ * Arguments:
+ *	0 - BrinDesc of the index
+ *	1 - BrinValues to be updated (col_a)
+ *	2 - BrinValues to be merged into the first one (col_b)
+ *
+ * Both summaries are deserialized, combined into a single array of expanded
+ * ranges, sorted, merged and reduced, and the result is serialized back
+ * into col_a->bv_values[0].
+ */
+Datum
+brin_minmax_multi_union(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
+	BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
+
+	Oid			colloid = PG_GET_COLLATION();
+	SerializedRanges *serialized_a;
+	SerializedRanges *serialized_b;
+	Ranges	   *ranges_a;
+	Ranges	   *ranges_b;
+	AttrNumber	attno;
+	Form_pg_attribute attr;
+	ExpandedRange *eranges;
+	int			neranges;
+	FmgrInfo   *cmpFn,
+			   *distanceFn;
+	DistanceValue *distances;
+	MemoryContext ctx;
+	MemoryContext oldctx;
+
+	/* both summaries have to be for the same, non-empty column */
+	Assert(col_a->bv_attno == col_b->bv_attno);
+	Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
+
+	attno = col_a->bv_attno;
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+
+	serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]);
+	serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]);
+
+	ranges_a = brin_range_deserialize(serialized_a->maxvalues, serialized_a);
+	ranges_b = brin_range_deserialize(serialized_b->maxvalues, serialized_b);
+
+	/* make sure neither of the ranges is NULL */
+	Assert(ranges_a && ranges_b);
+
+	/* each range and each individual value gets its own expanded range */
+	neranges = (ranges_a->nranges + ranges_a->nvalues) +
+		(ranges_b->nranges + ranges_b->nvalues);
+
+	/*
+	 * The distanceFn calls (which may internally call e.g. numeric_le) may
+	 * allocate quite a bit of memory, and we must not leak it. Otherwise,
+	 * we'd have problems e.g. when building indexes. So we create a local
+	 * memory context and make sure we free the memory before leaving this
+	 * function (not after every call).
+	 */
+	ctx = AllocSetContextCreate(CurrentMemoryContext,
+								"minmax-multi context",
+								ALLOCSET_DEFAULT_SIZES);
+
+	oldctx = MemoryContextSwitchTo(ctx);
+
+	/* allocate and fill */
+	eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
+
+	/* fill the expanded ranges with entries for the first range */
+	fill_expanded_ranges(eranges, ranges_a->nranges + ranges_a->nvalues,
+						 ranges_a);
+
+	/* and now add the expanded ranges for the second range */
+	fill_expanded_ranges(&eranges[ranges_a->nranges + ranges_a->nvalues],
+						 ranges_b->nranges + ranges_b->nvalues,
+						 ranges_b);
+
+	cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
+											   BTLessStrategyNumber);
+
+	/* sort the expanded ranges (the returned count replaces neranges) */
+	neranges = sort_expanded_ranges(cmpFn, colloid, eranges, neranges);
+
+	/*
+	 * We've loaded two different lists of expanded ranges, so some of them
+	 * may be overlapping. So walk through them and merge them.
+	 */
+	neranges = merge_overlapping_ranges(cmpFn, colloid, eranges, neranges);
+
+	/* check that the combined ranges are correct (no overlaps, ordering) */
+	AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
+
+	/*
+	 * If needed, reduce some of the ranges.
+	 *
+	 * XXX This may be fairly expensive, so maybe we should do it only when
+	 * it's actually needed (when we have too many ranges).
+	 */
+
+	/* build array of gap distances and sort them in ascending order */
+	distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
+	distances = build_distances(distanceFn, colloid, eranges, neranges);
+
+	/*
+	 * See how many values would be needed to store the current ranges, and if
+	 * needed combine as many of them to get below the threshold. The
+	 * collapsed ranges will be stored as a single value.
+	 *
+	 * XXX This does not apply the load factor, as we don't expect to add more
+	 * values to the range, so we prefer to keep as many ranges as possible.
+	 *
+	 * XXX Can the maxvalues be different in the two ranges? Perhaps we should
+	 * use maximum of those?
+	 */
+	neranges = reduce_expanded_ranges(eranges, neranges, distances,
+									  ranges_a->maxvalues,
+									  cmpFn, colloid);
+
+	/* update the first range summary */
+	store_expanded_ranges(ranges_a, eranges, neranges);
+
+	/* leave the local context and release everything allocated in it */
+	MemoryContextSwitchTo(oldctx);
+	MemoryContextDelete(ctx);
+
+	/* cleanup and update the serialized value */
+	pfree(serialized_a);
+	col_a->bv_values[0] = PointerGetDatum(brin_range_serialize(ranges_a));
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Look up (and cache) an opclass support procedure of a minmax-multi index.
+ *
+ * Returns the FmgrInfo for support function number procnum of attribute
+ * attno, or NULL if the opclass does not provide such a procedure.  Both
+ * outcomes are remembered in the opaque struct, to avoid repetitive
+ * syscache lookups.
+ */
+static FmgrInfo *
+minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
+{
+	uint16		slot = procnum - PROCNUM_BASE;
+	MinmaxMultiOpaque *cache;
+	FmgrInfo   *entry;
+
+	cache = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
+
+	/* An earlier search already failed, don't bother searching again. */
+	if (cache->extra_proc_missing[slot])
+		return NULL;
+
+	entry = &cache->extra_procinfos[slot];
+
+	/* Already looked up, just return the cached entry. */
+	if (entry->fn_oid != InvalidOid)
+		return entry;
+
+	/* No such support procedure: remember that for the next call. */
+	if (!RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
+											 procnum)))
+	{
+		cache->extra_proc_missing[slot] = true;
+		return NULL;
+	}
+
+	/* First successful lookup, copy the info into the cache. */
+	fmgr_info_copy(entry,
+				   index_getprocinfo(bdesc->bd_index, attno, procnum),
+				   bdesc->bd_context);
+
+	return entry;
+}
+
+/*
+ * Look up (and cache) the procedure implementing the given strategy for the
+ * indexed attribute's type and the given subtype.
+ *
+ * Only procedures for a single subtype are cached at a time; asking for a
+ * different subtype throws away all the cached entries.
+ *
+ * NOTE(review): the previous comment said this function "mirrors
+ * minmax_multi_get_strategy_procinfo", i.e. itself.  Presumably it meant the
+ * equivalent lookup in another BRIN opclass implementation - confirm which
+ * one, and keep the two in sync when changing either.
+ */
+static FmgrInfo *
+minmax_multi_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype,
+								   uint16 strategynum)
+{
+	MinmaxMultiOpaque *cache;
+	FmgrInfo   *entry;
+	Form_pg_attribute attr;
+	HeapTuple	amoptup;
+	Oid			opfamily;
+	Oid			oprid;
+	bool		isNull;
+
+	Assert(strategynum >= 1 &&
+		   strategynum <= BTMaxStrategyNumber);
+
+	cache = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
+
+	/* The cache holds one subtype only, so reset it when that changes. */
+	if (cache->cached_subtype != subtype)
+	{
+		uint16		slot;
+
+		for (slot = 0; slot < BTMaxStrategyNumber; slot++)
+			cache->strategy_procinfos[slot].fn_oid = InvalidOid;
+		cache->cached_subtype = subtype;
+	}
+
+	entry = &cache->strategy_procinfos[strategynum - 1];
+
+	/* Cache hit, no syscache lookup needed. */
+	if (entry->fn_oid != InvalidOid)
+		return entry;
+
+	/* Find the operator for this strategy in the attribute's opfamily. */
+	opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
+	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
+	amoptup = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
+							  ObjectIdGetDatum(attr->atttypid),
+							  ObjectIdGetDatum(subtype),
+							  Int16GetDatum(strategynum));
+	if (!HeapTupleIsValid(amoptup))
+		elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+			 strategynum, attr->atttypid, subtype, opfamily);
+
+	oprid = DatumGetObjectId(SysCacheGetAttr(AMOPSTRATEGY, amoptup,
+											 Anum_pg_amop_amopopr, &isNull));
+	ReleaseSysCache(amoptup);
+	Assert(!isNull && RegProcedureIsValid(oprid));
+
+	/* Remember the operator's underlying function in the index context. */
+	fmgr_info_cxt(get_opcode(oprid), entry, bdesc->bd_context);
+
+	return entry;
+}
+
+/*
+ * Opclass options routine: register the integer option "values_per_range",
+ * stored in the valuesPerRange field of MinMaxMultiOptions.
+ */
+Datum
+brin_minmax_multi_options(PG_FUNCTION_ARGS)
+{
+	local_relopts *optdefs = (local_relopts *) PG_GETARG_POINTER(0);
+	int			field_offset = offsetof(MinMaxMultiOptions, valuesPerRange);
+
+	init_local_reloptions(optdefs, sizeof(MinMaxMultiOptions));
+
+	add_local_int_reloption(optdefs,
+							"values_per_range",
+							"desc",
+							MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE,
+							8,
+							256,
+							field_offset);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * brin_minmax_multi_summary_in
+ *		- input routine for type brin_minmax_multi_summary.
+ *
+ * The type exists only to represent summaries inside BRIN minmax-multi
+ * indexes.  It has no operations of its own, and the data is kept in binary
+ * form, so there is no need to parse text input and this always raises an
+ * error.
+ */
+Datum
+brin_minmax_multi_summary_in(PG_FUNCTION_ARGS)
+{
+	/* text input is deliberately not supported */
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s",
+					"brin_minmax_multi_summary")));
+
+	/* not reached, but keeps the compiler quiet */
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * brin_minmax_multi_summary_out
+ *		- output routine for type brin_minmax_multi_summary.
+ *
+ * BRIN minmax-multi summaries are serialized into a bytea value, but we
+ * want to output something nicer humans can understand.
+ *
+ * The result is a cstring of the form
+ *
+ *		{nranges: N  nvalues: N  maxvalues: N ranges: {...} values: {...}}
+ *
+ * where the "ranges" and "values" parts are text arrays, each printed only
+ * when the summary contains at least one range / value.  Boundaries and
+ * values are printed using the output function of the summarized type.
+ */
+Datum
+brin_minmax_multi_summary_out(PG_FUNCTION_ARGS)
+{
+	int			i;
+	int			idx;
+	SerializedRanges *ranges;
+	Ranges	   *ranges_deserialized;
+	StringInfoData str;
+	bool		isvarlena;
+	Oid			outfunc;
+	FmgrInfo	fmgrinfo;
+	ArrayBuildState *astate_values = NULL;
+
+	initStringInfo(&str);
+	appendStringInfoChar(&str, '{');
+
+	/*
+	 * Detoast to get value with full 4B header (can't be stored in a toast
+	 * table, but can use 1B header).
+	 */
+	ranges = (SerializedRanges *) PG_DETOAST_DATUM(PG_GETARG_BYTEA_PP(0));
+
+	/* lookup output func for the type */
+	getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena);
+	fmgr_info(outfunc, &fmgrinfo);
+
+	/* deserialize the range info into easy-to-process pieces */
+	ranges_deserialized = brin_range_deserialize(ranges->maxvalues, ranges);
+
+	appendStringInfo(&str, "nranges: %d  nvalues: %d  maxvalues: %d",
+					 ranges_deserialized->nranges,
+					 ranges_deserialized->nvalues,
+					 ranges_deserialized->maxvalues);
+
+	/*
+	 * Print the ranges.  idx walks the values array: the range boundaries
+	 * (two per range) come first, followed by the individual values.
+	 */
+	idx = 0;
+	for (i = 0; i < ranges_deserialized->nranges; i++)
+	{
+		char	   *a,
+				   *b;
+		text	   *c;
+		StringInfoData buf;
+
+		/* separate buffer, so that the result in "str" is not shadowed */
+		initStringInfo(&buf);
+
+		a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
+		b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
+
+		appendStringInfo(&buf, "%s ... %s", a, b);
+
+		c = cstring_to_text(buf.data);
+
+		astate_values = accumArrayResult(astate_values,
+										 PointerGetDatum(c),
+										 false,
+										 TEXTOID,
+										 CurrentMemoryContext);
+	}
+
+	if (ranges_deserialized->nranges > 0)
+	{
+		Oid			typoutput;
+		bool		typIsVarlena;
+		Datum		val;
+		char	   *extval;
+
+		getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
+
+		val = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
+
+		extval = OidOutputFunctionCall(typoutput, val);
+
+		appendStringInfo(&str, " ranges: %s", extval);
+	}
+
+	/* print the individual values, starting with a fresh array state */
+	astate_values = NULL;
+
+	for (i = 0; i < ranges_deserialized->nvalues; i++)
+	{
+		char	   *a;
+		text	   *b;
+
+		a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
+
+		b = cstring_to_text(a);
+
+		astate_values = accumArrayResult(astate_values,
+										 PointerGetDatum(b),
+										 false,
+										 TEXTOID,
+										 CurrentMemoryContext);
+	}
+
+	if (ranges_deserialized->nvalues > 0)
+	{
+		Oid			typoutput;
+		bool		typIsVarlena;
+		Datum		val;
+		char	   *extval;
+
+		getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
+
+		val = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
+
+		extval = OidOutputFunctionCall(typoutput, val);
+
+		appendStringInfo(&str, " values: %s", extval);
+	}
+
+	appendStringInfoChar(&str, '}');
+
+	PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * brin_minmax_multi_summary_recv
+ *		- binary input routine for type brin_minmax_multi_summary.
+ *
+ * Always raises an error: values of this type cannot be received.
+ */
+Datum
+brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS)
+{
+	/* binary input is deliberately not supported */
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s",
+					"brin_minmax_multi_summary")));
+
+	/* not reached, but keeps the compiler quiet */
+	PG_RETURN_VOID();
+}
+
+/*
+ * brin_minmax_multi_summary_send
+ *		- binary output routine for type brin_minmax_multi_summary.
+ *
+ * A BRIN minmax-multi summary is serialized in a bytea value (only the type
+ * name differs), so the call is simply handed over to byteasend.
+ */
+Datum
+brin_minmax_multi_summary_send(PG_FUNCTION_ARGS)
+{
+	Datum		sent = byteasend(fcinfo);
+
+	return sent;
+}
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
new file mode 100644
index 0000000..f17aad5
--- /dev/null
+++ b/src/backend/access/brin/brin_pageops.c
@@ -0,0 +1,920 @@
+/*
+ * brin_pageops.c
+ *		Page-handling routines for BRIN indexes
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_pageops.c
+ */
+#include "postgres.h"
+
+#include "access/brin_page.h"
+#include "access/brin_pageops.h"
+#include "access/brin_revmap.h"
+#include "access/brin_xlog.h"
+#include "access/xloginsert.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/lmgr.h"
+#include "storage/smgr.h"
+#include "utils/rel.h"
+
+/*
+ * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page: a whole block less
+ * page overhead.  Unlike other index AMs, we tolerate a single item per page.
+ */
+#define BrinMaxItemSize \
+	MAXALIGN_DOWN(BLCKSZ - \
+				  (MAXALIGN(SizeOfPageHeaderData + \
+							sizeof(ItemIdData)) + \
+				   MAXALIGN(sizeof(BrinSpecialSpace))))
+
+static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+								   bool *extended);
+static Size br_page_get_freespace(Page page);
+static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
+
+
+/*
+ * Update tuple origtup (size origsz), located in offset oldoff of buffer
+ * oldbuf, to newtup (size newsz, which must be MAXALIGNed) as summary tuple
+ * for the page range starting at heapBlk.  oldbuf must not be locked on entry,
+ * and is not locked at exit.  An oversized newtup raises an error.
+ *
+ * If samepage is true, only an in-place replacement on oldbuf's page is tried;
+ * otherwise a page with enough room is obtained, but the old page is still
+ * preferred if the new tuple turns out to fit there.  Return true on success
+ * (when the tuple moves, the revmap is updated to point to it).  Return false
+ * if the update is not done for whatever reason; caller may retry.
+ */
+bool
+brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
+			  BrinRevmap *revmap, BlockNumber heapBlk,
+			  Buffer oldbuf, OffsetNumber oldoff,
+			  const BrinTuple *origtup, Size origsz,
+			  const BrinTuple *newtup, Size newsz,
+			  bool samepage)
+{
+	Page		oldpage;
+	ItemId		oldlp;
+	BrinTuple  *oldtup;
+	Size		oldsz;
+	Buffer		newbuf;
+	BlockNumber newblk = InvalidBlockNumber;
+	bool		extended;		/* was newbuf obtained by extending the index? */
+
+	Assert(newsz == MAXALIGN(newsz));
+
+	/* An oversized item can never be stored; report that right away. */
+	if (newsz > BrinMaxItemSize)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
+						newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
+		return false;			/* keep compiler quiet */
+	}
+
+	/* make sure the revmap is long enough to contain the entry we need */
+	brinRevmapExtend(revmap, heapBlk);
+
+	if (!samepage)
+	{
+		/* need a page on which to put the item; this also locks oldbuf */
+		newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
+		if (!BufferIsValid(newbuf))
+		{
+			Assert(!extended);
+			return false;
+		}
+
+		/*
+		 * Note: it's possible (though unlikely) that the returned newbuf is
+		 * the same as oldbuf, if brin_getinsertbuffer determined that the old
+		 * buffer does in fact have enough space.
+		 */
+		if (newbuf == oldbuf)
+		{
+			Assert(!extended);
+			newbuf = InvalidBuffer;
+		}
+		else
+			newblk = BufferGetBlockNumber(newbuf);
+	}
+	else
+	{
+		LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+		newbuf = InvalidBuffer;
+		extended = false;
+	}
+	oldpage = BufferGetPage(oldbuf);
+	oldlp = PageGetItemId(oldpage, oldoff);
+
+	/*
+	 * Check that the old tuple wasn't updated concurrently: it might have
+	 * moved someplace else entirely, and for that matter the whole page
+	 * might've become a revmap page.  Note that in the first two cases
+	 * checked here, the "oldlp" we just calculated is garbage; but
+	 * PageGetItemId() is simple enough that it was safe to do that
+	 * calculation anyway.
+	 */
+	if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
+		oldoff > PageGetMaxOffsetNumber(oldpage) ||
+		!ItemIdIsNormal(oldlp))
+	{
+		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+		/*
+		 * If this happens, and the new buffer was obtained by extending the
+		 * relation, then we need to ensure we don't leave it uninitialized or
+		 * forget about it.
+		 */
+		if (BufferIsValid(newbuf))
+		{
+			if (extended)
+				brin_initialize_empty_new_buffer(idxrel, newbuf);
+			UnlockReleaseBuffer(newbuf);
+			if (extended)
+				FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
+		}
+		return false;
+	}
+
+	oldsz = ItemIdGetLength(oldlp);
+	oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
+
+	/*
+	 * ... or it might have been updated in place to different contents.
+	 */
+	if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
+	{
+		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+		if (BufferIsValid(newbuf))
+		{
+			/* As above, initialize and record new page if we got one */
+			if (extended)
+				brin_initialize_empty_new_buffer(idxrel, newbuf);
+			UnlockReleaseBuffer(newbuf);
+			if (extended)
+				FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
+		}
+		return false;
+	}
+
+	/*
+	 * Great, the old tuple is intact.  We can proceed with the update.
+	 *
+	 * If there's enough room in the old page for the new tuple, replace it.
+	 *
+	 * Note that there might now be enough space on the page even though the
+	 * caller told us there isn't, if a concurrent update moved another tuple
+	 * elsewhere or replaced a tuple with a smaller one.
+	 */
+	if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
+		brin_can_do_samepage_update(oldbuf, origsz, newsz))
+	{
+		START_CRIT_SECTION();
+		if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
+			elog(ERROR, "failed to replace BRIN tuple");
+		MarkBufferDirty(oldbuf);
+
+		/* WAL-log the in-place replacement */
+		if (RelationNeedsWAL(idxrel))
+		{
+			xl_brin_samepage_update xlrec;
+			XLogRecPtr	recptr;
+			uint8		info = XLOG_BRIN_SAMEPAGE_UPDATE;
+
+			xlrec.offnum = oldoff;
+
+			XLogBeginInsert();
+			XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
+
+			XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
+			XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
+
+			recptr = XLogInsert(RM_BRIN_ID, info);
+
+			PageSetLSN(oldpage, recptr);
+		}
+
+		END_CRIT_SECTION();
+
+		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+		if (BufferIsValid(newbuf))
+		{
+			/* As above, initialize and record new page if we got one */
+			if (extended)
+				brin_initialize_empty_new_buffer(idxrel, newbuf);
+			UnlockReleaseBuffer(newbuf);
+			if (extended)
+				FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
+		}
+
+		return true;
+	}
+	else if (newbuf == InvalidBuffer)
+	{
+		/*
+		 * Can't update in place (no room, or page is being evacuated) and we
+		 * have no other page to use.  Tell caller to start over.
+		 */
+		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+		return false;
+	}
+	else
+	{
+		/*
+		 * Not enough free space on the oldpage. Put the new tuple on the new
+		 * page, and update the revmap.
+		 */
+		Page		newpage = BufferGetPage(newbuf);
+		Buffer		revmapbuf;
+		ItemPointerData newtid;
+		OffsetNumber newoff;
+		Size		freespace = 0;
+
+		revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
+
+		START_CRIT_SECTION();
+
+		/*
+		 * We need to initialize the page if it's newly obtained.  Note we
+		 * will WAL-log the initialization as part of the update, so we don't
+		 * need to do that here.
+		 */
+		if (extended)
+			brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
+
+		PageIndexTupleDeleteNoCompact(oldpage, oldoff);
+		newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
+							 InvalidOffsetNumber, false, false);
+		if (newoff == InvalidOffsetNumber)
+			elog(ERROR, "failed to add BRIN tuple to new page");
+		MarkBufferDirty(oldbuf);
+		MarkBufferDirty(newbuf);
+
+		/* needed to update FSM below */
+		if (extended)
+			freespace = br_page_get_freespace(newpage);
+
+		ItemPointerSet(&newtid, newblk, newoff);
+		brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
+		MarkBufferDirty(revmapbuf);
+
+		/* WAL-log the move: new page, revmap page and old page in one record */
+		if (RelationNeedsWAL(idxrel))
+		{
+			xl_brin_update xlrec;
+			XLogRecPtr	recptr;
+			uint8		info;
+
+			info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
+
+			xlrec.insert.offnum = newoff;
+			xlrec.insert.heapBlk = heapBlk;
+			xlrec.insert.pagesPerRange = pagesPerRange;
+			xlrec.oldOffnum = oldoff;
+
+			XLogBeginInsert();
+
+			/* new page */
+			XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
+
+			XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+			XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
+
+			/* revmap page */
+			XLogRegisterBuffer(1, revmapbuf, 0);
+
+			/* old page */
+			XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
+
+			recptr = XLogInsert(RM_BRIN_ID, info);
+
+			PageSetLSN(oldpage, recptr);
+			PageSetLSN(newpage, recptr);
+			PageSetLSN(BufferGetPage(revmapbuf), recptr);
+		}
+
+		END_CRIT_SECTION();
+
+		LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
+		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+		UnlockReleaseBuffer(newbuf);
+
+		if (extended)
+		{
+			RecordPageWithFreeSpace(idxrel, newblk, freespace);
+			FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
+		}
+
+		return true;
+	}
+}
+
+/*
+ * Can brin_doupdate replace an origsz-byte tuple by a newsz-byte one in place?
+ */
+bool
+brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
+{
+	if (newsz <= origsz)
+		return true;			/* tuple does not grow: it always fits */
+	return PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz);
+}
+
+/*
+ * Insert an index tuple (size itemsz, which must be MAXALIGNed) into the index
+ * relation.  The revmap is updated to mark the range containing the given
+ * page as pointing to the inserted entry.  A WAL record is written if the
+ * index needs WAL.  An oversized tuple raises an error.
+ *
+ * The buffer, if valid, is first checked for free space to insert the new
+ * entry; if there isn't enough, it is released and a new buffer is obtained
+ * and pinned, replacing *buffer.  No buffer lock must be held on entry, no
+ * buffer lock is held on exit.  Returns the offset number of the new tuple.
+ */
+OffsetNumber
+brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
+			  BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
+			  BrinTuple *tup, Size itemsz)
+{
+	Page		page;
+	BlockNumber blk;
+	OffsetNumber off;
+	Size		freespace = 0;
+	Buffer		revmapbuf;
+	ItemPointerData tid;
+	bool		extended;		/* was *buffer obtained by extending the index? */
+
+	Assert(itemsz == MAXALIGN(itemsz));
+
+	/* An oversized item can never be stored; report that right away. */
+	if (itemsz > BrinMaxItemSize)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
+						itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
+		return InvalidOffsetNumber; /* keep compiler quiet */
+	}
+
+	/* Make sure the revmap is long enough to contain the entry we need */
+	brinRevmapExtend(revmap, heapBlk);
+
+	/*
+	 * Acquire lock on buffer supplied by caller, if any.  If it doesn't have
+	 * enough space, unpin it to obtain a new one below.
+	 */
+	if (BufferIsValid(*buffer))
+	{
+		/*
+		 * It's possible that another backend (or ourselves!) extended the
+		 * revmap over the page we held a pin on, so we cannot assume that
+		 * it's still a regular page.
+		 */
+		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+		if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
+		{
+			UnlockReleaseBuffer(*buffer);
+			*buffer = InvalidBuffer;
+		}
+	}
+
+	/*
+	 * If we still don't have a usable buffer, have brin_getinsertbuffer
+	 * obtain one for us.
+	 */
+	if (!BufferIsValid(*buffer))
+	{
+		do
+			*buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
+		while (!BufferIsValid(*buffer));
+	}
+	else
+		extended = false;
+
+	/* Now obtain lock on revmap buffer */
+	revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
+
+	page = BufferGetPage(*buffer);
+	blk = BufferGetBlockNumber(*buffer);
+
+	/* Execute the actual insertion */
+	START_CRIT_SECTION();
+	if (extended)
+		brin_page_init(page, BRIN_PAGETYPE_REGULAR);
+	off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
+					  false, false);
+	if (off == InvalidOffsetNumber)
+		elog(ERROR, "failed to add BRIN tuple to new page");
+	MarkBufferDirty(*buffer);
+
+	/* needed to update FSM below */
+	if (extended)
+		freespace = br_page_get_freespace(page);
+
+	ItemPointerSet(&tid, blk, off);
+	brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
+	MarkBufferDirty(revmapbuf);
+
+	/* WAL-log the insertion together with the revmap change */
+	if (RelationNeedsWAL(idxrel))
+	{
+		xl_brin_insert xlrec;
+		XLogRecPtr	recptr;
+		uint8		info;
+
+		info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
+		xlrec.heapBlk = heapBlk;
+		xlrec.pagesPerRange = pagesPerRange;
+		xlrec.offnum = off;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
+
+		XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+		XLogRegisterBufData(0, (char *) tup, itemsz);
+
+		XLogRegisterBuffer(1, revmapbuf, 0);
+
+		recptr = XLogInsert(RM_BRIN_ID, info);
+
+		PageSetLSN(page, recptr);
+		PageSetLSN(BufferGetPage(revmapbuf), recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	/* Tuple is firmly on buffer; we can release our locks (pins are kept) */
+	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+	LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
+
+	BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
+			   blk, off, heapBlk));
+
+	if (extended)
+	{
+		RecordPageWithFreeSpace(idxrel, blk, freespace);
+		FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
+	}
+
+	return off;
+}
+
+/*
+ * Initialize 'page' as a BRIN page of the given type.  The caller is
+ * responsible for marking the buffer dirty, as appropriate.
+ */
+void
+brin_page_init(Page page, uint16 type)
+{
+	Size		specialsz = sizeof(BrinSpecialSpace);
+
+	PageInit(page, BLCKSZ, specialsz);
+	BrinPageType(page) = type;
+}
+
+/*
+ * Set up 'page' as the metapage of a new BRIN index, recording the given
+ * pages-per-range setting and version number.
+ */
+void
+brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
+{
+	PageHeader	phdr = (PageHeader) page;
+	BrinMetaPageData *meta;
+	char	   *meta_end;
+
+	brin_page_init(page, BRIN_PAGETYPE_META);
+	meta = (BrinMetaPageData *) PageGetContents(page);
+	meta_end = (char *) meta + sizeof(BrinMetaPageData);
+
+	/*
+	 * Block 0 is the metapage itself and so is never a valid revmap block
+	 * number; storing it as the last revmap page is a small cheat that
+	 * enables the first revmap page to be created when the index is.
+	 */
+	meta->lastRevmapPage = 0;
+	meta->pagesPerRange = pagesPerRange;
+	meta->brinVersion = version;
+	meta->brinMagic = BRIN_META_MAGIC;
+
+	/*
+	 * pd_lower must point just past the end of the metadata; otherwise the
+	 * metadata would be lost if xlog.c compresses the page.
+	 */
+	phdr->pd_lower = meta_end - (char *) page;
+}
+
+/*
+ * Begin the evacuation protocol for a page, which the caller must have
+ * locked in exclusive mode.
+ *
+ * Returns false, changing nothing, when the page is still uninitialized or
+ * holds no used items: such a page can be taken over for the revmap as is.
+ * Otherwise the page is flagged for evacuation and true is returned.
+ */
+bool
+brin_start_evacuating_page(Relation idxRel, Buffer buf)
+{
+	Page		pg = BufferGetPage(buf);
+	OffsetNumber lastoff;
+	OffsetNumber cur;
+	bool		has_items = false;
+
+	/* A page that was never initialized has nothing to move away. */
+	if (PageIsNew(pg))
+		return false;
+
+	/* Look for a line pointer that is in use; finding one is enough. */
+	lastoff = PageGetMaxOffsetNumber(pg);
+	for (cur = FirstOffsetNumber; cur <= lastoff; cur++)
+	{
+		if (ItemIdIsUsed(PageGetItemId(pg, cur)))
+		{
+			has_items = true;
+			break;
+		}
+	}
+	if (!has_items)
+		return false;
+
+	/*
+	 * Keep other backends from adding more stuff to this page: once
+	 * BRIN_EVACUATE_PAGE is set, br_page_get_freespace reports the page as
+	 * unusable for new tuples.  Note that this flag is not WAL-logged,
+	 * except accidentally.
+	 */
+	BrinPageFlags(pg) |= BRIN_EVACUATE_PAGE;
+	MarkBufferDirtyHint(buf, true);
+	return true;
+}
+
+/*
+ * Move all tuples out of a page (which must be flagged BRIN_EVACUATE_PAGE).
+ *
+ * The caller must hold lock on the page. The lock and pin are released.
+ */
+void
+brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
+				   BrinRevmap *revmap, Buffer buf)
+{
+	OffsetNumber off;
+	OffsetNumber maxoff;
+	Page		page;
+	BrinTuple  *btup = NULL;	/* NOTE(review): never reassigned below */
+	Size		btupsz = 0;
+
+	page = BufferGetPage(buf);
+
+	Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
+
+	maxoff = PageGetMaxOffsetNumber(page);
+	for (off = FirstOffsetNumber; off <= maxoff; off++)
+	{
+		BrinTuple  *tup;
+		Size		sz;
+		ItemId		lp;
+
+		CHECK_FOR_INTERRUPTS();
+
+		lp = PageGetItemId(page, off);
+		if (ItemIdIsUsed(lp))
+		{
+			sz = ItemIdGetLength(lp);
+			tup = (BrinTuple *) PageGetItem(page, lp);
+			tup = brin_copy_tuple(tup, sz, btup, &btupsz);
+
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);	/* brin_doupdate needs it unlocked */
+
+			if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
+							   buf, off, tup, sz, tup, sz, false))
+				off--;			/* retry the same offset on next iteration */
+
+			LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+			/* It's possible that someone extended the revmap over this page */
+			if (!BRIN_IS_REGULAR_PAGE(page))
+				break;
+		}
+	}
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Given a BRIN index page, initialize it if necessary, and record its
+ * current free space in the FSM.
+ *
+ * The main use for this is when, during vacuuming, an uninitialized page is
+ * found, which could be the result of relation extension followed by a crash
+ * before the page can be used.
+ *
+ * Here, we don't bother to update upper FSM pages, instead expecting that our
+ * caller (brin_vacuum_scan) will fix them at the end of the scan.  Elsewhere
+ * in this file, it's generally a good idea to propagate additions of free
+ * space into the upper FSM pages immediately.
+ */
+void
+brin_page_cleanup(Relation idxrel, Buffer buf)
+{
+	Page		page = BufferGetPage(buf);
+
+	/*
+	 * If a page was left uninitialized, initialize it now; also record it in
+	 * FSM.
+	 *
+	 * Somebody else might be extending the relation concurrently.  To avoid
+	 * re-initializing the page before they can grab the buffer lock, we
+	 * acquire the extension lock momentarily.  Since they keep the extension
+	 * lock from before getting the page until they have locked (or already
+	 * initialized) it, we're sure to see their initialization.
+	 */
+	if (PageIsNew(page))
+	{
+		LockRelationForExtension(idxrel, ShareLock);
+		UnlockRelationForExtension(idxrel, ShareLock);
+
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+		if (PageIsNew(page))	/* recheck now that we hold the buffer lock */
+		{
+			brin_initialize_empty_new_buffer(idxrel, buf);
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+			return;
+		}
+		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+	}
+
+	/* Nothing to be done for non-regular index pages */
+	if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
+		BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
+		return;
+
+	/* Measure free space and record it */
+	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
+							br_page_get_freespace(page));
+}
+
+/*
+ * Return a pinned and exclusively locked buffer which can be used to insert an
+ * index item of size itemsz (caller must ensure itemsz <= BrinMaxItemSize).
+ * If oldbuf is a valid buffer, it is also locked exclusively (in block-number
+ * order, to avoid deadlocks).
+ *
+ * If we find that the old page is no longer a regular index page (because
+ * of a revmap extension), the old buffer is unlocked and we return
+ * InvalidBuffer.
+ *
+ * If there's no existing page with enough free space to accommodate the new
+ * item, the relation is extended.  If this happens, *extended is set to true,
+ * and it is the caller's responsibility to initialize the page (and WAL-log
+ * that fact) prior to use.  The caller should also update the FSM with the
+ * page's remaining free space after the insertion.
+ *
+ * Note that the caller is not expected to update FSM unless *extended is set
+ * true.  This policy means that we'll update FSM when a page is created, and
+ * when it's found to have too little space for a desired tuple insertion,
+ * but not every single time we add a tuple to the page.
+ *
+ * Note that in some corner cases it is possible for this routine to extend
+ * the relation and then not return the new page.  It is this routine's
+ * responsibility to WAL-log the page initialization and to record the page in
+ * FSM if that happens, since the caller certainly can't do it.
+ */
+static Buffer
+brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+					 bool *extended)
+{
+	BlockNumber oldblk;			/* oldbuf's block, or InvalidBlockNumber */
+	BlockNumber newblk;			/* current candidate target block */
+	Page		page;
+	Size		freespace;
+
+	/* callers must have checked */
+	Assert(itemsz <= BrinMaxItemSize);
+
+	if (BufferIsValid(oldbuf))
+		oldblk = BufferGetBlockNumber(oldbuf);
+	else
+		oldblk = InvalidBlockNumber;
+
+	/* Choose initial target page, re-using existing target if known */
+	newblk = RelationGetTargetBlock(irel);
+	if (newblk == InvalidBlockNumber)
+		newblk = GetPageWithFreeSpace(irel, itemsz);
+
+	/*
+	 * Loop until we find a page with sufficient free space.  By the time we
+	 * return to caller out of this loop, both buffers are valid and locked;
+	 * if we have to restart here, neither page is locked and newblk isn't
+	 * pinned (if it's even valid).
+	 */
+	for (;;)
+	{
+		Buffer		buf;
+		bool		extensionLockHeld = false;
+
+		CHECK_FOR_INTERRUPTS();
+
+		*extended = false;
+
+		if (newblk == InvalidBlockNumber)
+		{
+			/*
+			 * There's not enough free space in any existing index page,
+			 * according to the FSM: extend the relation to obtain a shiny new
+			 * page.
+			 */
+			if (!RELATION_IS_LOCAL(irel))
+			{
+				LockRelationForExtension(irel, ExclusiveLock);
+				extensionLockHeld = true;
+			}
+			buf = ReadBuffer(irel, P_NEW);
+			newblk = BufferGetBlockNumber(buf);
+			*extended = true;
+
+			BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
+					   BufferGetBlockNumber(buf)));
+		}
+		else if (newblk == oldblk)
+		{
+			/*
+			 * There's an odd corner-case here where the FSM is out-of-date,
+			 * and gave us the old page.
+			 */
+			buf = oldbuf;
+		}
+		else
+		{
+			buf = ReadBuffer(irel, newblk);
+		}
+
+		/*
+		 * We lock the old buffer first, if it's earlier than the new one; but
+		 * then we need to check that it hasn't been turned into a revmap page
+		 * concurrently.  If we detect that that happened, give up and tell
+		 * caller to start over.
+		 */
+		if (BufferIsValid(oldbuf) && oldblk < newblk)
+		{
+			LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+			if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
+			{
+				LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+				/*
+				 * It is possible that the new page was obtained from
+				 * extending the relation.  In that case, we must be sure to
+				 * record it in the FSM before leaving, because otherwise the
+				 * space would be lost forever.  However, we cannot let an
+				 * uninitialized page get in the FSM, so we need to initialize
+				 * it first.
+				 */
+				if (*extended)
+					brin_initialize_empty_new_buffer(irel, buf);
+
+				if (extensionLockHeld)
+					UnlockRelationForExtension(irel, ExclusiveLock);
+
+				ReleaseBuffer(buf);
+
+				if (*extended)
+				{
+					FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
+					/* shouldn't matter, but don't confuse caller */
+					*extended = false;
+				}
+
+				return InvalidBuffer;
+			}
+		}
+
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+		/* the new page is locked now, so the extension lock can go */
+		if (extensionLockHeld)
+			UnlockRelationForExtension(irel, ExclusiveLock);
+
+		page = BufferGetPage(buf);
+
+		/*
+		 * We have a new buffer to insert into.  Return it if it has enough
+		 * free space, else start over.  A just-extended page, still
+		 * uninitialized, is credited with BrinMaxItemSize; for other pages
+		 * br_page_get_freespace also rejects ones repurposed for the revmap.
+		 */
+		freespace = *extended ?
+			BrinMaxItemSize : br_page_get_freespace(page);
+		if (freespace >= itemsz)
+		{
+			RelationSetTargetBlock(irel, newblk);
+
+			/*
+			 * Lock the old buffer if not locked already.  Note that in this
+			 * case we know for sure it's a regular page: it's later than the
+			 * new page we just got, which is not a revmap page, and revmap
+			 * pages are always consecutive.
+			 */
+			if (BufferIsValid(oldbuf) && oldblk > newblk)
+			{
+				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+				Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
+			}
+
+			return buf;
+		}
+
+		/* This page is no good. */
+
+		/*
+		 * If an entirely new page does not contain enough free space for the
+		 * new item, then surely that item is oversized.  Complain loudly; but
+		 * first make sure we initialize the page and record it as free, for
+		 * next time.
+		 */
+		if (*extended)
+		{
+			brin_initialize_empty_new_buffer(irel, buf);
+			/* since this should not happen, skip FreeSpaceMapVacuum */
+
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
+							itemsz, freespace, RelationGetRelationName(irel))));
+			return InvalidBuffer;	/* keep compiler quiet */
+		}
+
+		/* drop the rejected page, and the old page's lock if we took it */
+		if (newblk != oldblk)
+			UnlockReleaseBuffer(buf);
+		if (BufferIsValid(oldbuf) && oldblk <= newblk)
+			LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+		/*
+		 * Update the FSM with the new, presumably smaller, freespace value
+		 * for this page, then search for a new target page.
+		 */
+		newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
+	}
+}
+
+/*
+ * Turn a blank page into an empty regular BRIN page, WAL-log the new page
+ * image, and enter the page into the FSM.
+ *
+ * This exists for the corner situations in which the relation was extended
+ * to obtain a page that then could not be used right away.  Leaving such a
+ * page out of the FSM would bloat the index, because there is no mechanism
+ * to get the space back; and since the action that would have initialized
+ * the page is never WAL-logged, the page would stay uninitialized in a
+ * standby (or after recovery).
+ *
+ * Only the FSM entry for the page itself is recorded here.  Updating the
+ * upper FSM pages, where that seems appropriate, is up to the caller.
+ */
+static void
+brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
+{
+	Page		newpage = BufferGetPage(buffer);
+	Size		avail;
+
+	BRIN_elog((DEBUG2,
+			   "brin_initialize_empty_new_buffer: initializing blank page %u",
+			   BufferGetBlockNumber(buffer)));
+
+	START_CRIT_SECTION();
+	brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
+	MarkBufferDirty(buffer);
+	log_newpage_buffer(buffer, true);
+	END_CRIT_SECTION();
+
+	/*
+	 * The FSM update below is not WAL-logged.  That is acceptable: VACUUM
+	 * scans the index and updates the FSM with pages whose FSM records were
+	 * forgotten in a crash.
+	 */
+	avail = br_page_get_freespace(newpage);
+	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer), avail);
+}
+
+
+/*
+ * Report how much free space a regular BRIN index page has.
+ *
+ * A page that is not a regular page, or that carries the BRIN_EVACUATE_PAGE
+ * flag, is reported as having no free space at all (0).
+ */
+static Size
+br_page_get_freespace(Page page)
+{
+	if (!BRIN_IS_REGULAR_PAGE(page))
+		return 0;
+	if ((BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
+		return 0;
+	return PageGetFreeSpace(page);
+}
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
new file mode 100644
index 0000000..6e392a5
--- /dev/null
+++ b/src/backend/access/brin/brin_revmap.c
@@ -0,0 +1,664 @@
+/*
+ * brin_revmap.c
+ *		Range map for BRIN indexes
+ *
+ * The range map (revmap) is a translation structure for BRIN indexes: for each
+ * page range there is one summary tuple, and its location is tracked by the
+ * revmap.  Whenever a new tuple is inserted into a table that violates the
+ * previously recorded summary values, a new tuple is inserted into the index
+ * and the revmap is updated to point to it.
+ *
+ * The revmap is stored in the first pages of the index, immediately following
+ * the metapage.  When the revmap needs to be expanded, all tuples on the
+ * regular BRIN page at that block (if any) are moved out of the way.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_revmap.c
+ */
+#include "postgres.h"
+
+#include "access/brin_page.h"
+#include "access/brin_pageops.h"
+#include "access/brin_revmap.h"
+#include "access/brin_tuple.h"
+#include "access/brin_xlog.h"
+#include "access/rmgr.h"
+#include "access/xloginsert.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/rel.h"
+
+
+/*
+ * In revmap pages, each item stores an ItemPointerData.  These defines give
+ * the logical revmap page number, and the item index within that page, for a
+ * heap block number.  The arguments may be arbitrary expressions.
+ */
+#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
+	(((heapBlk) / (pagesPerRange)) / REVMAP_PAGE_MAXITEMS)
+#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
+	(((heapBlk) / (pagesPerRange)) % REVMAP_PAGE_MAXITEMS)
+
+
+struct BrinRevmap
+{
+	Relation	rm_irel;		/* index relation this revmap belongs to */
+	BlockNumber rm_pagesPerRange;	/* copied from the metapage */
+	BlockNumber rm_lastRevmapPage;	/* cached from the metapage */
+	Buffer		rm_metaBuf;		/* metapage buffer, kept pinned */
+	Buffer		rm_currBuf;		/* pinned revmap page, or InvalidBuffer */
+};
+
+/* typedef appears in brin_revmap.h */
+
+
+static BlockNumber revmap_get_blkno(BrinRevmap *revmap,
+									BlockNumber heapBlk);
+static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk);
+static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap,
+											   BlockNumber heapBlk);
+static void revmap_physical_extend(BrinRevmap *revmap);
+
+/*
+ * Create the access object used to consult and update an index's range map.
+ * The caller must hand it to brinRevmapTerminate when done with it.
+ */
+BrinRevmap *
+brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
+					 Snapshot snapshot)
+{
+	Buffer		metabuf = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
+	BrinMetaPageData *metadata;
+	BrinRevmap *result;
+	Page		metapage;
+
+	/* the metapage contents are only examined while share-locked */
+	LockBuffer(metabuf, BUFFER_LOCK_SHARE);
+	metapage = BufferGetPage(metabuf);
+	TestForOldSnapshot(snapshot, idxrel, metapage);
+	metadata = (BrinMetaPageData *) PageGetContents(metapage);
+
+	result = palloc(sizeof(BrinRevmap));
+	result->rm_irel = idxrel;
+	result->rm_metaBuf = metabuf;
+	result->rm_currBuf = InvalidBuffer;
+	result->rm_pagesPerRange = metadata->pagesPerRange;
+	result->rm_lastRevmapPage = metadata->lastRevmapPage;
+	*pagesPerRange = result->rm_pagesPerRange;
+
+	/* drop the lock but keep the pin; brinRevmapTerminate releases it */
+	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+
+	return result;
+}
+
+/*
+ * Dispose of a revmap access object, dropping the buffer pins it holds.
+ */
+void
+brinRevmapTerminate(BrinRevmap *revmap)
+{
+	if (revmap->rm_currBuf != InvalidBuffer)
+		ReleaseBuffer(revmap->rm_currBuf);
+	ReleaseBuffer(revmap->rm_metaBuf);
+	pfree(revmap);
+}
+
+/*
+ * Make sure the revmap is long enough to cover the given heap block number.
+ */
+void
+brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
+{
+	BlockNumber revmapBlk PG_USED_FOR_ASSERTS_ONLY;
+
+	revmapBlk = revmap_extend_and_get_blkno(revmap, heapBlk);
+
+	/* The block number we got must denote an existing revmap page */
+	Assert(revmapBlk != InvalidBlockNumber);
+	Assert(revmapBlk != BRIN_METAPAGE_BLKNO);
+	Assert(revmapBlk <= revmap->rm_lastRevmapPage);
+}
+
+/*
+ * Prepare to insert an entry into the revmap: the revmap buffer that is to
+ * hold the entry for heapBlk is exclusive-locked and returned.  This routine
+ * does not extend the revmap if it's not long enough, so most callers should
+ * call brinRevmapExtend beforehand.
+ *
+ * The returned buffer is also recorded in the revmap struct, which owns the
+ * pin (brinRevmapTerminate drops it); the caller only releases the lock.
+ */
+Buffer
+brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
+{
+	Buffer		buf = revmap_get_buffer(revmap, heapBlk);
+
+	/* revmap_get_buffer only pinned the page; take the lock ourselves */
+	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+	return buf;
+}
+
+/*
+ * Store the given TID as the revmap entry for heap block number heapBlk, in
+ * a revmap buffer (locked appropriately by caller) of a BRIN index that uses
+ * pagesPerRange pages per range.  An invalid TID marks the entry as invalid.
+ *
+ * When done, the caller must update the LSN on the buffer.
+ *
+ * This is used both in regular operation and during WAL replay.
+ */
+void
+brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
+						BlockNumber heapBlk, ItemPointerData tid)
+{
+	Page		revmapPage;
+	RevmapContents *revmapData;
+	ItemPointerData *slot;
+
+	/* The correct page should already be pinned and locked */
+	revmapPage = BufferGetPage(buf);
+	revmapData = (RevmapContents *) PageGetContents(revmapPage);
+
+	slot = (ItemPointerData *) revmapData->rm_tids;
+	slot += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
+
+	if (!ItemPointerIsValid(&tid))
+		ItemPointerSetInvalid(slot);
+	else
+		ItemPointerSet(slot,
+					   ItemPointerGetBlockNumber(&tid),
+					   ItemPointerGetOffsetNumber(&tid));
+}
+
+/*
+ * Fetch the BrinTuple for a given heap block.
+ *
+ * The buffer containing the tuple is locked in the given mode, and returned
+ * in *buf.  The returned tuple points to the shared buffer and must not be
+ * freed; if caller wants to use it after releasing the buffer lock, it must
+ * create its own palloc'ed copy.  As an optimization, the caller can pass a
+ * pinned buffer *buf on entry, which will avoid a pin-unpin cycle when the
+ * next tuple is on the same page as a previous one.
+ *
+ * If no tuple is found for the given heap range, returns NULL. In that case,
+ * *buf might still be updated (and pin must be released by caller), but it's
+ * not locked.
+ *
+ * The tuple's offset within the buffer is returned in *off and, if size is
+ * not NULL, its length in *size.  snapshot is used for TestForOldSnapshot.
+ */
+BrinTuple *
+brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
+						 Buffer *buf, OffsetNumber *off, Size *size, int mode,
+						 Snapshot snapshot)
+{
+	Relation	idxRel = revmap->rm_irel;
+	BlockNumber mapBlk;
+	RevmapContents *contents;
+	ItemPointerData *iptr;
+	BlockNumber blk;
+	Page		page;
+	ItemId		lp;
+	BrinTuple  *tup;
+	ItemPointerData previptr;
+
+	/* normalize the heap block number to be the first page in the range */
+	heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;
+
+	/*
+	 * Compute the revmap page number we need.  If Invalid is returned (i.e.,
+	 * the revmap page hasn't been created yet), the requested page range is
+	 * not summarized.
+	 */
+	mapBlk = revmap_get_blkno(revmap, heapBlk);
+	if (mapBlk == InvalidBlockNumber)
+	{
+		*off = InvalidOffsetNumber;
+		return NULL;
+	}
+
+	ItemPointerSetInvalid(&previptr);
+	for (;;)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		if (revmap->rm_currBuf == InvalidBuffer ||
+			BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
+		{
+			if (revmap->rm_currBuf != InvalidBuffer)
+				ReleaseBuffer(revmap->rm_currBuf);
+
+			Assert(mapBlk != InvalidBlockNumber);
+			revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
+		}
+
+		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE);
+
+		contents = (RevmapContents *)
+			PageGetContents(BufferGetPage(revmap->rm_currBuf));
+		iptr = contents->rm_tids;
+		iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
+
+		if (!ItemPointerIsValid(iptr))
+		{
+			LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);
+			return NULL;		/* range is not summarized */
+		}
+
+		/*
+		 * Check the TID we got in a previous iteration, if any, and save the
+		 * current TID we got from the revmap; if we loop, we can sanity-check
+		 * that the next one we get is different.  Otherwise we might be stuck
+		 * looping forever if the revmap is somehow badly broken.
+		 */
+		if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg_internal("corrupted BRIN index: inconsistent range map")));
+		previptr = *iptr;
+
+		blk = ItemPointerGetBlockNumber(iptr);
+		*off = ItemPointerGetOffsetNumber(iptr);
+
+		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);
+
+		/* Ok, got a pointer to where the BrinTuple should be. Fetch it. */
+		if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
+		{
+			if (BufferIsValid(*buf))
+				ReleaseBuffer(*buf);
+			*buf = ReadBuffer(idxRel, blk);
+		}
+		LockBuffer(*buf, mode);
+		page = BufferGetPage(*buf);
+		TestForOldSnapshot(snapshot, idxRel, page);
+
+		/* Examine regular pages only; on a revmap page, start over */
+		if (BRIN_IS_REGULAR_PAGE(page))
+		{
+			/*
+			 * If the offset number is greater than what's in the page, it's
+			 * possible that the range was desummarized concurrently. Just
+			 * return NULL to handle that case.
+			 */
+			if (*off > PageGetMaxOffsetNumber(page))
+			{
+				LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+				return NULL;
+			}
+
+			lp = PageGetItemId(page, *off);
+			if (ItemIdIsUsed(lp))
+			{
+				tup = (BrinTuple *) PageGetItem(page, lp);
+
+				if (tup->bt_blkno == heapBlk)
+				{
+					if (size)
+						*size = ItemIdGetLength(lp);
+					/* found it! */
+					return tup;
+				}
+			}
+		}
+
+		/*
+		 * No luck. Assume that the revmap was updated concurrently.
+		 */
+		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+	}
+	/* not reached, but keep compiler quiet */
+	return NULL;
+}
+
+/*
+ * Delete an index tuple, marking a page range as unsummarized.
+ *
+ * Index must be locked in ShareUpdateExclusiveLock mode.
+ *
+ * Returns true once the range is unsummarized, false if caller should retry.
+ */
+bool
+brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
+{
+	BrinRevmap *revmap;
+	BlockNumber pagesPerRange;
+	RevmapContents *contents;
+	ItemPointerData *iptr;
+	ItemPointerData invalidIptr;
+	BlockNumber revmapBlk;
+	Buffer		revmapBuf;
+	Buffer		regBuf;
+	Page		revmapPg;
+	Page		regPg;
+	OffsetNumber revmapOffset;
+	OffsetNumber regOffset;
+	ItemId		lp;
+
+	revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);
+
+	revmapBlk = revmap_get_blkno(revmap, heapBlk);
+	if (!BlockNumberIsValid(revmapBlk))
+	{
+		/* revmap page doesn't exist: range not summarized, we're done */
+		brinRevmapTerminate(revmap);
+		return true;
+	}
+
+	/* Lock the revmap page, obtain the index tuple pointer from it */
+	revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
+	revmapPg = BufferGetPage(revmapBuf);
+	revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
+
+	contents = (RevmapContents *) PageGetContents(revmapPg);
+	iptr = contents->rm_tids;
+	iptr += revmapOffset;
+
+	if (!ItemPointerIsValid(iptr))
+	{
+		/* no index tuple: range not summarized, we're done */
+		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+		brinRevmapTerminate(revmap);
+		return true;
+	}
+
+	regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
+	LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
+	regPg = BufferGetPage(regBuf);
+
+	/*
+	 * We're only removing data, not reading it, so there's no need to
+	 * TestForOldSnapshot here.
+	 */
+
+	/* if this is no longer a regular page, tell caller to start over */
+	if (!BRIN_IS_REGULAR_PAGE(regPg))
+	{
+		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+		UnlockReleaseBuffer(regBuf);	/* drop our pin too, not only the lock */
+		brinRevmapTerminate(revmap);
+		return false;
+	}
+
+	regOffset = ItemPointerGetOffsetNumber(iptr);
+	if (regOffset > PageGetMaxOffsetNumber(regPg))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("corrupted BRIN index: inconsistent range map")));
+
+	lp = PageGetItemId(regPg, regOffset);
+	if (!ItemIdIsUsed(lp))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("corrupted BRIN index: inconsistent range map")));
+
+	/*
+	 * Placeholder tuples only appear during unfinished summarization, and we
+	 * hold ShareUpdateExclusiveLock, so this function cannot run concurrently
+	 * with that.  So any placeholder tuples that exist are leftovers from a
+	 * crashed or aborted summarization; remove them silently.
+	 */
+
+	START_CRIT_SECTION();
+
+	ItemPointerSetInvalid(&invalidIptr);
+	brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
+							invalidIptr);
+	PageIndexTupleDeleteNoCompact(regPg, regOffset);
+	/* XXX record free space in FSM? */
+
+	MarkBufferDirty(regBuf);
+	MarkBufferDirty(revmapBuf);
+
+	if (RelationNeedsWAL(idxrel))
+	{
+		xl_brin_desummarize xlrec;
+		XLogRecPtr	recptr;
+
+		xlrec.pagesPerRange = revmap->rm_pagesPerRange;
+		xlrec.heapBlk = heapBlk;
+		xlrec.regOffset = regOffset;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
+		XLogRegisterBuffer(0, revmapBuf, 0);
+		XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
+		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
+		PageSetLSN(revmapPg, recptr);
+		PageSetLSN(regPg, recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(regBuf);
+	LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+	brinRevmapTerminate(revmap);
+
+	return true;
+}
+
+/*
+ * Translate a heap block number into the physical block number of the revmap
+ * page that covers it.  InvalidBlockNumber is returned when that revmap page
+ * has not been allocated yet.
+ */
+static BlockNumber
+revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
+{
+	BlockNumber mapBlk;
+
+	/* logical revmap page number, shifted by one to skip the metapage */
+	mapBlk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;
+
+	/* A page beyond the last known revmap page does not exist yet */
+	if (mapBlk > revmap->rm_lastRevmapPage)
+		return InvalidBlockNumber;
+
+	return mapBlk;
+}
+
+/*
+ * Return a pinned buffer holding the revmap page that covers the given heap
+ * page; an error is raised if the revmap has not been extended that far.
+ * The buffer is also remembered in the revmap struct, which owns the pin
+ * (brinRevmapTerminate drops it), so the caller needn't release it.
+ */
+static Buffer
+revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
+{
+	BlockNumber wantedBlk = revmap_get_blkno(revmap, heapBlk);
+	bool		havePage;
+
+	/* The heap block must map to an already existing revmap page */
+	if (wantedBlk == InvalidBlockNumber)
+		elog(ERROR, "revmap does not cover heap block %u", heapBlk);
+
+	/* Ensure the block number we got is in the expected range */
+	Assert(wantedBlk != BRIN_METAPAGE_BLKNO &&
+		   wantedBlk <= revmap->rm_lastRevmapPage);
+
+	/*
+	 * If our access struct already holds the page we need, use that pin;
+	 * otherwise give up whatever page it holds (if any) and read the right
+	 * one.
+	 */
+	havePage = (revmap->rm_currBuf != InvalidBuffer &&
+				BufferGetBlockNumber(revmap->rm_currBuf) == wantedBlk);
+	if (!havePage)
+	{
+		if (revmap->rm_currBuf != InvalidBuffer)
+			ReleaseBuffer(revmap->rm_currBuf);
+
+		revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, wantedBlk);
+	}
+
+	return revmap->rm_currBuf;
+}
+
+/*
+ * Translate a heap block number into the physical block number of the revmap
+ * page that covers it.  If that revmap page hasn't been allocated yet, keep
+ * extending the revmap until it has.
+ */
+static BlockNumber
+revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
+{
+	BlockNumber mapBlk;
+
+	/* logical revmap page number, shifted by one to skip the metapage */
+	mapBlk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;
+
+	/* An extension attempt may not succeed; loop until the page exists */
+	while (revmap->rm_lastRevmapPage < mapBlk)
+	{
+		CHECK_FOR_INTERRUPTS();
+		revmap_physical_extend(revmap);
+	}
+
+	return mapBlk;
+}
+
+/*
+ * Try to extend the revmap by one page.  We may return without having done so
+ * (stale cache, extension race, page evacuation); caller is expected to retry.
+ */
+static void
+revmap_physical_extend(BrinRevmap *revmap)
+{
+	Buffer		buf;
+	Page		page;
+	Page		metapage;
+	BrinMetaPageData *metadata;
+	BlockNumber mapBlk;
+	BlockNumber nblocks;
+	Relation	irel = revmap->rm_irel;
+	bool		needLock = !RELATION_IS_LOCAL(irel);
+
+	/*
+	 * Lock the metapage. This locks out concurrent extensions of the revmap,
+	 * but note that we still need to grab the relation extension lock because
+	 * another backend can extend the index with regular BRIN pages.
+	 */
+	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_EXCLUSIVE);
+	metapage = BufferGetPage(revmap->rm_metaBuf);
+	metadata = (BrinMetaPageData *) PageGetContents(metapage);
+
+	/*
+	 * Check that our cached lastRevmapPage value was up-to-date; if it
+	 * wasn't, update the cached copy and have caller start over.
+	 */
+	if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
+	{
+		revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
+		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
+		return;					/* nothing extended; cache refreshed */
+	}
+	mapBlk = metadata->lastRevmapPage + 1;
+
+	nblocks = RelationGetNumberOfBlocks(irel);
+	if (mapBlk < nblocks)
+	{
+		buf = ReadBuffer(irel, mapBlk);
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+		page = BufferGetPage(buf);
+	}
+	else
+	{
+		if (needLock)
+			LockRelationForExtension(irel, ExclusiveLock);
+
+		buf = ReadBuffer(irel, P_NEW);
+		if (BufferGetBlockNumber(buf) != mapBlk)
+		{
+			/*
+			 * Very rare corner case: somebody extended the relation
+			 * concurrently after we read its length.  If this happens, give
+			 * up and have caller start over.  We will have to evacuate that
+			 * page from under whoever is using it.
+			 */
+			if (needLock)
+				UnlockRelationForExtension(irel, ExclusiveLock);
+			LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
+			ReleaseBuffer(buf);
+			return;				/* nothing extended; we lost the race */
+		}
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+		page = BufferGetPage(buf);
+
+		if (needLock)
+			UnlockRelationForExtension(irel, ExclusiveLock);
+	}
+
+	/* Check that it's a regular block (or an empty page) */
+	if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
+						BrinPageType(page),
+						RelationGetRelationName(irel),
+						BufferGetBlockNumber(buf))));
+
+	/* If the page is in use, evacuate it and restart */
+	if (brin_start_evacuating_page(irel, buf))
+	{
+		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
+		brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);
+
+		/* have caller start over */
+		return;
+	}
+
+	/*
+	 * Ok, we have now locked the metapage and the target block. Re-initialize
+	 * the target block as a revmap page, and update the metapage.
+	 */
+	START_CRIT_SECTION();
+
+	/* the rm_tids array is initialized to all invalid by PageInit */
+	brin_page_init(page, BRIN_PAGETYPE_REVMAP);
+	MarkBufferDirty(buf);
+
+	metadata->lastRevmapPage = mapBlk;
+
+	/*
+	 * Set pd_lower just past the end of the metadata.  This is essential,
+	 * because without doing so, metadata will be lost if xlog.c compresses
+	 * the page.  (We must do this here because pre-v11 versions of PG did not
+	 * set the metapage's pd_lower correctly, so a pg_upgraded index might
+	 * contain the wrong value.)
+	 */
+	((PageHeader) metapage)->pd_lower =
+		((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;
+
+	MarkBufferDirty(revmap->rm_metaBuf);
+
+	if (RelationNeedsWAL(revmap->rm_irel))
+	{
+		xl_brin_revmap_extend xlrec;
+		XLogRecPtr	recptr;
+
+		xlrec.targetBlk = mapBlk;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
+		XLogRegisterBuffer(0, revmap->rm_metaBuf, REGBUF_STANDARD);
+
+		XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
+
+		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
+		PageSetLSN(metapage, recptr);
+		PageSetLSN(page, recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
+
+	UnlockReleaseBuffer(buf);
+}
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
new file mode 100644
index 0000000..4a5a4da
--- /dev/null
+++ b/src/backend/access/brin/brin_tuple.c
@@ -0,0 +1,720 @@
+/*
+ * brin_tuple.c
+ *		Method implementations for tuples in BRIN indexes.
+ *
+ * Intended usage is that code outside this file only deals with
+ * BrinMemTuples, and converts to and from the on-disk representation through
+ * functions in this file.
+ *
+ * NOTES
+ *
+ * A BRIN tuple is similar to a heap tuple, with a few key differences.  The
+ * first interesting difference is that the tuple header is much simpler, only
+ * containing its total length and a small area for flags.  Also, the stored
+ * data does not match the relation tuple descriptor exactly: for each
+ * attribute in the descriptor, the index tuple carries an arbitrary number
+ * of values, depending on the opclass.
+ *
+ * Also, for each column of the index relation there are two null bits: one
+ * (hasnulls) stores whether any tuple within the page range has that column
+ * set to null; the other one (allnulls) stores whether the column values are
+ * all null.  If allnulls is true, then the tuple data area does not contain
+ * values for that column at all; whereas it does if the hasnulls is set.
+ * Note the size of the null bitmask may not be the same as that of the
+ * datum array.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_tuple.c
+ */
+#include "postgres.h"
+
+#include "access/brin_tuple.h"
+#include "access/detoast.h"
+#include "access/heaptoast.h"
+#include "access/htup_details.h"
+#include "access/toast_internals.h"
+#include "access/tupdesc.h"
+#include "access/tupmacs.h"
+#include "utils/datum.h"
+#include "utils/memutils.h"
+
+
+/*
+ * This enables de-toasting of index entries.  Needed until VACUUM is
+ * smart enough to rebuild indexes from scratch.
+ */
+#define TOAST_INDEX_HACK
+
+
+static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
+										  char *tp, bits8 *nullbits, bool nulls,
+										  Datum *values, bool *allnulls, bool *hasnulls);
+
+
+/*
+ * Return a tuple descriptor used for on-disk storage of BRIN tuples.
+ */
+static TupleDesc
+brtuple_disk_tupdesc(BrinDesc *brdesc)
+{
+	TupleDesc	disktdesc;
+	MemoryContext savedcxt;
+	AttrNumber	nextattno = 1;
+	int			keyno;
+	int			datumno;
+
+	/* Built at most once; afterwards the BrinDesc's cached copy is used */
+	if (brdesc->bd_disktdesc != NULL)
+		return brdesc->bd_disktdesc;
+
+	/* allocate it in the BrinDesc's own memory context */
+	savedcxt = MemoryContextSwitchTo(brdesc->bd_context);
+
+	disktdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored);
+
+	/* one attribute per stored datum, typed after its oi_typcache entry */
+	for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
+	{
+		for (datumno = 0; datumno < brdesc->bd_info[keyno]->oi_nstored; datumno++)
+			TupleDescInitEntry(disktdesc, nextattno++, NULL,
+							   brdesc->bd_info[keyno]->oi_typcache[datumno]->type_id,
+							   -1, 0);
+	}
+
+	MemoryContextSwitchTo(savedcxt);
+
+	brdesc->bd_disktdesc = disktdesc;
+	return disktdesc;
+}
+
+/*
+ * Generate a new on-disk tuple to be inserted in a BRIN index.  The tuple is
+ * palloc'd, and its total (MAXALIGN'ed) length is returned in *size.
+ * See brin_form_placeholder_tuple if you touch this.
+ */
+BrinTuple *
+brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
+				Size *size)
+{
+	Datum	   *values;
+	bool	   *nulls;
+	bool		anynulls = false;
+	BrinTuple  *rettuple;
+	int			keyno;
+	int			idxattno;
+	uint16		phony_infomask = 0;
+	bits8	   *phony_nullbitmap;
+	Size		len,
+				hoff,
+				data_len;
+	int			i;
+
+#ifdef TOAST_INDEX_HACK
+	Datum	   *untoasted_values;
+	int			nuntoasted = 0;
+#endif
+
+	Assert(brdesc->bd_totalstored > 0);
+
+	values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
+	nulls = (bool *) palloc0(sizeof(bool) * brdesc->bd_totalstored);
+	phony_nullbitmap = (bits8 *)
+		palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
+
+#ifdef TOAST_INDEX_HACK
+	untoasted_values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
+#endif
+
+	/*
+	 * Set up the values/nulls arrays for heap_fill_tuple
+	 */
+	idxattno = 0;
+	for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
+	{
+		int			datumno;
+
+		/*
+		 * "allnulls" is set when there's no nonnull value in any row in the
+		 * column; when this happens, there is no data to store.  Thus set the
+		 * nullable bits for all data elements of this column and we're done.
+		 */
+		if (tuple->bt_columns[keyno].bv_allnulls)
+		{
+			for (datumno = 0;
+				 datumno < brdesc->bd_info[keyno]->oi_nstored;
+				 datumno++)
+				nulls[idxattno++] = true;
+			anynulls = true;
+			continue;
+		}
+
+		/*
+		 * The "hasnulls" bit is set when there are some null values in the
+		 * data.  We still need to store a real value, but the presence of
+		 * this means we need a null bitmap.
+		 */
+		if (tuple->bt_columns[keyno].bv_hasnulls)
+			anynulls = true;
+
+		/* If needed, serialize the values before forming the on-disk tuple. */
+		if (tuple->bt_columns[keyno].bv_serialize)
+		{
+			tuple->bt_columns[keyno].bv_serialize(brdesc,
+												  tuple->bt_columns[keyno].bv_mem_value,
+												  tuple->bt_columns[keyno].bv_values);
+		}
+
+		/*
+		 * Now obtain the values of each stored datum.  Note that some values
+		 * might be toasted, and we cannot rely on the original heap values
+		 * sticking around forever, so we must detoast them.  Also try to
+		 * compress them.
+		 */
+		for (datumno = 0;
+			 datumno < brdesc->bd_info[keyno]->oi_nstored;
+			 datumno++)
+		{
+			Datum		value = tuple->bt_columns[keyno].bv_values[datumno];
+
+#ifdef TOAST_INDEX_HACK
+
+			/* We must look at the stored type, not at the index descriptor. */
+			TypeCacheEntry *atttype = brdesc->bd_info[keyno]->oi_typcache[datumno];
+
+			/* Do we need to free the value at the end? */
+			bool		free_value = false;
+
+			/* For non-varlena types we don't need to do anything special */
+			if (atttype->typlen != -1)
+			{
+				values[idxattno++] = value;
+				continue;
+			}
+
+			/*
+			 * From here on the value is known to be a varlena.  We don't need
+			 * to care about NULL values here, thanks to bv_allnulls above.
+			 *
+			 * If value is stored EXTERNAL, must fetch it so we are not
+			 * depending on outside storage.
+			 *
+			 * XXX Is this actually true? Could it be that the summary is NULL
+			 * even for range with non-NULL data? E.g. degenerate bloom filter
+			 * may be thrown away, etc.
+			 */
+			if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
+			{
+				value = PointerGetDatum(detoast_external_attr((struct varlena *)
+															  DatumGetPointer(value)));
+				free_value = true;
+			}
+
+			/*
+			 * If value is above size target, and is of a compressible
+			 * datatype, try to compress it in-line.
+			 */
+			if (!VARATT_IS_EXTENDED(DatumGetPointer(value)) &&
+				VARSIZE(DatumGetPointer(value)) > TOAST_INDEX_TARGET &&
+				(atttype->typstorage == TYPSTORAGE_EXTENDED ||
+				 atttype->typstorage == TYPSTORAGE_MAIN))
+			{
+				Datum		cvalue;
+				char		compression;
+				Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc,
+													  keyno);
+
+				/*
+				 * If the BRIN summary and indexed attribute use the same data
+				 * type and it has a valid compression method, we can use the
+				 * same compression method. Otherwise we have to use the
+				 * default method.
+				 */
+				if (att->atttypid == atttype->type_id)
+					compression = att->attcompression;
+				else
+					compression = InvalidCompressionMethod;
+
+				cvalue = toast_compress_datum(value, compression);
+
+				if (DatumGetPointer(cvalue) != NULL)
+				{
+					/* successful compression */
+					if (free_value)
+						pfree(DatumGetPointer(value));
+
+					value = cvalue;
+					free_value = true;
+				}
+			}
+
+			/*
+			 * If we untoasted / compressed the value, we need to free it
+			 * after forming the index tuple.
+			 */
+			if (free_value)
+				untoasted_values[nuntoasted++] = value;
+
+#endif
+
+			values[idxattno++] = value;
+		}
+	}
+
+	/* Assert we did not overrun temp arrays */
+	Assert(idxattno <= brdesc->bd_totalstored);
+
+	/* compute total space needed */
+	len = SizeOfBrinTuple;
+	if (anynulls)
+	{
+		/*
+		 * We need a double-length bitmap on an on-disk BRIN index tuple; the
+		 * first half stores the "allnulls" bits, the second stores
+		 * "hasnulls".
+		 */
+		len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
+	}
+
+	len = hoff = MAXALIGN(len);
+
+	data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc),
+									  values, nulls);
+	len += data_len;
+
+	len = MAXALIGN(len);
+
+	rettuple = palloc0(len);
+	rettuple->bt_blkno = blkno;
+	rettuple->bt_info = hoff;
+
+	/* Assert that hoff fits in the space available */
+	Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff);
+
+	/*
+	 * The infomask and null bitmap as computed by heap_fill_tuple are useless
+	 * to us.  However, that function will not accept a null infomask; and we
+	 * need to pass a valid null bitmap so that it will correctly skip
+	 * outputting null attributes in the data area.
+	 */
+	heap_fill_tuple(brtuple_disk_tupdesc(brdesc),
+					values,
+					nulls,
+					(char *) rettuple + hoff,
+					data_len,
+					&phony_infomask,
+					phony_nullbitmap);
+
+	/* done with these */
+	pfree(values);
+	pfree(nulls);
+	pfree(phony_nullbitmap);
+
+#ifdef TOAST_INDEX_HACK
+	for (i = 0; i < nuntoasted; i++)
+		pfree(DatumGetPointer(untoasted_values[i]));
+#endif
+
+	/*
+	 * Now fill in the real null bitmasks.  allnulls first.
+	 */
+	if (anynulls)
+	{
+		bits8	   *bitP;
+		int			bitmask;
+
+		rettuple->bt_info |= BRIN_NULLS_MASK;
+
+		/*
+		 * Note that we reverse the sense of null bits in this module: we
+		 * store a 1 for a null attribute rather than a 0.  So we must reverse
+		 * the sense of the att_isnull test in brin_deconstruct_tuple as well.
+		 */
+		bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
+		bitmask = HIGHBIT;		/* makes the loop step onto the first byte */
+		for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
+		{
+			if (bitmask != HIGHBIT)
+				bitmask <<= 1;
+			else
+			{
+				bitP += 1;
+				*bitP = 0x0;
+				bitmask = 1;
+			}
+
+			if (!tuple->bt_columns[keyno].bv_allnulls)
+				continue;
+
+			*bitP |= bitmask;
+		}
+		/* hasnulls bits follow */
+		for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
+		{
+			if (bitmask != HIGHBIT)
+				bitmask <<= 1;
+			else
+			{
+				bitP += 1;
+				*bitP = 0x0;
+				bitmask = 1;
+			}
+
+			if (!tuple->bt_columns[keyno].bv_hasnulls)
+				continue;
+
+			*bitP |= bitmask;
+		}
+	}
+
+	if (tuple->bt_placeholder)
+		rettuple->bt_info |= BRIN_PLACEHOLDER_MASK;
+
+	if (tuple->bt_empty_range)
+		rettuple->bt_info |= BRIN_EMPTY_RANGE_MASK;
+
+	*size = len;
+	return rettuple;
+}
+
+/*
+ * Generate a placeholder on-disk tuple: no data values, all columns allnulls.
+ *
+ * This is a cut-down version of brin_form_tuple.
+ */
+BrinTuple *
+brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
+{
+	int			natts = brdesc->bd_tupdesc->natts;
+	Size		hdrlen;
+	BrinTuple  *placeholder;
+	bits8	   *curbyte;
+	int			curbit;
+	int			attno;
+
+	/* header plus the double-length null bitmap, which is always present */
+	hdrlen = SizeOfBrinTuple + BITMAPLEN(natts * 2);
+	hdrlen = MAXALIGN(hdrlen);
+
+	/* palloc0 leaves the whole bitmap zeroed */
+	placeholder = palloc0(hdrlen);
+	placeholder->bt_blkno = blkno;
+	placeholder->bt_info = hdrlen;
+	placeholder->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK | BRIN_EMPTY_RANGE_MASK;
+
+	/* mark every attribute allnulls; the first pass steps onto byte 0 */
+	curbyte = ((bits8 *) ((char *) placeholder + SizeOfBrinTuple)) - 1;
+	curbit = HIGHBIT;
+	for (attno = 0; attno < natts; attno++)
+	{
+		if (curbit == HIGHBIT)
+		{
+			curbyte += 1;
+			*curbyte = 0x0;
+			curbit = 1;
+		}
+		else
+			curbit <<= 1;
+
+		*curbyte |= curbit;
+	}
+	/* no need to set hasnulls */
+
+	*size = hdrlen;
+	return placeholder;
+}
+
+/*
+ * Release (pfree) an on-disk tuple, such as one built by brin_form_tuple.
+ */
+void
+brin_free_tuple(BrinTuple *tuple)
+{
+	pfree(tuple);
+}
+
+/*
+ * Copy a brin tuple of size len.  If destsz is NULL or *destsz is zero, the
+ * copy is made in a freshly palloc'd buffer, leaving *destsz untouched.
+ * Otherwise 'dest' is an output buffer of size *destsz, which is enlarged by
+ * repalloc (updating *destsz) if the tuple does not fit; this avoids
+ * palloc/free cycles when many brin tuples are being processed in loops.
+ */
+BrinTuple *
+brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
+{
+	bool		reusable = (destsz != NULL && *destsz != 0);
+
+	if (!reusable)
+		dest = palloc(len);
+	else if (*destsz < len)
+	{
+		dest = repalloc(dest, len);
+		*destsz = len;
+	}
+
+	return memcpy(dest, tuple, len);
+}
+
+/*
+ * Report whether two BrinTuples have the same length and the same bytes.
+ */
+bool
+brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
+{
+	/* tuples of different lengths can never be bitwise identical */
+	if (alen != blen)
+		return false;
+
+	return memcmp(a, b, alen) == 0;
+}
+
+/*
+ * Allocate a fresh BrinMemTuple for the given descriptor and return it in
+ * the empty state set up by brin_memtuple_initialize.
+ *
+ * Note: we don't provide any means to free a deformed tuple, so make sure to
+ * use a temporary memory context.
+ */
+BrinMemTuple *
+brin_new_memtuple(BrinDesc *brdesc)
+{
+	int			natts = brdesc->bd_tupdesc->natts;
+	int			nstored = brdesc->bd_totalstored;
+	long		hdrsize;
+	BrinMemTuple *dtup;
+
+	/* header and per-column BrinValues array, then room for all the Datums */
+	hdrsize = MAXALIGN(sizeof(BrinMemTuple) + sizeof(BrinValues) * natts);
+	dtup = palloc0(hdrsize + sizeof(Datum) * nstored);
+
+	/* work arrays: one slot per stored datum, one flag pair per attribute */
+	dtup->bt_values = palloc(sizeof(Datum) * nstored);
+	dtup->bt_allnulls = palloc(sizeof(bool) * natts);
+	dtup->bt_hasnulls = palloc(sizeof(bool) * natts);
+	dtup->bt_empty_range = true;
+
+	dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, "brin dtuple",
+											 ALLOCSET_DEFAULT_SIZES);
+
+	brin_memtuple_initialize(dtup, brdesc);
+	return dtup;
+}
+
+/*
+ * Put a BrinMemTuple back into its initial state: an empty range in which
+ * every column is all-nulls.  The same tuple is returned, for convenience.
+ */
+BrinMemTuple *
+brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
+{
+	int			natts = brdesc->bd_tupdesc->natts;
+	int			attno;
+	char	   *nextvalues;
+
+	MemoryContextReset(dtuple->bt_context);
+
+	/* the columns' Datum arrays are laid out after the BrinValues array */
+	nextvalues = (char *) dtuple +
+		MAXALIGN(sizeof(BrinMemTuple) + sizeof(BrinValues) * natts);
+	for (attno = 0; attno < natts; attno++)
+	{
+		BrinValues *col = &dtuple->bt_columns[attno];
+
+		col->bv_attno = attno + 1;
+		col->bv_allnulls = true;
+		col->bv_hasnulls = false;
+		col->bv_values = (Datum *) nextvalues;
+		col->bv_mem_value = PointerGetDatum(NULL);
+		col->bv_serialize = NULL;
+		col->bv_context = dtuple->bt_context;
+		nextvalues += sizeof(Datum) * brdesc->bd_info[attno]->oi_nstored;
+	}
+
+	dtuple->bt_empty_range = true;
+	return dtuple;
+}
+
+/*
+ * Convert a BrinTuple back to a BrinMemTuple.  This is the reverse of
+ * brin_form_tuple.
+ *
+ * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
+ * This avoids having to allocate it here, which can be useful when this
+ * function is called many times in a loop.  It is caller's responsibility
+ * that the given BrinMemTuple matches what we need here.
+ *
+ * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
+ * deconstruct the tuple from the on-disk format.
+ */
+BrinMemTuple *
+brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
+{
+	BrinMemTuple *dtup;
+	Datum	   *values;
+	bool	   *allnulls;
+	bool	   *hasnulls;
+	char	   *tp;
+	bits8	   *nullbits;
+	int			keyno;
+	int			valueno;
+	MemoryContext oldcxt;
+
+	dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
+		brin_new_memtuple(brdesc);
+
+	/* always assign: a reused dMemtuple may still carry an older tuple's flag */
+	dtup->bt_placeholder = (BrinTupleIsPlaceholder(tuple) != 0);
+
+	/* ranges start as empty, depends on the BrinTuple */
+	if (!BrinTupleIsEmptyRange(tuple))
+		dtup->bt_empty_range = false;
+
+	dtup->bt_blkno = tuple->bt_blkno;
+
+	values = dtup->bt_values;
+	allnulls = dtup->bt_allnulls;
+	hasnulls = dtup->bt_hasnulls;
+
+	tp = (char *) tuple + BrinTupleDataOffset(tuple);
+
+	if (BrinTupleHasNulls(tuple))
+		nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple);
+	else
+		nullbits = NULL;
+	brin_deconstruct_tuple(brdesc,
+						   tp, nullbits, BrinTupleHasNulls(tuple),
+						   values, allnulls, hasnulls);
+
+	/*
+	 * Iterate to assign each of the values to the corresponding item in the
+	 * values array of each column.  The copies occur in the tuple's context.
+	 */
+	oldcxt = MemoryContextSwitchTo(dtup->bt_context);
+	for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
+	{
+		int			i;
+
+		if (allnulls[keyno])
+		{
+			valueno += brdesc->bd_info[keyno]->oi_nstored;
+			continue;
+		}
+
+		/*
+		 * We would like to skip datumCopy'ing the values datum in some cases,
+		 * caller permitting ...
+		 */
+		for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++)
+			dtup->bt_columns[keyno].bv_values[i] =
+				datumCopy(values[valueno++],
+						  brdesc->bd_info[keyno]->oi_typcache[i]->typbyval,
+						  brdesc->bd_info[keyno]->oi_typcache[i]->typlen);
+
+		dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno];
+		dtup->bt_columns[keyno].bv_allnulls = false;
+
+		dtup->bt_columns[keyno].bv_mem_value = PointerGetDatum(NULL);
+		dtup->bt_columns[keyno].bv_serialize = NULL;
+		dtup->bt_columns[keyno].bv_context = dtup->bt_context;
+	}
+
+	MemoryContextSwitchTo(oldcxt);
+
+	return dtup;
+}
+
+/*
+ * brin_deconstruct_tuple
+ *		Guts of attribute extraction from an on-disk BRIN tuple.
+ *
+ * Its arguments are:
+ *	brdesc		BRIN descriptor for the stored tuple
+ *	tp			pointer to the tuple data area
+ *	nullbits	pointer to the tuple nulls bitmask (only read if 'nulls')
+ *	nulls		"has nulls" bit in tuple infomask
+ *	values		output values, array of size brdesc->bd_totalstored
+ *	allnulls	output "allnulls", size brdesc->bd_tupdesc->natts
+ *	hasnulls	output "hasnulls", size brdesc->bd_tupdesc->natts
+ *
+ * Output arrays must have been allocated by caller.
+ */
+static inline void
+brin_deconstruct_tuple(BrinDesc *brdesc,
+					   char *tp, bits8 *nullbits, bool nulls,
+					   Datum *values, bool *allnulls, bool *hasnulls)
+{
+	int			attnum;
+	int			stored;
+	TupleDesc	diskdsc;
+	long		off;
+
+	/*
+	 * First iterate to natts to obtain both null flags for each attribute.
+	 * Note that we reverse the sense of the att_isnull test, because we store
+	 * 1 for a null value (rather than 1 for a not-null value, as is the
+	 * att_isnull convention used elsewhere).  See brin_form_tuple.
+	 */
+	for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
+	{
+		/*
+		 * the "all nulls" bit means that all values in the page range for
+		 * this column are nulls.  Therefore there are no values in the tuple
+		 * data area.
+		 */
+		allnulls[attnum] = nulls && !att_isnull(attnum, nullbits);
+
+		/*
+		 * the "has nulls" bit means that some tuples have nulls, but others
+		 * have not-null values.  Therefore we know the tuple contains data
+		 * for this column.
+		 *
+		 * The hasnulls bits follow the allnulls bits in the same bitmask.
+		 */
+		hasnulls[attnum] =
+			nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits);
+	}
+
+	/*
+	 * Iterate to obtain each attribute's stored values.  Note that since we
+	 * may reuse attribute entries for more than one column, we cannot cache
+	 * offsets here.
+	 */
+	diskdsc = brtuple_disk_tupdesc(brdesc);
+	stored = 0;
+	off = 0;
+	for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
+	{
+		int			datumno;
+
+		if (allnulls[attnum])
+		{
+			stored += brdesc->bd_info[attnum]->oi_nstored;
+			continue;
+		}
+
+		for (datumno = 0;
+			 datumno < brdesc->bd_info[attnum]->oi_nstored;
+			 datumno++)
+		{
+			Form_pg_attribute thisatt = TupleDescAttr(diskdsc, stored);
+
+			if (thisatt->attlen == -1)
+			{
+				off = att_align_pointer(off, thisatt->attalign, -1,
+										tp + off);
+			}
+			else
+			{
+				/* not varlena, so safe to use att_align_nominal */
+				off = att_align_nominal(off, thisatt->attalign);
+			}
+
+			values[stored++] = fetchatt(thisatt, tp + off);
+
+			off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+		}
+	}
+}
diff --git a/src/backend/access/brin/brin_validate.c b/src/backend/access/brin/brin_validate.c
new file mode 100644
index 0000000..c54c874
--- /dev/null
+++ b/src/backend/access/brin/brin_validate.c
@@ -0,0 +1,281 @@
+/*-------------------------------------------------------------------------
+ *
+ * brin_validate.c
+ *	  Opclass validator for BRIN.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_validate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amvalidate.h"
+#include "access/brin_internal.h"
+#include "access/htup_details.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/regproc.h"
+#include "utils/syscache.h"
+
+/*
+ * Validator for a BRIN opclass.  Returns true iff no INFO complaint is raised.
+ *
+ * Some of the checks done here cover the whole opfamily, and therefore are
+ * redundant when checking each opclass in a family.  But they don't run long
+ * enough to be much of a problem, so we accept the duplication rather than
+ * complicate the amvalidate API.
+ */
+bool
+brinvalidate(Oid opclassoid)
+{
+	bool		result = true;
+	HeapTuple	classtup;
+	Form_pg_opclass classform;
+	Oid			opfamilyoid;
+	Oid			opcintype;
+	char	   *opclassname;
+	HeapTuple	familytup;
+	Form_pg_opfamily familyform;
+	char	   *opfamilyname;
+	CatCList   *proclist,
+			   *oprlist;
+	uint64		allfuncs = 0;
+	uint64		allops = 0;
+	List	   *grouplist;
+	OpFamilyOpFuncGroup *opclassgroup;
+	int			i;
+	ListCell   *lc;
+
+	/* Fetch opclass information */
+	classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
+	if (!HeapTupleIsValid(classtup))
+		elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
+	classform = (Form_pg_opclass) GETSTRUCT(classtup);
+
+	opfamilyoid = classform->opcfamily;
+	opcintype = classform->opcintype;
+	opclassname = NameStr(classform->opcname);
+
+	/* Fetch opfamily information */
+	familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
+	if (!HeapTupleIsValid(familytup))
+		elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
+	familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
+
+	opfamilyname = NameStr(familyform->opfname);
+
+	/* Fetch all operators and support functions of the opfamily */
+	oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
+	proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
+
+	/* Check individual support functions */
+	for (i = 0; i < proclist->n_members; i++)
+	{
+		HeapTuple	proctup = &proclist->members[i]->tuple;
+		Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
+		bool		ok;
+
+		/* Check procedure numbers and function signatures */
+		switch (procform->amprocnum)
+		{
+			case BRIN_PROCNUM_OPCINFO:
+				ok = check_amproc_signature(procform->amproc, INTERNALOID, true,
+											1, 1, INTERNALOID);
+				break;
+			case BRIN_PROCNUM_ADDVALUE:
+				ok = check_amproc_signature(procform->amproc, BOOLOID, true,
+											4, 4, INTERNALOID, INTERNALOID,
+											INTERNALOID, INTERNALOID);
+				break;
+			case BRIN_PROCNUM_CONSISTENT:
+				ok = check_amproc_signature(procform->amproc, BOOLOID, true,
+											3, 4, INTERNALOID, INTERNALOID,
+											INTERNALOID, INT4OID);
+				break;
+			case BRIN_PROCNUM_UNION:
+				ok = check_amproc_signature(procform->amproc, BOOLOID, true,
+											3, 3, INTERNALOID, INTERNALOID,
+											INTERNALOID);
+				break;
+			case BRIN_PROCNUM_OPTIONS:
+				ok = check_amoptsproc_signature(procform->amproc);
+				break;
+			default:
+				/* Complain if it's not a valid optional proc number */
+				if (procform->amprocnum < BRIN_FIRST_OPTIONAL_PROCNUM ||
+					procform->amprocnum > BRIN_LAST_OPTIONAL_PROCNUM)
+				{
+					ereport(INFO,
+							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+							 errmsg("operator family \"%s\" of access method %s contains function %s with invalid support number %d",
+									opfamilyname, "brin",
+									format_procedure(procform->amproc),
+									procform->amprocnum)));
+					result = false;
+					continue;	/* omit bad proc numbers from allfuncs */
+				}
+				/* Can't check signatures of optional procs, so assume OK */
+				ok = true;
+				break;
+		}
+
+		if (!ok)
+		{
+			ereport(INFO,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d",
+							opfamilyname, "brin",
+							format_procedure(procform->amproc),
+							procform->amprocnum)));
+			result = false;
+		}
+
+		/* Track all valid procedure numbers seen, even with a bad signature */
+		allfuncs |= ((uint64) 1) << procform->amprocnum;
+	}
+
+	/* Check individual operators */
+	for (i = 0; i < oprlist->n_members; i++)
+	{
+		HeapTuple	oprtup = &oprlist->members[i]->tuple;
+		Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
+
+		/* Check that only allowed strategy numbers exist (bits of a uint64) */
+		if (oprform->amopstrategy < 1 || oprform->amopstrategy > 63)
+		{
+			ereport(INFO,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("operator family \"%s\" of access method %s contains operator %s with invalid strategy number %d",
+							opfamilyname, "brin",
+							format_operator(oprform->amopopr),
+							oprform->amopstrategy)));
+			result = false;
+		}
+		else
+		{
+			/*
+			 * The set of operators supplied varies across BRIN opfamilies.
+			 * Our plan is to identify all operator strategy numbers used in
+			 * the opfamily and then complain about datatype combinations that
+			 * are missing any operator(s).  However, consider only numbers
+			 * that appear in some non-cross-type case, since cross-type
+			 * operators may have unique strategies.  (This is not a great
+			 * heuristic, in particular an erroneous number used in a
+			 * cross-type operator will not get noticed; but the core BRIN
+			 * opfamilies are messy enough to make it necessary.)
+			 */
+			if (oprform->amoplefttype == oprform->amoprighttype)
+				allops |= ((uint64) 1) << oprform->amopstrategy;
+		}
+
+		/* brin doesn't support ORDER BY operators */
+		if (oprform->amoppurpose != AMOP_SEARCH ||
+			OidIsValid(oprform->amopsortfamily))
+		{
+			ereport(INFO,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
+							opfamilyname, "brin",
+							format_operator(oprform->amopopr))));
+			result = false;
+		}
+
+		/* Check operator signature --- same for all brin strategies */
+		if (!check_amop_signature(oprform->amopopr, BOOLOID,
+								  oprform->amoplefttype,
+								  oprform->amoprighttype))
+		{
+			ereport(INFO,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("operator family \"%s\" of access method %s contains operator %s with wrong signature",
+							opfamilyname, "brin",
+							format_operator(oprform->amopopr))));
+			result = false;
+		}
+	}
+
+	/* Now check for inconsistent groups of operators/functions */
+	grouplist = identify_opfamily_groups(oprlist, proclist);
+	opclassgroup = NULL;
+	foreach(lc, grouplist)
+	{
+		OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
+
+		/* Remember the group exactly matching the test opclass */
+		if (thisgroup->lefttype == opcintype &&
+			thisgroup->righttype == opcintype)
+			opclassgroup = thisgroup;
+
+		/*
+		 * Some BRIN opfamilies expect cross-type support functions to exist,
+		 * and some don't.  We don't know exactly which are which, so if we
+		 * find a cross-type operator for which there are no support functions
+		 * at all, let it pass.  (Don't expect that all operators exist for
+		 * such cross-type cases, either.)
+		 */
+		if (thisgroup->functionset == 0 &&
+			thisgroup->lefttype != thisgroup->righttype)
+			continue;
+
+		/*
+		 * Else complain if there seems to be an incomplete set of either
+		 * operators or support functions for this datatype pair.
+		 */
+		if (thisgroup->operatorset != allops)
+		{
+			ereport(INFO,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("operator family \"%s\" of access method %s is missing operator(s) for types %s and %s",
+							opfamilyname, "brin",
+							format_type_be(thisgroup->lefttype),
+							format_type_be(thisgroup->righttype))));
+			result = false;
+		}
+		if (thisgroup->functionset != allfuncs)
+		{
+			ereport(INFO,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("operator family \"%s\" of access method %s is missing support function(s) for types %s and %s",
+							opfamilyname, "brin",
+							format_type_be(thisgroup->lefttype),
+							format_type_be(thisgroup->righttype))));
+			result = false;
+		}
+	}
+
+	/* Check that the originally-named opclass is complete */
+	if (!opclassgroup || opclassgroup->operatorset != allops)
+	{
+		ereport(INFO,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("operator class \"%s\" of access method %s is missing operator(s)",
+						opclassname, "brin")));
+		result = false;
+	}
+	for (i = 1; i <= BRIN_MANDATORY_NPROCS; i++)
+	{
+		if (opclassgroup &&
+			(opclassgroup->functionset & (((int64) 1) << i)) != 0)
+			continue;			/* got it */
+		ereport(INFO,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("operator class \"%s\" of access method %s is missing support function %d",
+						opclassname, "brin", i)));
+		result = false;
+	}
+
+	ReleaseCatCacheList(proclist);
+	ReleaseCatCacheList(oprlist);
+	ReleaseSysCache(familytup);
+	ReleaseSysCache(classtup);
+
+	return result;
+}
diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c
new file mode 100644
index 0000000..af69498
--- /dev/null
+++ b/src/backend/access/brin/brin_xlog.c
@@ -0,0 +1,367 @@
+/*
+ * brin_xlog.c
+ *		XLog replay routines for BRIN indexes
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/brin/brin_xlog.c
+ */
+#include "postgres.h"
+
+#include "access/brin_page.h"
+#include "access/brin_pageops.h"
+#include "access/brin_xlog.h"
+#include "access/bufmask.h"
+#include "access/xlogutils.h"
+
+
+/*
+ * xlog replay routines
+ */
+static void
+brin_xlog_createidx(XLogReaderState *record)
+{
+	xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record);
+	XLogRecPtr	lsn = record->EndRecPtr;
+	Buffer		metabuf;
+	Page		metapage;
+
+	/* rebuild the index metapage from the values carried in the record */
+	metabuf = XLogInitBufferForRedo(record, 0);
+	Assert(BufferIsValid(metabuf));
+	metapage = (Page) BufferGetPage(metabuf);
+	brin_metapage_init(metapage, xlrec->pagesPerRange, xlrec->version);
+	PageSetLSN(metapage, lsn);
+	MarkBufferDirty(metabuf);
+	UnlockReleaseBuffer(metabuf);
+}
+
+/*
+ * Common part of an insert or update: adds the new tuple to the regular page
+ * (block 0) and points the revmap page (block 1) at it.
+ */
+static void
+brin_xlog_insert_update(XLogReaderState *record,
+						xl_brin_insert *xlrec)
+{
+	XLogRecPtr	lsn = record->EndRecPtr;
+	Buffer		buffer;
+	BlockNumber regpgno;
+	Page		page;
+	XLogRedoAction action;
+
+	/*
+	 * If we inserted the first and only tuple on the page, re-initialize the
+	 * page from scratch.
+	 */
+	if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE)
+	{
+		buffer = XLogInitBufferForRedo(record, 0);
+		page = BufferGetPage(buffer);
+		brin_page_init(page, BRIN_PAGETYPE_REGULAR);
+		action = BLK_NEEDS_REDO;
+	}
+	else
+	{
+		action = XLogReadBufferForRedo(record, 0, &buffer);
+	}
+
+	/* revmap needs this page's blkno; buffer may be invalid, so ask the record */
+	XLogRecGetBlockTag(record, 0, NULL, NULL, &regpgno);
+
+	/* insert the index item into the page */
+	if (action == BLK_NEEDS_REDO)
+	{
+		OffsetNumber offnum;
+		BrinTuple  *tuple;
+		Size		tuplen;
+
+		tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
+
+		Assert(tuple->bt_blkno == xlrec->heapBlk);
+
+		page = (Page) BufferGetPage(buffer);
+		offnum = xlrec->offnum;
+		if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+			elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
+
+		offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
+		if (offnum == InvalidOffsetNumber)
+			elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
+
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+	}
+	if (BufferIsValid(buffer))
+		UnlockReleaseBuffer(buffer);
+
+	/* update the revmap */
+	action = XLogReadBufferForRedo(record, 1, &buffer);
+	if (action == BLK_NEEDS_REDO)
+	{
+		ItemPointerData tid;
+
+		ItemPointerSet(&tid, regpgno, xlrec->offnum);
+		page = (Page) BufferGetPage(buffer);
+
+		brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
+								tid);
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+	}
+	if (BufferIsValid(buffer))
+		UnlockReleaseBuffer(buffer);
+
+	/* XXX no FSM updates here ... */
+}
+
+/*
+ * Replay a BRIN index insertion; all the work is in brin_xlog_insert_update.
+ */
+static void
+brin_xlog_insert(XLogReaderState *record)
+{
+	/* the record's main data is handed over as the xl_brin_insert struct */
+	brin_xlog_insert_update(record,
+							(xl_brin_insert *) XLogRecGetData(record));
+}
+
+/*
+ * Replay a BRIN index update.
+ *
+ * The old tuple is deleted from its page (block 2 of the record); the new
+ * tuple and the revmap entry are then redone exactly as for an insertion.
+ */
+static void
+brin_xlog_update(XLogReaderState *record)
+{
+	xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
+	XLogRecPtr	lsn = record->EndRecPtr;
+	Buffer		oldbuf;
+
+	/* First remove the old tuple */
+	if (XLogReadBufferForRedo(record, 2, &oldbuf) == BLK_NEEDS_REDO)
+	{
+		Page		oldpage = (Page) BufferGetPage(oldbuf);
+
+		PageIndexTupleDeleteNoCompact(oldpage, xlrec->oldOffnum);
+
+		PageSetLSN(oldpage, lsn);
+		MarkBufferDirty(oldbuf);
+	}
+
+	/* Then insert the new tuple and update revmap, like in an insertion. */
+	brin_xlog_insert_update(record, &xlrec->insert);
+
+	/*
+	 * The old tuple's buffer, if we got one, is kept until the insertion has
+	 * been replayed and is released only now.
+	 */
+	if (BufferIsValid(oldbuf))
+		UnlockReleaseBuffer(oldbuf);
+}
+
+/*
+ * Replay an update that replaces a tuple on a single page.
+ *
+ * The new tuple image, taken from block 0's data, overwrites the item at the
+ * offset stored in the record.
+ */
+static void
+brin_xlog_samepage_update(XLogReaderState *record)
+{
+	xl_brin_samepage_update *xlrec =
+		(xl_brin_samepage_update *) XLogRecGetData(record);
+	XLogRecPtr	lsn = record->EndRecPtr;
+	Buffer		buffer;
+
+	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+	{
+		OffsetNumber target = xlrec->offnum;
+		BrinTuple  *newtup;
+		Size		newlen;
+		Page		page;
+
+		/* fetch the replacement tuple, then the page it goes onto */
+		newtup = (BrinTuple *) XLogRecGetBlockData(record, 0, &newlen);
+		page = (Page) BufferGetPage(buffer);
+
+		if (!PageIndexTupleOverwrite(page, target, (Item) newtup, newlen))
+			elog(PANIC, "brin_xlog_samepage_update: failed to replace tuple");
+
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+	}
+
+	if (BufferIsValid(buffer))
+		UnlockReleaseBuffer(buffer);
+
+	/* XXX no FSM updates here ... */
+}
+
+/*
+ * Replay a revmap page extension: record the new last revmap page in the
+ * metapage (block 0) and initialize the new revmap page (block 1).
+ */
+static void
+brin_xlog_revmap_extend(XLogReaderState *record)
+{
+	xl_brin_revmap_extend *xlrec =
+		(xl_brin_revmap_extend *) XLogRecGetData(record);
+	XLogRecPtr	lsn = record->EndRecPtr;
+	BlockNumber targetBlk;
+	Buffer		metabuf;
+	Buffer		revmapbuf;
+	Page		revmappage;
+
+	/* the block tag of block 1 must agree with the record's target block */
+	XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
+	Assert(xlrec->targetBlk == targetBlk);
+
+	/* Update the metapage */
+	if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
+	{
+		Page		metapg;
+		BrinMetaPageData *metadata;
+
+		metapg = BufferGetPage(metabuf);
+		metadata = (BrinMetaPageData *) PageGetContents(metapg);
+
+		/* the new page is expected to directly follow the current last one */
+		Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1);
+		metadata->lastRevmapPage = xlrec->targetBlk;
+
+		PageSetLSN(metapg, lsn);
+
+		/*
+		 * Set pd_lower just past the end of the metadata.  This is essential,
+		 * because without doing so, metadata will be lost if xlog.c
+		 * compresses the page.  (We must do this here because pre-v11
+		 * versions of PG did not set the metapage's pd_lower correctly, so a
+		 * pg_upgraded index might contain the wrong value.)
+		 */
+		((PageHeader) metapg)->pd_lower =
+			((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapg;
+
+		MarkBufferDirty(metabuf);
+	}
+
+	/*
+	 * Re-init the target block as a revmap page.  There's never a full-page
+	 * image here.
+	 */
+	revmapbuf = XLogInitBufferForRedo(record, 1);
+	revmappage = (Page) BufferGetPage(revmapbuf);
+	brin_page_init(revmappage, BRIN_PAGETYPE_REVMAP);
+
+	PageSetLSN(revmappage, lsn);
+	MarkBufferDirty(revmapbuf);
+
+	/* release the new revmap page first, then the metapage if we have it */
+	UnlockReleaseBuffer(revmapbuf);
+	if (BufferIsValid(metabuf))
+		UnlockReleaseBuffer(metabuf);
+}
+
+/*
+ * Replay a range de-summarization: invalidate the range's revmap entry
+ * (block 0), then delete the leftover tuple from the regular page (block 1).
+ */
+static void
+brin_xlog_desummarize_page(XLogReaderState *record)
+{
+	xl_brin_desummarize *xlrec = (xl_brin_desummarize *) XLogRecGetData(record);
+	XLogRecPtr	lsn = record->EndRecPtr;
+	Buffer		revmapbuf;
+	Buffer		regbuf;
+
+	/* Update the revmap */
+	if (XLogReadBufferForRedo(record, 0, &revmapbuf) == BLK_NEEDS_REDO)
+	{
+		ItemPointerData invalid;
+
+		ItemPointerSetInvalid(&invalid);
+		brinSetHeapBlockItemptr(revmapbuf, xlrec->pagesPerRange,
+								xlrec->heapBlk, invalid);
+
+		PageSetLSN(BufferGetPage(revmapbuf), lsn);
+		MarkBufferDirty(revmapbuf);
+	}
+	if (BufferIsValid(revmapbuf))
+		UnlockReleaseBuffer(revmapbuf);
+
+	/* remove the leftover entry from the regular page */
+	if (XLogReadBufferForRedo(record, 1, &regbuf) == BLK_NEEDS_REDO)
+	{
+		Page		regpage = BufferGetPage(regbuf);
+
+		PageIndexTupleDeleteNoCompact(regpage, xlrec->regOffset);
+		PageSetLSN(regpage, lsn);
+		MarkBufferDirty(regbuf);
+	}
+	if (BufferIsValid(regbuf))
+		UnlockReleaseBuffer(regbuf);
+}
+
+void
+brin_redo(XLogReaderState *record)
+{
+	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+	/* dispatch on the opcode bits to the matching replay routine */
+	switch (info & XLOG_BRIN_OPMASK)
+	{
+		case XLOG_BRIN_CREATE_INDEX:
+			brin_xlog_createidx(record);
+			break;
+		case XLOG_BRIN_INSERT:
+			brin_xlog_insert(record);
+			break;
+		case XLOG_BRIN_UPDATE:
+			brin_xlog_update(record);
+			break;
+		case XLOG_BRIN_SAMEPAGE_UPDATE:
+			brin_xlog_samepage_update(record);
+			break;
+		case XLOG_BRIN_REVMAP_EXTEND:
+			brin_xlog_revmap_extend(record);
+			break;
+		case XLOG_BRIN_DESUMMARIZE:
+			brin_xlog_desummarize_page(record);
+			break;
+		default:
+			elog(PANIC, "brin_redo: unknown op code %u", info);
+	}
+}
+
+/*
+ * Mask a BRIN page before doing consistency checks.
+ *
+ * 'blkno' is not used.
+ */
+void
+brin_mask(char *pagedata, BlockNumber blkno)
+{
+	Page		page = (Page) pagedata;
+
+	mask_page_lsn_and_checksum(page);
+	mask_page_hint_bits(page);
+
+	/*
+	 * Regular brin pages contain unused space which needs to be masked.
+	 * Similarly for meta pages, but mask it only if pd_lower appears to have
+	 * been set correctly.
+	 */
+	if (BRIN_IS_REGULAR_PAGE(page))
+		mask_unused_space(page);
+	else if (BRIN_IS_META_PAGE(page) &&
+			 ((PageHeader) page)->pd_lower > SizeOfPageHeaderData)
+		mask_unused_space(page);
+
+	/*
+	 * BRIN_EVACUATE_PAGE is not WAL-logged, since it's of no use in recovery.
+	 * Mask it.  See brin_start_evacuating_page() for details.
+	 */
+	BrinPageFlags(page) &= ~BRIN_EVACUATE_PAGE;
+}