summaryrefslogtreecommitdiffstats
path: root/contrib/bloom
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--contrib/bloom/.gitignore4
-rw-r--r--contrib/bloom/Makefile30
-rw-r--r--contrib/bloom/blcost.c44
-rw-r--r--contrib/bloom/blinsert.c366
-rw-r--r--contrib/bloom/bloom--1.0.sql25
-rw-r--r--contrib/bloom/bloom.control5
-rw-r--r--contrib/bloom/bloom.h216
-rw-r--r--contrib/bloom/blscan.c172
-rw-r--r--contrib/bloom/blutils.c497
-rw-r--r--contrib/bloom/blvacuum.c217
-rw-r--r--contrib/bloom/blvalidate.c225
-rw-r--r--contrib/bloom/expected/bloom.out230
-rw-r--r--contrib/bloom/sql/bloom.sql95
-rw-r--r--contrib/bloom/t/001_wal.pl93
14 files changed, 2219 insertions, 0 deletions
diff --git a/contrib/bloom/.gitignore b/contrib/bloom/.gitignore
new file mode 100644
index 0000000..5dcb3ff
--- /dev/null
+++ b/contrib/bloom/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/bloom/Makefile b/contrib/bloom/Makefile
new file mode 100644
index 0000000..8a781e4
--- /dev/null
+++ b/contrib/bloom/Makefile
@@ -0,0 +1,30 @@
+# contrib/bloom/Makefile
+
+MODULE_big = bloom
+OBJS = \
+ $(WIN32RES) \
+ blcost.o \
+ blinsert.o \
+ blscan.o \
+ blutils.o \
+ blvacuum.o \
+ blvalidate.o
+
+EXTENSION = bloom
+DATA = bloom--1.0.sql
+PGFILEDESC = "bloom access method - signature file based index"
+
+REGRESS = bloom
+
+TAP_TESTS = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/bloom
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c
new file mode 100644
index 0000000..4af1fc9
--- /dev/null
+++ b/contrib/bloom/blcost.c
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * blcost.c
+ * Cost estimate function for bloom indexes.
+ *
+ * Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blcost.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "bloom.h"
+#include "fmgr.h"
+#include "utils/selfuncs.h"
+
+/*
+ * Estimate cost of bloom index scan.
+ */
+void
+blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ /* We have to visit all index tuples anyway */
+ costs.numIndexTuples = index->tuples;
+
+ /* Use generic estimate */
+ genericcostestimate(root, path, loop_count, &costs);
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
+}
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
new file mode 100644
index 0000000..c34a640
--- /dev/null
+++ b/contrib/bloom/blinsert.c
@@ -0,0 +1,366 @@
+/*-------------------------------------------------------------------------
+ *
+ * blinsert.c
+ * Bloom index build and insert functions.
+ *
+ * Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blinsert.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/generic_xlog.h"
+#include "access/tableam.h"
+#include "bloom.h"
+#include "catalog/index.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "storage/smgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * State of bloom index build. We accumulate one page data here before
+ * flushing it to buffer manager.
+ */
+typedef struct
+{
+ BloomState blstate; /* bloom index state */
+ int64 indtuples; /* total number of tuples indexed */
+ MemoryContext tmpCtx; /* temporary memory context reset after each
+ * tuple */
+ PGAlignedBlock data; /* cached page */
+ int count; /* number of tuples in cached page */
+} BloomBuildState;
+
+/*
+ * Flush page cached in BloomBuildState.
+ */
+static void
+flushCachedPage(Relation index, BloomBuildState *buildstate)
+{
+ Page page;
+ Buffer buffer = BloomNewBuffer(index);
+ GenericXLogState *state;
+
+ state = GenericXLogStart(index);
+ page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
+ memcpy(page, buildstate->data.data, BLCKSZ);
+ GenericXLogFinish(state);
+ UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * (Re)initialize cached page in BloomBuildState.
+ */
+static void
+initCachedPage(BloomBuildState *buildstate)
+{
+ BloomInitPage(buildstate->data.data, 0);
+ buildstate->count = 0;
+}
+
+/*
+ * Per-tuple callback for table_index_build_scan.
+ */
+static void
+bloomBuildCallback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *state)
+{
+ BloomBuildState *buildstate = (BloomBuildState *) state;
+ MemoryContext oldCtx;
+ BloomTuple *itup;
+
+ oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+ itup = BloomFormTuple(&buildstate->blstate, tid, values, isnull);
+
+ /* Try to add next item to cached page */
+ if (BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
+ {
+ /* Next item was added successfully */
+ buildstate->count++;
+ }
+ else
+ {
+ /* Cached page is full, flush it out and make a new one */
+ flushCachedPage(index, buildstate);
+
+ CHECK_FOR_INTERRUPTS();
+
+ initCachedPage(buildstate);
+
+ if (!BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
+ {
+ /* We shouldn't be here since we're inserting to the empty page */
+ elog(ERROR, "could not add new bloom tuple to empty page");
+ }
+
+ /* Next item was added successfully */
+ buildstate->count++;
+ }
+
+ /* Update total tuple count */
+ buildstate->indtuples += 1;
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(buildstate->tmpCtx);
+}
+
+/*
+ * Build a new bloom index.
+ */
+IndexBuildResult *
+blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
+{
+ IndexBuildResult *result;
+ double reltuples;
+ BloomBuildState buildstate;
+
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "index \"%s\" already contains data",
+ RelationGetRelationName(index));
+
+ /* Initialize the meta page */
+ BloomInitMetapage(index);
+
+ /* Initialize the bloom build state */
+ memset(&buildstate, 0, sizeof(buildstate));
+ initBloomState(&buildstate.blstate, index);
+ buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "Bloom build temporary context",
+ ALLOCSET_DEFAULT_SIZES);
+ initCachedPage(&buildstate);
+
+ /* Do the heap scan */
+ reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+ bloomBuildCallback, (void *) &buildstate,
+ NULL);
+
+ /* Flush last page if needed (it will be, unless heap was empty) */
+ if (buildstate.count > 0)
+ flushCachedPage(index, &buildstate);
+
+ MemoryContextDelete(buildstate.tmpCtx);
+
+ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+ result->heap_tuples = reltuples;
+ result->index_tuples = buildstate.indtuples;
+
+ return result;
+}
+
+/*
+ * Build an empty bloom index in the initialization fork.
+ */
+void
+blbuildempty(Relation index)
+{
+ Page metapage;
+
+ /* Construct metapage. */
+ metapage = (Page) palloc(BLCKSZ);
+ BloomFillMetapage(index, metapage);
+
+ /*
+ * Write the page and log it. It might seem that an immediate sync would
+ * be sufficient to guarantee that the file exists on disk, but recovery
+ * itself might remove it while replaying, for example, an
+ * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
+ * this even when wal_level=minimal.
+ */
+ PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
+ smgrwrite(index->rd_smgr, INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
+ (char *) metapage, true);
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BLOOM_METAPAGE_BLKNO, metapage, true);
+
+ /*
+ * An immediate sync is required even if we xlog'd the page, because the
+ * write did not go through shared_buffers and therefore a concurrent
+ * checkpoint may have moved the redo pointer past our xlog record.
+ */
+ smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+}
+
+/*
+ * Insert new tuple to the bloom index.
+ */
+bool
+blinsert(Relation index, Datum *values, bool *isnull,
+ ItemPointer ht_ctid, Relation heapRel,
+ IndexUniqueCheck checkUnique,
+ bool indexUnchanged,
+ IndexInfo *indexInfo)
+{
+ BloomState blstate;
+ BloomTuple *itup;
+ MemoryContext oldCtx;
+ MemoryContext insertCtx;
+ BloomMetaPageData *metaData;
+ Buffer buffer,
+ metaBuffer;
+ Page page,
+ metaPage;
+ BlockNumber blkno = InvalidBlockNumber;
+ OffsetNumber nStart;
+ GenericXLogState *state;
+
+ insertCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "Bloom insert temporary context",
+ ALLOCSET_DEFAULT_SIZES);
+
+ oldCtx = MemoryContextSwitchTo(insertCtx);
+
+ initBloomState(&blstate, index);
+ itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
+
+ /*
+ * At first, try to insert new tuple to the first page in notFullPage
+ * array. If successful, we don't need to modify the meta page.
+ */
+ metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+ LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
+ metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
+
+ if (metaData->nEnd > metaData->nStart)
+ {
+ Page page;
+
+ blkno = metaData->notFullPage[metaData->nStart];
+ Assert(blkno != InvalidBlockNumber);
+
+ /* Don't hold metabuffer lock while doing insert */
+ LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
+
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ state = GenericXLogStart(index);
+ page = GenericXLogRegisterBuffer(state, buffer, 0);
+
+ /*
+ * We might have found a page that was recently deleted by VACUUM. If
+ * so, we can reuse it, but we must reinitialize it.
+ */
+ if (PageIsNew(page) || BloomPageIsDeleted(page))
+ BloomInitPage(page, 0);
+
+ if (BloomPageAddItem(&blstate, page, itup))
+ {
+ /* Success! Apply the change, clean up, and exit */
+ GenericXLogFinish(state);
+ UnlockReleaseBuffer(buffer);
+ ReleaseBuffer(metaBuffer);
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(insertCtx);
+ return false;
+ }
+
+ /* Didn't fit, must try other pages */
+ GenericXLogAbort(state);
+ UnlockReleaseBuffer(buffer);
+ }
+ else
+ {
+ /* No entries in notFullPage */
+ LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
+ }
+
+ /*
+ * Try other pages in notFullPage array. We will have to change nStart in
+ * metapage. Thus, grab exclusive lock on metapage.
+ */
+ LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* nStart might have changed while we didn't have lock */
+ nStart = metaData->nStart;
+
+ /* Skip first page if we already tried it above */
+ if (nStart < metaData->nEnd &&
+ blkno == metaData->notFullPage[nStart])
+ nStart++;
+
+ /*
+ * This loop iterates for each page we try from the notFullPage array, and
+ * will also initialize a GenericXLogState for the fallback case of having
+ * to allocate a new page.
+ */
+ for (;;)
+ {
+ state = GenericXLogStart(index);
+
+ /* get modifiable copy of metapage */
+ metaPage = GenericXLogRegisterBuffer(state, metaBuffer, 0);
+ metaData = BloomPageGetMeta(metaPage);
+
+ if (nStart >= metaData->nEnd)
+ break; /* no more entries in notFullPage array */
+
+ blkno = metaData->notFullPage[nStart];
+ Assert(blkno != InvalidBlockNumber);
+
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ page = GenericXLogRegisterBuffer(state, buffer, 0);
+
+ /* Basically same logic as above */
+ if (PageIsNew(page) || BloomPageIsDeleted(page))
+ BloomInitPage(page, 0);
+
+ if (BloomPageAddItem(&blstate, page, itup))
+ {
+ /* Success! Apply the changes, clean up, and exit */
+ metaData->nStart = nStart;
+ GenericXLogFinish(state);
+ UnlockReleaseBuffer(buffer);
+ UnlockReleaseBuffer(metaBuffer);
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(insertCtx);
+ return false;
+ }
+
+ /* Didn't fit, must try other pages */
+ GenericXLogAbort(state);
+ UnlockReleaseBuffer(buffer);
+ nStart++;
+ }
+
+ /*
+ * Didn't find place to insert in notFullPage array. Allocate new page.
+ * (XXX is it good to do this while holding ex-lock on the metapage??)
+ */
+ buffer = BloomNewBuffer(index);
+
+ page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
+ BloomInitPage(page, 0);
+
+ if (!BloomPageAddItem(&blstate, page, itup))
+ {
+ /* We shouldn't be here since we're inserting to an empty page */
+ elog(ERROR, "could not add new bloom tuple to empty page");
+ }
+
+ /* Reset notFullPage array to contain just this new page */
+ metaData->nStart = 0;
+ metaData->nEnd = 1;
+ metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
+
+ /* Apply the changes, clean up, and exit */
+ GenericXLogFinish(state);
+
+ UnlockReleaseBuffer(buffer);
+ UnlockReleaseBuffer(metaBuffer);
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(insertCtx);
+
+ return false;
+}
diff --git a/contrib/bloom/bloom--1.0.sql b/contrib/bloom/bloom--1.0.sql
new file mode 100644
index 0000000..4e7c922
--- /dev/null
+++ b/contrib/bloom/bloom--1.0.sql
@@ -0,0 +1,25 @@
+/* contrib/bloom/bloom--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION bloom" to load this file. \quit
+
+CREATE FUNCTION blhandler(internal)
+RETURNS index_am_handler
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+-- Access method
+CREATE ACCESS METHOD bloom TYPE INDEX HANDLER blhandler;
+COMMENT ON ACCESS METHOD bloom IS 'bloom index access method';
+
+-- Opclasses
+
+CREATE OPERATOR CLASS int4_ops
+DEFAULT FOR TYPE int4 USING bloom AS
+ OPERATOR 1 =(int4, int4),
+ FUNCTION 1 hashint4(int4);
+
+CREATE OPERATOR CLASS text_ops
+DEFAULT FOR TYPE text USING bloom AS
+ OPERATOR 1 =(text, text),
+ FUNCTION 1 hashtext(text);
diff --git a/contrib/bloom/bloom.control b/contrib/bloom/bloom.control
new file mode 100644
index 0000000..4d4124b
--- /dev/null
+++ b/contrib/bloom/bloom.control
@@ -0,0 +1,5 @@
+# bloom extension
+comment = 'bloom access method - signature file based index'
+default_version = '1.0'
+module_pathname = '$libdir/bloom'
+relocatable = true
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
new file mode 100644
index 0000000..a22a6df
--- /dev/null
+++ b/contrib/bloom/bloom.h
@@ -0,0 +1,216 @@
+/*-------------------------------------------------------------------------
+ *
+ * bloom.h
+ * Header for bloom index.
+ *
+ * Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/bloom.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _BLOOM_H_
+#define _BLOOM_H_
+
+#include "access/amapi.h"
+#include "access/generic_xlog.h"
+#include "access/itup.h"
+#include "access/xlog.h"
+#include "fmgr.h"
+#include "nodes/pathnodes.h"
+
+/* Support procedures numbers */
+#define BLOOM_HASH_PROC 1
+#define BLOOM_OPTIONS_PROC 2
+#define BLOOM_NPROC 2
+
+/* Scan strategies */
+#define BLOOM_EQUAL_STRATEGY 1
+#define BLOOM_NSTRATEGIES 1
+
+/* Opaque for bloom pages */
+typedef struct BloomPageOpaqueData
+{
+ OffsetNumber maxoff; /* number of index tuples on page */
+ uint16 flags; /* see bit definitions below */
+ uint16 unused; /* placeholder to force maxaligning of size of
+ * BloomPageOpaqueData and to place
+ * bloom_page_id exactly at the end of page */
+ uint16 bloom_page_id; /* for identification of BLOOM indexes */
+} BloomPageOpaqueData;
+
+typedef BloomPageOpaqueData *BloomPageOpaque;
+
+/* Bloom page flags */
+#define BLOOM_META (1<<0)
+#define BLOOM_DELETED (2<<0)
+
+/*
+ * The page ID is for the convenience of pg_filedump and similar utilities,
+ * which otherwise would have a hard time telling pages of different index
+ * types apart. It should be the last 2 bytes on the page. This is more or
+ * less "free" due to alignment considerations.
+ *
+ * See comments above GinPageOpaqueData.
+ */
+#define BLOOM_PAGE_ID 0xFF83
+
+/* Macros for accessing bloom page structures */
+#define BloomPageGetOpaque(page) ((BloomPageOpaque) PageGetSpecialPointer(page))
+#define BloomPageGetMaxOffset(page) (BloomPageGetOpaque(page)->maxoff)
+#define BloomPageIsMeta(page) \
+ ((BloomPageGetOpaque(page)->flags & BLOOM_META) != 0)
+#define BloomPageIsDeleted(page) \
+ ((BloomPageGetOpaque(page)->flags & BLOOM_DELETED) != 0)
+#define BloomPageSetDeleted(page) \
+ (BloomPageGetOpaque(page)->flags |= BLOOM_DELETED)
+#define BloomPageSetNonDeleted(page) \
+ (BloomPageGetOpaque(page)->flags &= ~BLOOM_DELETED)
+#define BloomPageGetData(page) ((BloomTuple *)PageGetContents(page))
+#define BloomPageGetTuple(state, page, offset) \
+ ((BloomTuple *)(PageGetContents(page) \
+ + (state)->sizeOfBloomTuple * ((offset) - 1)))
+#define BloomPageGetNextTuple(state, tuple) \
+ ((BloomTuple *)((Pointer)(tuple) + (state)->sizeOfBloomTuple))
+
+/* Preserved page numbers */
+#define BLOOM_METAPAGE_BLKNO (0)
+#define BLOOM_HEAD_BLKNO (1) /* first data page */
+
+/*
+ * We store Bloom signatures as arrays of uint16 words.
+ */
+typedef uint16 BloomSignatureWord;
+
+#define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
+
+/*
+ * Default and maximum Bloom signature length in bits.
+ */
+#define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS)
+#define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS)
+
+/*
+ * Default and maximum signature bits generated per index key.
+ */
+#define DEFAULT_BLOOM_BITS 2
+#define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1)
+
+/* Bloom index options */
+typedef struct BloomOptions
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int bloomLength; /* length of signature in words (not bits!) */
+ int bitSize[INDEX_MAX_KEYS]; /* # of bits generated for each
+ * index key */
+} BloomOptions;
+
+/*
+ * FreeBlockNumberArray - array of block numbers sized so that metadata fill
+ * all space in metapage.
+ */
+typedef BlockNumber FreeBlockNumberArray[
+ MAXALIGN_DOWN(
+ BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(BloomPageOpaqueData))
+ - MAXALIGN(sizeof(uint16) * 2 + sizeof(uint32) + sizeof(BloomOptions))
+ ) / sizeof(BlockNumber)
+];
+
+/* Metadata of bloom index */
+typedef struct BloomMetaPageData
+{
+ uint32 magickNumber;
+ uint16 nStart;
+ uint16 nEnd;
+ BloomOptions opts;
+ FreeBlockNumberArray notFullPage;
+} BloomMetaPageData;
+
+/* Magic number to distinguish bloom pages among anothers */
+#define BLOOM_MAGICK_NUMBER (0xDBAC0DED)
+
+/* Number of blocks numbers fit in BloomMetaPageData */
+#define BloomMetaBlockN (sizeof(FreeBlockNumberArray) / sizeof(BlockNumber))
+
+#define BloomPageGetMeta(page) ((BloomMetaPageData *) PageGetContents(page))
+
+typedef struct BloomState
+{
+ FmgrInfo hashFn[INDEX_MAX_KEYS];
+ Oid collations[INDEX_MAX_KEYS];
+ BloomOptions opts; /* copy of options on index's metapage */
+ int32 nColumns;
+
+ /*
+ * sizeOfBloomTuple is index-specific, and it depends on reloptions, so
+ * precompute it
+ */
+ Size sizeOfBloomTuple;
+} BloomState;
+
+#define BloomPageGetFreeSpace(state, page) \
+ (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
+ - BloomPageGetMaxOffset(page) * (state)->sizeOfBloomTuple \
+ - MAXALIGN(sizeof(BloomPageOpaqueData)))
+
+/*
+ * Tuples are very different from all other relations
+ */
+typedef struct BloomTuple
+{
+ ItemPointerData heapPtr;
+ BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER];
+} BloomTuple;
+
+#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
+
+/* Opaque data structure for bloom index scan */
+typedef struct BloomScanOpaqueData
+{
+ BloomSignatureWord *sign; /* Scan signature */
+ BloomState state;
+} BloomScanOpaqueData;
+
+typedef BloomScanOpaqueData *BloomScanOpaque;
+
+/* blutils.c */
+extern void _PG_init(void);
+extern void initBloomState(BloomState *state, Relation index);
+extern void BloomFillMetapage(Relation index, Page metaPage);
+extern void BloomInitMetapage(Relation index);
+extern void BloomInitPage(Page page, uint16 flags);
+extern Buffer BloomNewBuffer(Relation index);
+extern void signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno);
+extern BloomTuple *BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull);
+extern bool BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple);
+
+/* blvalidate.c */
+extern bool blvalidate(Oid opclassoid);
+
+/* index access method interface functions */
+extern bool blinsert(Relation index, Datum *values, bool *isnull,
+ ItemPointer ht_ctid, Relation heapRel,
+ IndexUniqueCheck checkUnique,
+ bool indexUnchanged,
+ struct IndexInfo *indexInfo);
+extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys);
+extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
+extern void blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+ ScanKey orderbys, int norderbys);
+extern void blendscan(IndexScanDesc scan);
+extern IndexBuildResult *blbuild(Relation heap, Relation index,
+ struct IndexInfo *indexInfo);
+extern void blbuildempty(Relation index);
+extern IndexBulkDeleteResult *blbulkdelete(IndexVacuumInfo *info,
+ IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback,
+ void *callback_state);
+extern IndexBulkDeleteResult *blvacuumcleanup(IndexVacuumInfo *info,
+ IndexBulkDeleteResult *stats);
+extern bytea *bloptions(Datum reloptions, bool validate);
+extern void blcostestimate(PlannerInfo *root, IndexPath *path,
+ double loop_count, Cost *indexStartupCost,
+ Cost *indexTotalCost, Selectivity *indexSelectivity,
+ double *indexCorrelation, double *indexPages);
+
+#endif
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
new file mode 100644
index 0000000..6ae3710
--- /dev/null
+++ b/contrib/bloom/blscan.c
@@ -0,0 +1,172 @@
+/*-------------------------------------------------------------------------
+ *
+ * blscan.c
+ * Bloom index scan functions.
+ *
+ * Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blscan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "bloom.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+/*
+ * Begin scan of bloom index.
+ */
+IndexScanDesc
+blbeginscan(Relation r, int nkeys, int norderbys)
+{
+ IndexScanDesc scan;
+ BloomScanOpaque so;
+
+ scan = RelationGetIndexScan(r, nkeys, norderbys);
+
+ so = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData));
+ initBloomState(&so->state, scan->indexRelation);
+ so->sign = NULL;
+
+ scan->opaque = so;
+
+ return scan;
+}
+
+/*
+ * Rescan a bloom index.
+ */
+void
+blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+ ScanKey orderbys, int norderbys)
+{
+ BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+ if (so->sign)
+ pfree(so->sign);
+ so->sign = NULL;
+
+ if (scankey && scan->numberOfKeys > 0)
+ {
+ memmove(scan->keyData, scankey,
+ scan->numberOfKeys * sizeof(ScanKeyData));
+ }
+}
+
+/*
+ * End scan of bloom index.
+ */
+void
+blendscan(IndexScanDesc scan)
+{
+ BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+ if (so->sign)
+ pfree(so->sign);
+ so->sign = NULL;
+}
+
+/*
+ * Insert all matching tuples into a bitmap.
+ */
+int64
+blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
+{
+ int64 ntids = 0;
+ BlockNumber blkno = BLOOM_HEAD_BLKNO,
+ npages;
+ int i;
+ BufferAccessStrategy bas;
+ BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+ if (so->sign == NULL)
+ {
+ /* New search: have to calculate search signature */
+ ScanKey skey = scan->keyData;
+
+ so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
+
+ for (i = 0; i < scan->numberOfKeys; i++)
+ {
+ /*
+ * Assume bloom-indexable operators to be strict, so nothing could
+ * be found for NULL key.
+ */
+ if (skey->sk_flags & SK_ISNULL)
+ {
+ pfree(so->sign);
+ so->sign = NULL;
+ return 0;
+ }
+
+ /* Add next value to the signature */
+ signValue(&so->state, so->sign, skey->sk_argument,
+ skey->sk_attno - 1);
+
+ skey++;
+ }
+ }
+
+ /*
+ * We're going to read the whole index. This is why we use appropriate
+ * buffer access strategy.
+ */
+ bas = GetAccessStrategy(BAS_BULKREAD);
+ npages = RelationGetNumberOfBlocks(scan->indexRelation);
+
+ for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+ {
+ Buffer buffer;
+ Page page;
+
+ buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
+ blkno, RBM_NORMAL, bas);
+
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ page = BufferGetPage(buffer);
+ TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
+
+ if (!PageIsNew(page) && !BloomPageIsDeleted(page))
+ {
+ OffsetNumber offset,
+ maxOffset = BloomPageGetMaxOffset(page);
+
+ for (offset = 1; offset <= maxOffset; offset++)
+ {
+ BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
+ bool res = true;
+
+ /* Check index signature with scan signature */
+ for (i = 0; i < so->state.opts.bloomLength; i++)
+ {
+ if ((itup->sign[i] & so->sign[i]) != so->sign[i])
+ {
+ res = false;
+ break;
+ }
+ }
+
+ /* Add matching tuples to bitmap */
+ if (res)
+ {
+ tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
+ ntids++;
+ }
+ }
+ }
+
+ UnlockReleaseBuffer(buffer);
+ CHECK_FOR_INTERRUPTS();
+ }
+ FreeAccessStrategy(bas);
+
+ return ntids;
+}
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
new file mode 100644
index 0000000..754de00
--- /dev/null
+++ b/contrib/bloom/blutils.c
@@ -0,0 +1,497 @@
+/*-------------------------------------------------------------------------
+ *
+ * blutils.c
+ * Bloom index utilities.
+ *
+ * Portions Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1990-1993, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/generic_xlog.h"
+#include "access/reloptions.h"
+#include "bloom.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/indexfsm.h"
+#include "storage/lmgr.h"
+#include "utils/memutils.h"
+
+/* Signature dealing macros - note i is assumed to be of type int */
+#define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
+#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
+#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
+#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
+
+PG_FUNCTION_INFO_V1(blhandler);
+
+/* Kind of relation options for bloom index */
+static relopt_kind bl_relopt_kind;
+
+/* parse table for fillRelOptions */
+static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
+
+static int32 myRand(void);
+static void mySrand(uint32 seed);
+
+/*
+ * Module initialize function: initialize info about Bloom relation options.
+ *
+ * Note: keep this in sync with makeDefaultBloomOptions().
+ */
+void
+_PG_init(void)
+{
+ int i;
+ char buf[16];
+
+ bl_relopt_kind = add_reloption_kind();
+
+ /* Option for length of signature */
+ add_int_reloption(bl_relopt_kind, "length",
+ "Length of signature in bits",
+ DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH,
+ AccessExclusiveLock);
+ bl_relopt_tab[0].optname = "length";
+ bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
+ bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
+
+ /* Number of bits for each possible index column: col1, col2, ... */
+ for (i = 0; i < INDEX_MAX_KEYS; i++)
+ {
+ snprintf(buf, sizeof(buf), "col%d", i + 1);
+ add_int_reloption(bl_relopt_kind, buf,
+ "Number of bits generated for each index column",
+ DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS,
+ AccessExclusiveLock);
+ bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
+ buf);
+ bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
+ bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
+ }
+}
+
+/*
+ * Construct a default set of Bloom options.
+ */
+static BloomOptions *
+makeDefaultBloomOptions(void)
+{
+ BloomOptions *opts;
+ int i;
+
+ opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
+ /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
+ opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
+ for (i = 0; i < INDEX_MAX_KEYS; i++)
+ opts->bitSize[i] = DEFAULT_BLOOM_BITS;
+ SET_VARSIZE(opts, sizeof(BloomOptions));
+ return opts;
+}
+
+/*
+ * Bloom handler function: return IndexAmRoutine with access method parameters
+ * and callbacks.
+ */
+Datum
+blhandler(PG_FUNCTION_ARGS)
+{
+ IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
+
+ amroutine->amstrategies = BLOOM_NSTRATEGIES;
+ amroutine->amsupport = BLOOM_NPROC;
+ amroutine->amoptsprocnum = BLOOM_OPTIONS_PROC;
+ amroutine->amcanorder = false;
+ amroutine->amcanorderbyop = false;
+ amroutine->amcanbackward = false;
+ amroutine->amcanunique = false;
+ amroutine->amcanmulticol = true;
+ amroutine->amoptionalkey = true;
+ amroutine->amsearcharray = false;
+ amroutine->amsearchnulls = false;
+ amroutine->amstorage = false;
+ amroutine->amclusterable = false;
+ amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
+ amroutine->amcaninclude = false;
+ amroutine->amusemaintenanceworkmem = false;
+ amroutine->amparallelvacuumoptions =
+ VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP;
+ amroutine->amkeytype = InvalidOid;
+
+ amroutine->ambuild = blbuild;
+ amroutine->ambuildempty = blbuildempty;
+ amroutine->aminsert = blinsert;
+ amroutine->ambulkdelete = blbulkdelete;
+ amroutine->amvacuumcleanup = blvacuumcleanup;
+ amroutine->amcanreturn = NULL;
+ amroutine->amcostestimate = blcostestimate;
+ amroutine->amoptions = bloptions;
+ amroutine->amproperty = NULL;
+ amroutine->ambuildphasename = NULL;
+ amroutine->amvalidate = blvalidate;
+ amroutine->amadjustmembers = NULL;
+ amroutine->ambeginscan = blbeginscan;
+ amroutine->amrescan = blrescan;
+ amroutine->amgettuple = NULL;
+ amroutine->amgetbitmap = blgetbitmap;
+ amroutine->amendscan = blendscan;
+ amroutine->ammarkpos = NULL;
+ amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
+
+ PG_RETURN_POINTER(amroutine);
+}
+
+/*
+ * Fill BloomState structure for particular index.
+ */
+void
+initBloomState(BloomState *state, Relation index)
+{
+ int i;
+
+ state->nColumns = index->rd_att->natts;
+
+ /* Initialize hash function for each attribute */
+ for (i = 0; i < index->rd_att->natts; i++)
+ {
+ fmgr_info_copy(&(state->hashFn[i]),
+ index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
+ CurrentMemoryContext);
+ state->collations[i] = index->rd_indcollation[i];
+ }
+
+ /* Initialize amcache if needed with options from metapage */
+ if (!index->rd_amcache)
+ {
+ Buffer buffer;
+ Page page;
+ BloomMetaPageData *meta;
+ BloomOptions *opts;
+
+ opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
+
+ buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+ page = BufferGetPage(buffer);
+
+ if (!BloomPageIsMeta(page))
+ elog(ERROR, "Relation is not a bloom index");
+ meta = BloomPageGetMeta(BufferGetPage(buffer));
+
+ if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
+ elog(ERROR, "Relation is not a bloom index");
+
+ *opts = meta->opts;
+
+ UnlockReleaseBuffer(buffer);
+
+ index->rd_amcache = (void *) opts;
+ }
+
+ memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
+ state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
+ sizeof(BloomSignatureWord) * state->opts.bloomLength;
+}
+
+/*
+ * Random generator copied from FreeBSD. Using own random generator here for
+ * two reasons:
+ *
+ * 1) In this case random numbers are used for on-disk storage. Usage of
+ * PostgreSQL number generator would obstruct it from all possible changes.
+ * 2) Changing seed of PostgreSQL random generator would be undesirable side
+ * effect.
+ */
+static int32 next;
+
+static int32
+myRand(void)
+{
+ /*----------
+ * Compute x = (7^5 * x) mod (2^31 - 1)
+ * without overflowing 31 bits:
+ * (2^31 - 1) = 127773 * (7^5) + 2836
+ * From "Random number generators: good ones are hard to find",
+ * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+ * October 1988, p. 1195.
+ *----------
+ */
+ int32 hi,
+ lo,
+ x;
+
+ /* Must be in [1, 0x7ffffffe] range at this point. */
+ hi = next / 127773;
+ lo = next % 127773;
+ x = 16807 * lo - 2836 * hi;
+ if (x < 0)
+ x += 0x7fffffff;
+ next = x;
+ /* Transform to [0, 0x7ffffffd] range. */
+ return (x - 1);
+}
+
+static void
+mySrand(uint32 seed)
+{
+ next = seed;
+ /* Transform to [1, 0x7ffffffe] range. */
+ next = (next % 0x7ffffffe) + 1;
+}
+
+/*
+ * Add bits of given value to the signature.
+ */
+void
+signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
+{
+ uint32 hashVal;
+ int nBit,
+ j;
+
+ /*
+ * init generator with "column's" number to get "hashed" seed for new
+ * value. We don't want to map the same numbers from different columns
+ * into the same bits!
+ */
+ mySrand(attno);
+
+ /*
+ * Init hash sequence to map our value into bits. the same values in
+ * different columns will be mapped into different bits because of step
+ * above
+ */
+ hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
+ mySrand(hashVal ^ myRand());
+
+ for (j = 0; j < state->opts.bitSize[attno]; j++)
+ {
+ /* prevent multiple evaluation in SETBIT macro */
+ nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
+ SETBIT(sign, nBit);
+ }
+}
+
+/*
+ * Make bloom tuple from values.
+ */
+BloomTuple *
+BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
+{
+ int i;
+ BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
+
+ res->heapPtr = *iptr;
+
+ /* Blooming each column */
+ for (i = 0; i < state->nColumns; i++)
+ {
+ /* skip nulls */
+ if (isnull[i])
+ continue;
+
+ signValue(state, res->sign, values[i], i);
+ }
+
+ return res;
+}
+
+/*
+ * Add new bloom tuple to the page. Returns true if new tuple was successfully
+ * added to the page. Returns false if it doesn't fit on the page.
+ */
+bool
+BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
+{
+ BloomTuple *itup;
+ BloomPageOpaque opaque;
+ Pointer ptr;
+
+ /* We shouldn't be pointed to an invalid page */
+ Assert(!PageIsNew(page) && !BloomPageIsDeleted(page));
+
+ /* Does new tuple fit on the page? */
+ if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
+ return false;
+
+ /* Copy new tuple to the end of page */
+ opaque = BloomPageGetOpaque(page);
+ itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
+ memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
+
+ /* Adjust maxoff and pd_lower */
+ opaque->maxoff++;
+ ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
+ ((PageHeader) page)->pd_lower = ptr - page;
+
+ /* Assert we didn't overrun available space */
+ Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper);
+
+ return true;
+}
+
+/*
+ * Allocate a new page (either by recycling, or by extending the index file)
+ * The returned buffer is already pinned and exclusive-locked
+ * Caller is responsible for initializing the page by calling BloomInitPage
+ */
+Buffer
+BloomNewBuffer(Relation index)
+{
+ Buffer buffer;
+ bool needLock;
+
+ /* First, try to get a page from FSM */
+ for (;;)
+ {
+ BlockNumber blkno = GetFreeIndexPage(index);
+
+ if (blkno == InvalidBlockNumber)
+ break;
+
+ buffer = ReadBuffer(index, blkno);
+
+ /*
+ * We have to guard against the possibility that someone else already
+ * recycled this page; the buffer may be locked if so.
+ */
+ if (ConditionalLockBuffer(buffer))
+ {
+ Page page = BufferGetPage(buffer);
+
+ if (PageIsNew(page))
+ return buffer; /* OK to use, if never initialized */
+
+ if (BloomPageIsDeleted(page))
+ return buffer; /* OK to use */
+
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ }
+
+ /* Can't use it, so release buffer and try again */
+ ReleaseBuffer(buffer);
+ }
+
+ /* Must extend the file */
+ needLock = !RELATION_IS_LOCAL(index);
+ if (needLock)
+ LockRelationForExtension(index, ExclusiveLock);
+
+ buffer = ReadBuffer(index, P_NEW);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ if (needLock)
+ UnlockRelationForExtension(index, ExclusiveLock);
+
+ return buffer;
+}
+
+/*
+ * Initialize any page of a bloom index.
+ */
+void
+BloomInitPage(Page page, uint16 flags)
+{
+ BloomPageOpaque opaque;
+
+ PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
+
+ opaque = BloomPageGetOpaque(page);
+ opaque->flags = flags;
+ opaque->bloom_page_id = BLOOM_PAGE_ID;
+}
+
+/*
+ * Fill in metapage for bloom index.
+ */
+void
+BloomFillMetapage(Relation index, Page metaPage)
+{
+ BloomOptions *opts;
+ BloomMetaPageData *metadata;
+
+ /*
+ * Choose the index's options. If reloptions have been assigned, use
+ * those, otherwise create default options.
+ */
+ opts = (BloomOptions *) index->rd_options;
+ if (!opts)
+ opts = makeDefaultBloomOptions();
+
+ /*
+ * Initialize contents of meta page, including a copy of the options,
+ * which are now frozen for the life of the index.
+ */
+ BloomInitPage(metaPage, BLOOM_META);
+ metadata = BloomPageGetMeta(metaPage);
+ memset(metadata, 0, sizeof(BloomMetaPageData));
+ metadata->magickNumber = BLOOM_MAGICK_NUMBER;
+ metadata->opts = *opts;
+ ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
+
+ /* If this fails, probably FreeBlockNumberArray size calc is wrong: */
+ Assert(((PageHeader) metaPage)->pd_lower <= ((PageHeader) metaPage)->pd_upper);
+}
+
+/*
+ * Initialize metapage for bloom index.
+ */
+void
+BloomInitMetapage(Relation index)
+{
+ Buffer metaBuffer;
+ Page metaPage;
+ GenericXLogState *state;
+
+ /*
+ * Make a new page; since it is first page it should be associated with
+ * block number 0 (BLOOM_METAPAGE_BLKNO).
+ */
+ metaBuffer = BloomNewBuffer(index);
+ Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
+
+ /* Initialize contents of meta page */
+ state = GenericXLogStart(index);
+ metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
+ GENERIC_XLOG_FULL_IMAGE);
+ BloomFillMetapage(index, metaPage);
+ GenericXLogFinish(state);
+
+ UnlockReleaseBuffer(metaBuffer);
+}
+
+/*
+ * Parse reloptions for bloom index, producing a BloomOptions struct.
+ */
+bytea *
+bloptions(Datum reloptions, bool validate)
+{
+ BloomOptions *rdopts;
+
+ /* Parse the user-given reloptions */
+ rdopts = (BloomOptions *) build_reloptions(reloptions, validate,
+ bl_relopt_kind,
+ sizeof(BloomOptions),
+ bl_relopt_tab,
+ lengthof(bl_relopt_tab));
+
+ /* Convert signature length from # of bits to # to words, rounding up */
+ if (rdopts)
+ rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
+
+ return (bytea *) rdopts;
+}
diff --git a/contrib/bloom/blvacuum.c b/contrib/bloom/blvacuum.c
new file mode 100644
index 0000000..88b0a6d
--- /dev/null
+++ b/contrib/bloom/blvacuum.c
@@ -0,0 +1,217 @@
+/*-------------------------------------------------------------------------
+ *
+ * blvacuum.c
+ * Bloom VACUUM functions.
+ *
+ * Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blvacuum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "bloom.h"
+#include "catalog/storage.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "postmaster/autovacuum.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "storage/lmgr.h"
+
+
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+IndexBulkDeleteResult *
+blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+ IndexBulkDeleteCallback callback, void *callback_state)
+{
+ Relation index = info->index;
+ BlockNumber blkno,
+ npages;
+ FreeBlockNumberArray notFullPage;
+ int countPage = 0;
+ BloomState state;
+ Buffer buffer;
+ Page page;
+ BloomMetaPageData *metaData;
+ GenericXLogState *gxlogState;
+
+ if (stats == NULL)
+ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+ initBloomState(&state, index);
+
+ /*
+ * Iterate over the pages. We don't care about concurrently added pages,
+ * they can't contain tuples to delete.
+ */
+ npages = RelationGetNumberOfBlocks(index);
+ for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+ {
+ BloomTuple *itup,
+ *itupPtr,
+ *itupEnd;
+
+ vacuum_delay_point();
+
+ buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
+ RBM_NORMAL, info->strategy);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ gxlogState = GenericXLogStart(index);
+ page = GenericXLogRegisterBuffer(gxlogState, buffer, 0);
+
+ /* Ignore empty/deleted pages until blvacuumcleanup() */
+ if (PageIsNew(page) || BloomPageIsDeleted(page))
+ {
+ UnlockReleaseBuffer(buffer);
+ GenericXLogAbort(gxlogState);
+ continue;
+ }
+
+ /*
+ * Iterate over the tuples. itup points to current tuple being
+ * scanned, itupPtr points to where to save next non-deleted tuple.
+ */
+ itup = itupPtr = BloomPageGetTuple(&state, page, FirstOffsetNumber);
+ itupEnd = BloomPageGetTuple(&state, page,
+ OffsetNumberNext(BloomPageGetMaxOffset(page)));
+ while (itup < itupEnd)
+ {
+ /* Do we have to delete this tuple? */
+ if (callback(&itup->heapPtr, callback_state))
+ {
+ /* Yes; adjust count of tuples that will be left on page */
+ BloomPageGetOpaque(page)->maxoff--;
+ stats->tuples_removed += 1;
+ }
+ else
+ {
+ /* No; copy it to itupPtr++, but skip copy if not needed */
+ if (itupPtr != itup)
+ memmove((Pointer) itupPtr, (Pointer) itup,
+ state.sizeOfBloomTuple);
+ itupPtr = BloomPageGetNextTuple(&state, itupPtr);
+ }
+
+ itup = BloomPageGetNextTuple(&state, itup);
+ }
+
+ /* Assert that we counted correctly */
+ Assert(itupPtr == BloomPageGetTuple(&state, page,
+ OffsetNumberNext(BloomPageGetMaxOffset(page))));
+
+ /*
+ * Add page to new notFullPage list if we will not mark page as
+ * deleted and there is free space on it
+ */
+ if (BloomPageGetMaxOffset(page) != 0 &&
+ BloomPageGetFreeSpace(&state, page) >= state.sizeOfBloomTuple &&
+ countPage < BloomMetaBlockN)
+ notFullPage[countPage++] = blkno;
+
+ /* Did we delete something? */
+ if (itupPtr != itup)
+ {
+ /* Is it empty page now? */
+ if (BloomPageGetMaxOffset(page) == 0)
+ BloomPageSetDeleted(page);
+ /* Adjust pd_lower */
+ ((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;
+ /* Finish WAL-logging */
+ GenericXLogFinish(gxlogState);
+ }
+ else
+ {
+ /* Didn't change anything: abort WAL-logging */
+ GenericXLogAbort(gxlogState);
+ }
+ UnlockReleaseBuffer(buffer);
+ }
+
+ /*
+ * Update the metapage's notFullPage list with whatever we found. Our
+ * info could already be out of date at this point, but blinsert() will
+ * cope if so.
+ */
+ buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ gxlogState = GenericXLogStart(index);
+ page = GenericXLogRegisterBuffer(gxlogState, buffer, 0);
+
+ metaData = BloomPageGetMeta(page);
+ memcpy(metaData->notFullPage, notFullPage, sizeof(BlockNumber) * countPage);
+ metaData->nStart = 0;
+ metaData->nEnd = countPage;
+
+ GenericXLogFinish(gxlogState);
+ UnlockReleaseBuffer(buffer);
+
+ return stats;
+}
+
+/*
+ * Post-VACUUM cleanup.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+IndexBulkDeleteResult *
+blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+{
+ Relation index = info->index;
+ BlockNumber npages,
+ blkno;
+
+ if (info->analyze_only)
+ return stats;
+
+ if (stats == NULL)
+ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+ /*
+ * Iterate over the pages: insert deleted pages into FSM and collect
+ * statistics.
+ */
+ npages = RelationGetNumberOfBlocks(index);
+ stats->num_pages = npages;
+ stats->pages_free = 0;
+ stats->num_index_tuples = 0;
+ for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+ {
+ Buffer buffer;
+ Page page;
+
+ vacuum_delay_point();
+
+ buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
+ RBM_NORMAL, info->strategy);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ page = (Page) BufferGetPage(buffer);
+
+ if (PageIsNew(page) || BloomPageIsDeleted(page))
+ {
+ RecordFreeIndexPage(index, blkno);
+ stats->pages_free++;
+ }
+ else
+ {
+ stats->num_index_tuples += BloomPageGetMaxOffset(page);
+ }
+
+ UnlockReleaseBuffer(buffer);
+ }
+
+ IndexFreeSpaceMapVacuum(info->index);
+
+ return stats;
+}
diff --git a/contrib/bloom/blvalidate.c b/contrib/bloom/blvalidate.c
new file mode 100644
index 0000000..aa8c87c
--- /dev/null
+++ b/contrib/bloom/blvalidate.c
@@ -0,0 +1,225 @@
+/*-------------------------------------------------------------------------
+ *
+ * blvalidate.c
+ * Opclass validator for bloom.
+ *
+ * Copyright (c) 2016-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blvalidate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amvalidate.h"
+#include "access/htup_details.h"
+#include "bloom.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/regproc.h"
+#include "utils/syscache.h"
+
+/*
+ * Validator for a bloom opclass.
+ */
+bool
+blvalidate(Oid opclassoid)
+{
+ bool result = true;
+ HeapTuple classtup;
+ Form_pg_opclass classform;
+ Oid opfamilyoid;
+ Oid opcintype;
+ Oid opckeytype;
+ char *opclassname;
+ HeapTuple familytup;
+ Form_pg_opfamily familyform;
+ char *opfamilyname;
+ CatCList *proclist,
+ *oprlist;
+ List *grouplist;
+ OpFamilyOpFuncGroup *opclassgroup;
+ int i;
+ ListCell *lc;
+
+ /* Fetch opclass information */
+ classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
+ if (!HeapTupleIsValid(classtup))
+ elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
+ classform = (Form_pg_opclass) GETSTRUCT(classtup);
+
+ opfamilyoid = classform->opcfamily;
+ opcintype = classform->opcintype;
+ opckeytype = classform->opckeytype;
+ if (!OidIsValid(opckeytype))
+ opckeytype = opcintype;
+ opclassname = NameStr(classform->opcname);
+
+ /* Fetch opfamily information */
+ familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
+ if (!HeapTupleIsValid(familytup))
+ elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
+ familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
+
+ opfamilyname = NameStr(familyform->opfname);
+
+ /* Fetch all operators and support functions of the opfamily */
+ oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
+ proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
+
+ /* Check individual support functions */
+ for (i = 0; i < proclist->n_members; i++)
+ {
+ HeapTuple proctup = &proclist->members[i]->tuple;
+ Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
+ bool ok;
+
+ /*
+ * All bloom support functions should be registered with matching
+ * left/right types
+ */
+ if (procform->amproclefttype != procform->amprocrighttype)
+ {
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("bloom opfamily %s contains support procedure %s with cross-type registration",
+ opfamilyname,
+ format_procedure(procform->amproc))));
+ result = false;
+ }
+
+ /*
+ * We can't check signatures except within the specific opclass, since
+ * we need to know the associated opckeytype in many cases.
+ */
+ if (procform->amproclefttype != opcintype)
+ continue;
+
+ /* Check procedure numbers and function signatures */
+ switch (procform->amprocnum)
+ {
+ case BLOOM_HASH_PROC:
+ ok = check_amproc_signature(procform->amproc, INT4OID, false,
+ 1, 1, opckeytype);
+ break;
+ case BLOOM_OPTIONS_PROC:
+ ok = check_amoptsproc_signature(procform->amproc);
+ break;
+ default:
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("bloom opfamily %s contains function %s with invalid support number %d",
+ opfamilyname,
+ format_procedure(procform->amproc),
+ procform->amprocnum)));
+ result = false;
+ continue; /* don't want additional message */
+ }
+
+ if (!ok)
+ {
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("gist opfamily %s contains function %s with wrong signature for support number %d",
+ opfamilyname,
+ format_procedure(procform->amproc),
+ procform->amprocnum)));
+ result = false;
+ }
+ }
+
+ /* Check individual operators */
+ for (i = 0; i < oprlist->n_members; i++)
+ {
+ HeapTuple oprtup = &oprlist->members[i]->tuple;
+ Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
+
+ /* Check it's allowed strategy for bloom */
+ if (oprform->amopstrategy < 1 ||
+ oprform->amopstrategy > BLOOM_NSTRATEGIES)
+ {
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("bloom opfamily %s contains operator %s with invalid strategy number %d",
+ opfamilyname,
+ format_operator(oprform->amopopr),
+ oprform->amopstrategy)));
+ result = false;
+ }
+
+ /* bloom doesn't support ORDER BY operators */
+ if (oprform->amoppurpose != AMOP_SEARCH ||
+ OidIsValid(oprform->amopsortfamily))
+ {
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("bloom opfamily %s contains invalid ORDER BY specification for operator %s",
+ opfamilyname,
+ format_operator(oprform->amopopr))));
+ result = false;
+ }
+
+ /* Check operator signature --- same for all bloom strategies */
+ if (!check_amop_signature(oprform->amopopr, BOOLOID,
+ oprform->amoplefttype,
+ oprform->amoprighttype))
+ {
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("bloom opfamily %s contains operator %s with wrong signature",
+ opfamilyname,
+ format_operator(oprform->amopopr))));
+ result = false;
+ }
+ }
+
+ /* Now check for inconsistent groups of operators/functions */
+ grouplist = identify_opfamily_groups(oprlist, proclist);
+ opclassgroup = NULL;
+ foreach(lc, grouplist)
+ {
+ OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
+
+ /* Remember the group exactly matching the test opclass */
+ if (thisgroup->lefttype == opcintype &&
+ thisgroup->righttype == opcintype)
+ opclassgroup = thisgroup;
+
+ /*
+ * There is not a lot we can do to check the operator sets, since each
+ * bloom opclass is more or less a law unto itself, and some contain
+ * only operators that are binary-compatible with the opclass datatype
+ * (meaning that empty operator sets can be OK). That case also means
+ * that we shouldn't insist on nonempty function sets except for the
+ * opclass's own group.
+ */
+ }
+
+ /* Check that the originally-named opclass is complete */
+ for (i = 1; i <= BLOOM_NPROC; i++)
+ {
+ if (opclassgroup &&
+ (opclassgroup->functionset & (((uint64) 1) << i)) != 0)
+ continue; /* got it */
+ if (i == BLOOM_OPTIONS_PROC)
+ continue; /* optional method */
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("bloom opclass %s is missing support function %d",
+ opclassname, i)));
+ result = false;
+ }
+
+ ReleaseCatCacheList(proclist);
+ ReleaseCatCacheList(oprlist);
+ ReleaseSysCache(familytup);
+ ReleaseSysCache(classtup);
+
+ return result;
+}
diff --git a/contrib/bloom/expected/bloom.out b/contrib/bloom/expected/bloom.out
new file mode 100644
index 0000000..dae12a7
--- /dev/null
+++ b/contrib/bloom/expected/bloom.out
@@ -0,0 +1,230 @@
+CREATE EXTENSION bloom;
+CREATE TABLE tst (
+ i int4,
+ t text
+);
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
+ALTER INDEX bloomidx SET (length=80);
+SET enable_seqscan=on;
+SET enable_bitmapscan=off;
+SET enable_indexscan=off;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 200
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 112
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 13
+(1 row)
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
+ QUERY PLAN
+-------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tst
+ Recheck Cond: (i = 7)
+ -> Bitmap Index Scan on bloomidx
+ Index Cond: (i = 7)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
+ QUERY PLAN
+-------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tst
+ Recheck Cond: (t = '5'::text)
+ -> Bitmap Index Scan on bloomidx
+ Index Cond: (t = '5'::text)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ QUERY PLAN
+---------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tst
+ Recheck Cond: ((i = 7) AND (t = '5'::text))
+ -> Bitmap Index Scan on bloomidx
+ Index Cond: ((i = 7) AND (t = '5'::text))
+(5 rows)
+
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 200
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 112
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 13
+(1 row)
+
+DELETE FROM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+VACUUM ANALYZE tst;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 200
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 112
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 13
+(1 row)
+
+DELETE FROM tst WHERE i > 1 OR t = '5';
+VACUUM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 200
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 112
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 13
+(1 row)
+
+VACUUM FULL tst;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 200
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 112
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 13
+(1 row)
+
+-- Try an unlogged table too
+CREATE UNLOGGED TABLE tstu (
+ i int4,
+ t text
+);
+INSERT INTO tstu SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+CREATE INDEX bloomidxu ON tstu USING bloom (i, t) WITH (col2 = 4);
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE i = 7;
+ QUERY PLAN
+--------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tstu
+ Recheck Cond: (i = 7)
+ -> Bitmap Index Scan on bloomidxu
+ Index Cond: (i = 7)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE t = '5';
+ QUERY PLAN
+--------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tstu
+ Recheck Cond: (t = '5'::text)
+ -> Bitmap Index Scan on bloomidxu
+ Index Cond: (t = '5'::text)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE i = 7 AND t = '5';
+ QUERY PLAN
+---------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tstu
+ Recheck Cond: ((i = 7) AND (t = '5'::text))
+ -> Bitmap Index Scan on bloomidxu
+ Index Cond: ((i = 7) AND (t = '5'::text))
+(5 rows)
+
+SELECT count(*) FROM tstu WHERE i = 7;
+ count
+-------
+ 200
+(1 row)
+
+SELECT count(*) FROM tstu WHERE t = '5';
+ count
+-------
+ 112
+(1 row)
+
+SELECT count(*) FROM tstu WHERE i = 7 AND t = '5';
+ count
+-------
+ 13
+(1 row)
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+RESET enable_indexscan;
+-- Run amvalidator function on our opclasses
+SELECT opcname, amvalidate(opc.oid)
+FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod
+WHERE amname = 'bloom'
+ORDER BY 1;
+ opcname | amvalidate
+----------+------------
+ int4_ops | t
+ text_ops | t
+(2 rows)
+
+--
+-- relation options
+--
+DROP INDEX bloomidx;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (length=7, col1=4);
+SELECT reloptions FROM pg_class WHERE oid = 'bloomidx'::regclass;
+ reloptions
+-------------------
+ {length=7,col1=4}
+(1 row)
+
+-- check for min and max values
+\set VERBOSITY terse
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (length=0);
+ERROR: value 0 out of bounds for option "length"
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (col1=0);
+ERROR: value 0 out of bounds for option "col1"
diff --git a/contrib/bloom/sql/bloom.sql b/contrib/bloom/sql/bloom.sql
new file mode 100644
index 0000000..4733e1e
--- /dev/null
+++ b/contrib/bloom/sql/bloom.sql
@@ -0,0 +1,95 @@
+CREATE EXTENSION bloom;
+
+CREATE TABLE tst (
+ i int4,
+ t text
+);
+
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
+ALTER INDEX bloomidx SET (length=80);
+
+SET enable_seqscan=on;
+SET enable_bitmapscan=off;
+SET enable_indexscan=off;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+DELETE FROM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+VACUUM ANALYZE tst;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+DELETE FROM tst WHERE i > 1 OR t = '5';
+VACUUM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+VACUUM FULL tst;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+-- Try an unlogged table too
+
+CREATE UNLOGGED TABLE tstu (
+ i int4,
+ t text
+);
+
+INSERT INTO tstu SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
+CREATE INDEX bloomidxu ON tstu USING bloom (i, t) WITH (col2 = 4);
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE i = 7;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE t = '5';
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE i = 7 AND t = '5';
+
+SELECT count(*) FROM tstu WHERE i = 7;
+SELECT count(*) FROM tstu WHERE t = '5';
+SELECT count(*) FROM tstu WHERE i = 7 AND t = '5';
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+RESET enable_indexscan;
+
+-- Run amvalidator function on our opclasses
+SELECT opcname, amvalidate(opc.oid)
+FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod
+WHERE amname = 'bloom'
+ORDER BY 1;
+
+--
+-- relation options
+--
+DROP INDEX bloomidx;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (length=7, col1=4);
+SELECT reloptions FROM pg_class WHERE oid = 'bloomidx'::regclass;
+-- check for min and max values
+\set VERBOSITY terse
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (length=0);
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (col1=0);
diff --git a/contrib/bloom/t/001_wal.pl b/contrib/bloom/t/001_wal.pl
new file mode 100644
index 0000000..c487c07
--- /dev/null
+++ b/contrib/bloom/t/001_wal.pl
@@ -0,0 +1,93 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test generic xlog record work for bloom index replication.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+
+if (TestLib::has_wal_read_bug)
+{
+ # We'd prefer to use Test::More->builder->todo_start, but the bug causes
+ # this test file to die(), not merely to fail.
+ plan skip_all => 'filesystem bug';
+}
+else
+{
+ plan tests => 31;
+}
+
+my $node_primary;
+my $node_standby;
+
+# Run few queries on both primary and standby and check their results match.
+sub test_index_replay
+{
+ my ($test_name) = @_;
+
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ # Wait for standby to catch up
+ $node_primary->wait_for_catchup($node_standby);
+
+ my $queries = qq(SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+SELECT * FROM tst WHERE i = 0;
+SELECT * FROM tst WHERE i = 3;
+SELECT * FROM tst WHERE t = 'b';
+SELECT * FROM tst WHERE t = 'f';
+SELECT * FROM tst WHERE i = 3 AND t = 'c';
+SELECT * FROM tst WHERE i = 7 AND t = 'e';
+);
+
+ # Run test queries and compare their result
+ my $primary_result = $node_primary->safe_psql("postgres", $queries);
+ my $standby_result = $node_standby->safe_psql("postgres", $queries);
+
+ is($primary_result, $standby_result, "$test_name: query result matches");
+ return;
+}
+
+# Initialize primary node
+$node_primary = get_new_node('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+$node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->start;
+
+# Create some bloom index on primary
+$node_primary->safe_psql("postgres", "CREATE EXTENSION bloom;");
+$node_primary->safe_psql("postgres", "CREATE TABLE tst (i int4, t text);");
+$node_primary->safe_psql("postgres",
+ "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;"
+);
+$node_primary->safe_psql("postgres",
+ "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);");
+
+# Test that queries give same result
+test_index_replay('initial');
+
+# Run 10 cycles of table modification. Run test queries after each modification.
+for my $i (1 .. 10)
+{
+ $node_primary->safe_psql("postgres", "DELETE FROM tst WHERE i = $i;");
+ test_index_replay("delete $i");
+ $node_primary->safe_psql("postgres", "VACUUM tst;");
+ test_index_replay("vacuum $i");
+ my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
+ $node_primary->safe_psql("postgres",
+ "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series($start,$end) i;"
+ );
+ test_index_replay("insert $i");
+}