diff options
Diffstat (limited to 'src/backend/access/brin/brin_bloom.c')
-rw-r--r-- | src/backend/access/brin/brin_bloom.c | 809 |
1 files changed, 809 insertions, 0 deletions
diff --git a/src/backend/access/brin/brin_bloom.c b/src/backend/access/brin/brin_bloom.c new file mode 100644 index 0000000..2c8a20a --- /dev/null +++ b/src/backend/access/brin/brin_bloom.c @@ -0,0 +1,809 @@ +/* + * brin_bloom.c + * Implementation of Bloom opclass for BRIN + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * A BRIN opclass summarizing page range into a bloom filter. + * + * Bloom filters allow efficient testing whether a given page range contains + * a particular value. Therefore, if we summarize each page range into a small + * bloom filter, we can easily (and cheaply) test whether it contains values + * we get later. + * + * The index only supports equality operators, similarly to hash indexes. + * Bloom indexes are however much smaller, and support only bitmap scans. + * + * Note: Don't confuse this with bloom indexes, implemented in a contrib + * module. That extension implements an entirely new AM, building a bloom + * filter on multiple columns in a single row. This opclass works with an + * existing AM (BRIN) and builds bloom filter on a column. + * + * + * values vs. hashes + * ----------------- + * + * The original column values are not used directly, but are first hashed + * using the regular type-specific hash function, producing a uint32 hash. + * And this hash value is then added to the summary - i.e. it's hashed + * again and added to the bloom filter. + * + * This allows the code to treat all data types (byval/byref/...) the same + * way, with only minimal space requirements, because we're working with + * hashes and not the original values. Everything is uint32. + * + * Of course, this assumes the built-in hash function is reasonably good, + * without too many collisions etc. But that does seem to be the case, at + * least based on past experience. After all, the same hash functions are + * used for hash indexes, hash partitioning and so on. + * + * + * hashing scheme + * -------------- + * + * Bloom filters require a number of independent hash functions. There are + * different schemes how to construct them - for example we might use + * hash_uint32_extended with random seeds, but that seems fairly expensive. + * We use a scheme requiring only two functions described in this paper: + * + * Less Hashing, Same Performance:Building a Better Bloom Filter + * Adam Kirsch, Michael Mitzenmacher†, Harvard School of Engineering and + * Applied Sciences, Cambridge, Massachusetts [DOI 10.1002/rsa.20208] + * + * The two hash functions h1 and h2 are calculated using hard-coded seeds, + * and then combined using (h1 + i * h2) to generate the hash functions. + * + * + * sizing the bloom filter + * ----------------------- + * + * Size of a bloom filter depends on the number of distinct values we will + * store in it, and the desired false positive rate. The higher the number + * of distinct values and/or the lower the false positive rate, the larger + * the bloom filter. On the other hand, we want to keep the index as small + * as possible - that's one of the basic advantages of BRIN indexes. + * + * Although the number of distinct elements (in a page range) depends on + * the data, we can consider it fixed. This simplifies the trade-off to + * just false positive rate vs. size. + * + * At the page range level, false positive rate is a probability the bloom + * filter matches a random value. For the whole index (with sufficiently + * many page ranges) it represents the fraction of the index ranges (and + * thus fraction of the table to be scanned) matching the random value. + * + * Furthermore, the size of the bloom filter is subject to implementation + * limits - it has to fit onto a single index page (8kB by default). As + * the bitmap is inherently random (when "full" about half the bits is set + * to 1, randomly), compression can't help very much. + * + * To reduce the size of a filter (to fit to a page), we have to either + * accept higher false positive rate (undesirable), or reduce the number + * of distinct items to be stored in the filter. We can't alter the input + * data, of course, but we may make the BRIN page ranges smaller - instead + * of the default 128 pages (1MB) we may build index with 16-page ranges, + * or something like that. This should reduce the number of distinct values + * in the page range, making the filter smaller (with fixed false positive + * rate). Even for random data sets this should help, as the number of rows + * per heap page is limited (to ~290 with very narrow tables, likely ~20 + * in practice). + * + * Of course, good sizing decisions depend on having the necessary data, + * i.e. number of distinct values in a page range (of a given size) and + * table size (to estimate cost change due to change in false positive + * rate due to having larger index vs. scanning larger indexes). We may + * not have that data - for example when building an index on empty table + * it's not really possible. And for some data we only have estimates for + * the whole table and we can only estimate per-range values (ndistinct). + * + * Another challenge is that while the bloom filter is per-column, it's + * the whole index tuple that has to fit into a page. And for multi-column + * indexes that may include pieces we have no control over (not necessarily + * bloom filters, the other columns may use other BRIN opclasses). So it's + * not entirely clear how to distribute the space between those columns. + * + * The current logic, implemented in brin_bloom_get_ndistinct, attempts to + * make some basic sizing decisions, based on the size of BRIN ranges, and + * the maximum number of rows per range. + * + * + * IDENTIFICATION + * src/backend/access/brin/brin_bloom.c + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/brin.h" +#include "access/brin_internal.h" +#include "access/brin_page.h" +#include "access/brin_tuple.h" +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/stratnum.h" +#include "catalog/pg_type.h" +#include "catalog/pg_amop.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +#include <math.h> + +#define BloomEqualStrategyNumber 1 + +/* + * Additional SQL level support functions. We only have one, which is + * used to calculate hash of the input value. + * + * Procedure numbers must not use values reserved for BRIN itself; see + * brin_internal.h. + */ +#define BLOOM_MAX_PROCNUMS 1 /* maximum support procs we need */ +#define PROCNUM_HASH 11 /* required */ + +/* + * Subtract this from procnum to obtain index in BloomOpaque arrays + * (Must be equal to minimum of private procnums). + */ +#define PROCNUM_BASE 11 + +/* + * Storage type for BRIN's reloptions. + */ +typedef struct BloomOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + double nDistinctPerRange; /* number of distinct values per range */ + double falsePositiveRate; /* false positive for bloom filter */ +} BloomOptions; + +/* + * The current min value (16) is somewhat arbitrary, but it's based + * on the fact that the filter header is ~20B alone, which is about + * the same as the filter bitmap for 16 distinct items with 1% false + * positive rate. So by allowing lower values we'd not gain much. In + * any case, the min should not be larger than MaxHeapTuplesPerPage + * (~290), which is the theoretical maximum for single-page ranges. + */ +#define BLOOM_MIN_NDISTINCT_PER_RANGE 16 + +/* + * Used to determine number of distinct items, based on the number of rows + * in a page range. The 10% is somewhat similar to what estimate_num_groups + * does, so we use the same factor here. + */ +#define BLOOM_DEFAULT_NDISTINCT_PER_RANGE -0.1 /* 10% of values */ + +/* + * Allowed range and default value for the false positive range. The exact + * values are somewhat arbitrary, but were chosen considering the various + * parameters (size of filter vs. page size, etc.). + * + * The lower the false-positive rate, the more accurate the filter is, but + * it also gets larger - at some point this eliminates the main advantage + * of BRIN indexes, which is the tiny size. At 0.01% the index is about + * 10% of the table (assuming 290 distinct values per 8kB page). + * + * On the other hand, as the false-positive rate increases, larger part of + * the table has to be scanned due to mismatches - at 25% we're probably + * close to sequential scan being cheaper. + */ +#define BLOOM_MIN_FALSE_POSITIVE_RATE 0.0001 /* 0.01% fp rate */ +#define BLOOM_MAX_FALSE_POSITIVE_RATE 0.25 /* 25% fp rate */ +#define BLOOM_DEFAULT_FALSE_POSITIVE_RATE 0.01 /* 1% fp rate */ + +#define BloomGetNDistinctPerRange(opts) \ + ((opts) && (((BloomOptions *) (opts))->nDistinctPerRange != 0) ? \ + (((BloomOptions *) (opts))->nDistinctPerRange) : \ + BLOOM_DEFAULT_NDISTINCT_PER_RANGE) + +#define BloomGetFalsePositiveRate(opts) \ + ((opts) && (((BloomOptions *) (opts))->falsePositiveRate != 0.0) ? \ + (((BloomOptions *) (opts))->falsePositiveRate) : \ + BLOOM_DEFAULT_FALSE_POSITIVE_RATE) + +/* + * And estimate of the largest bloom we can fit onto a page. This is not + * a perfect guarantee, for a couple of reasons. For example, the row may + * be larger because the index has multiple columns. + */ +#define BloomMaxFilterSize \ + MAXALIGN_DOWN(BLCKSZ - \ + (MAXALIGN(SizeOfPageHeaderData + \ + sizeof(ItemIdData)) + \ + MAXALIGN(sizeof(BrinSpecialSpace)) + \ + SizeOfBrinTuple)) + +/* + * Seeds used to calculate two hash functions h1 and h2, which are then used + * to generate k hashes using the (h1 + i * h2) scheme. + */ +#define BLOOM_SEED_1 0x71d924af +#define BLOOM_SEED_2 0xba48b314 + +/* + * Bloom Filter + * + * Represents a bloom filter, built on hashes of the indexed values. That is, + * we compute a uint32 hash of the value, and then store this hash into the + * bloom filter (and compute additional hashes on it). + * + * XXX We could implement "sparse" bloom filters, keeping only the bytes that + * are not entirely 0. But while indexes don't support TOAST, the varlena can + * still be compressed. So this seems unnecessary, because the compression + * should do the same job. + * + * XXX We can also watch the number of bits set in the bloom filter, and then + * stop using it (and not store the bitmap, to save space) when the false + * positive rate gets too high. But even if the false positive rate exceeds the + * desired value, it still can eliminate some page ranges. + */ +typedef struct BloomFilter +{ + /* varlena header (do not touch directly!) */ + int32 vl_len_; + + /* space for various flags (unused for now) */ + uint16 flags; + + /* fields for the HASHED phase */ + uint8 nhashes; /* number of hash functions */ + uint32 nbits; /* number of bits in the bitmap (size) */ + uint32 nbits_set; /* number of bits set to 1 */ + + /* data of the bloom filter */ + char data[FLEXIBLE_ARRAY_MEMBER]; + +} BloomFilter; + + +/* + * bloom_init + * Initialize the Bloom Filter, allocate all the memory. + * + * The filter is initialized with optimal size for ndistinct expected values + * and the requested false positive rate. The filter is stored as varlena. + */ +static BloomFilter * +bloom_init(int ndistinct, double false_positive_rate) +{ + Size len; + BloomFilter *filter; + + int nbits; /* size of filter / number of bits */ + int nbytes; /* size of filter / number of bytes */ + + double k; /* number of hash functions */ + + Assert(ndistinct > 0); + Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) && + (false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE)); + + /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */ + nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2)); + + /* round m to whole bytes */ + nbytes = ((nbits + 7) / 8); + nbits = nbytes * 8; + + /* + * Reject filters that are obviously too large to store on a page. + * + * Initially the bloom filter is just zeroes and so very compressible, but + * as we add values it gets more and more random, and so less and less + * compressible. So initially everything fits on the page, but we might + * get surprising failures later - we want to prevent that, so we reject + * bloom filter that are obviously too large. + * + * XXX It's not uncommon to oversize the bloom filter a bit, to defend + * against unexpected data anomalies (parts of table with more distinct + * values per range etc.). But we still need to make sure even the + * oversized filter fits on page, if such need arises. + * + * XXX This check is not perfect, because the index may have multiple + * filters that are small individually, but too large when combined. + */ + if (nbytes > BloomMaxFilterSize) + elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes, + BloomMaxFilterSize); + + /* + * round(log(2.0) * m / ndistinct), but assume round() may not be + * available on Windows + */ + k = log(2.0) * nbits / ndistinct; + k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k); + + /* + * We allocate the whole filter. Most of it is going to be 0 bits, so the + * varlena is easy to compress. + */ + len = offsetof(BloomFilter, data) + nbytes; + + filter = (BloomFilter *) palloc0(len); + + filter->flags = 0; + filter->nhashes = (int) k; + filter->nbits = nbits; + + SET_VARSIZE(filter, len); + + return filter; +} + + +/* + * bloom_add_value + * Add value to the bloom filter. + */ +static BloomFilter * +bloom_add_value(BloomFilter *filter, uint32 value, bool *updated) +{ + int i; + uint64 h1, + h2; + + /* compute the hashes, used for the bloom filter */ + h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits; + h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits; + + /* compute the requested number of hashes */ + for (i = 0; i < filter->nhashes; i++) + { + /* h1 + h2 + f(i) */ + uint32 h = (h1 + i * h2) % filter->nbits; + uint32 byte = (h / 8); + uint32 bit = (h % 8); + + /* if the bit is not set, set it and remember we did that */ + if (!(filter->data[byte] & (0x01 << bit))) + { + filter->data[byte] |= (0x01 << bit); + filter->nbits_set++; + if (updated) + *updated = true; + } + } + + return filter; +} + + +/* + * bloom_contains_value + * Check if the bloom filter contains a particular value. + */ +static bool +bloom_contains_value(BloomFilter *filter, uint32 value) +{ + int i; + uint64 h1, + h2; + + /* calculate the two hashes */ + h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits; + h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits; + + /* compute the requested number of hashes */ + for (i = 0; i < filter->nhashes; i++) + { + /* h1 + h2 + f(i) */ + uint32 h = (h1 + i * h2) % filter->nbits; + uint32 byte = (h / 8); + uint32 bit = (h % 8); + + /* if the bit is not set, the value is not there */ + if (!(filter->data[byte] & (0x01 << bit))) + return false; + } + + /* all hashes found in bloom filter */ + return true; +} + +typedef struct BloomOpaque +{ + /* + * XXX At this point we only need a single proc (to compute the hash), but + * let's keep the array just like inclusion and minmax opclasses, for + * consistency. We may need additional procs in the future. + */ + FmgrInfo extra_procinfos[BLOOM_MAX_PROCNUMS]; + bool extra_proc_missing[BLOOM_MAX_PROCNUMS]; +} BloomOpaque; + +static FmgrInfo *bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, + uint16 procnum); + + +Datum +brin_bloom_opcinfo(PG_FUNCTION_ARGS) +{ + BrinOpcInfo *result; + + /* + * opaque->strategy_procinfos is initialized lazily; here it is set to + * all-uninitialized by palloc0 which sets fn_oid to InvalidOid. + * + * bloom indexes only store the filter as a single BYTEA column + */ + + result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) + + sizeof(BloomOpaque)); + result->oi_nstored = 1; + result->oi_regular_nulls = true; + result->oi_opaque = (BloomOpaque *) + MAXALIGN((char *) result + SizeofBrinOpcInfo(1)); + result->oi_typcache[0] = lookup_type_cache(PG_BRIN_BLOOM_SUMMARYOID, 0); + + PG_RETURN_POINTER(result); +} + +/* + * brin_bloom_get_ndistinct + * Determine the ndistinct value used to size bloom filter. + * + * Adjust the ndistinct value based on the pagesPerRange value. First, + * if it's negative, it's assumed to be relative to maximum number of + * tuples in the range (assuming each page gets MaxHeapTuplesPerPage + * tuples, which is likely a significant over-estimate). We also clamp + * the value, not to over-size the bloom filter unnecessarily. + * + * XXX We can only do this when the pagesPerRange value was supplied. + * If it wasn't, it has to be a read-only access to the index, in which + * case we don't really care. But perhaps we should fall-back to the + * default pagesPerRange value? + * + * XXX We might also fetch info about ndistinct estimate for the column, + * and compute the expected number of distinct values in a range. But + * that may be tricky due to data being sorted in various ways, so it + * seems better to rely on the upper estimate. + * + * XXX We might also calculate a better estimate of rows per BRIN range, + * instead of using MaxHeapTuplesPerPage (which probably produces values + * much higher than reality). + */ +static int +brin_bloom_get_ndistinct(BrinDesc *bdesc, BloomOptions *opts) +{ + double ndistinct; + double maxtuples; + BlockNumber pagesPerRange; + + pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index); + ndistinct = BloomGetNDistinctPerRange(opts); + + Assert(BlockNumberIsValid(pagesPerRange)); + + maxtuples = MaxHeapTuplesPerPage * pagesPerRange; + + /* + * Similarly to n_distinct, negative values are relative - in this case to + * maximum number of tuples in the page range (maxtuples). + */ + if (ndistinct < 0) + ndistinct = (-ndistinct) * maxtuples; + + /* + * Positive values are to be used directly, but we still apply a couple of + * safeties to avoid using unreasonably small bloom filters. + */ + ndistinct = Max(ndistinct, BLOOM_MIN_NDISTINCT_PER_RANGE); + + /* + * And don't use more than the maximum possible number of tuples, in the + * range, which would be entirely wasteful. + */ + ndistinct = Min(ndistinct, maxtuples); + + return (int) ndistinct; +} + +/* + * Examine the given index tuple (which contains partial status of a certain + * page range) by comparing it to the given value that comes from another heap + * tuple. If the new value is outside the bloom filter specified by the + * existing tuple values, update the index tuple and return true. Otherwise, + * return false and do not modify in this case. + */ +Datum +brin_bloom_add_value(PG_FUNCTION_ARGS) +{ + BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); + BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); + Datum newval = PG_GETARG_DATUM(2); + bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3); + BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS(); + Oid colloid = PG_GET_COLLATION(); + FmgrInfo *hashFn; + uint32 hashValue; + bool updated = false; + AttrNumber attno; + BloomFilter *filter; + + Assert(!isnull); + + attno = column->bv_attno; + + /* + * If this is the first non-null value, we need to initialize the bloom + * filter. Otherwise just extract the existing bloom filter from + * BrinValues. + */ + if (column->bv_allnulls) + { + filter = bloom_init(brin_bloom_get_ndistinct(bdesc, opts), + BloomGetFalsePositiveRate(opts)); + column->bv_values[0] = PointerGetDatum(filter); + column->bv_allnulls = false; + updated = true; + } + else + filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]); + + /* + * Compute the hash of the new value, using the supplied hash function, + * and then add the hash value to the bloom filter. + */ + hashFn = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH); + + hashValue = DatumGetUInt32(FunctionCall1Coll(hashFn, colloid, newval)); + + filter = bloom_add_value(filter, hashValue, &updated); + + column->bv_values[0] = PointerGetDatum(filter); + + PG_RETURN_BOOL(updated); +} + +/* + * Given an index tuple corresponding to a certain page range and a scan key, + * return whether the scan key is consistent with the index tuple's bloom + * filter. Return true if so, false otherwise. + */ +Datum +brin_bloom_consistent(PG_FUNCTION_ARGS) +{ + BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); + BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); + ScanKey *keys = (ScanKey *) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(3); + Oid colloid = PG_GET_COLLATION(); + AttrNumber attno; + Datum value; + Datum matches; + FmgrInfo *finfo; + uint32 hashValue; + BloomFilter *filter; + int keyno; + + filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]); + + Assert(filter); + + matches = true; + + for (keyno = 0; keyno < nkeys; keyno++) + { + ScanKey key = keys[keyno]; + + /* NULL keys are handled and filtered-out in bringetbitmap */ + Assert(!(key->sk_flags & SK_ISNULL)); + + attno = key->sk_attno; + value = key->sk_argument; + + switch (key->sk_strategy) + { + case BloomEqualStrategyNumber: + + /* + * In the equality case (WHERE col = someval), we want to + * return the current page range if the minimum value in the + * range <= scan key, and the maximum value >= scan key. + */ + finfo = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH); + + hashValue = DatumGetUInt32(FunctionCall1Coll(finfo, colloid, value)); + matches &= bloom_contains_value(filter, hashValue); + + break; + default: + /* shouldn't happen */ + elog(ERROR, "invalid strategy number %d", key->sk_strategy); + matches = 0; + break; + } + + if (!matches) + break; + } + + PG_RETURN_DATUM(matches); +} + +/* + * Given two BrinValues, update the first of them as a union of the summary + * values contained in both. The second one is untouched. + * + * XXX We assume the bloom filters have the same parameters for now. In the + * future we should have 'can union' function, to decide if we can combine + * two particular bloom filters. + */ +Datum +brin_bloom_union(PG_FUNCTION_ARGS) +{ + int i; + int nbytes; + BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1); + BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2); + BloomFilter *filter_a; + BloomFilter *filter_b; + + Assert(col_a->bv_attno == col_b->bv_attno); + Assert(!col_a->bv_allnulls && !col_b->bv_allnulls); + + filter_a = (BloomFilter *) PG_DETOAST_DATUM(col_a->bv_values[0]); + filter_b = (BloomFilter *) PG_DETOAST_DATUM(col_b->bv_values[0]); + + /* make sure the filters use the same parameters */ + Assert(filter_a && filter_b); + Assert(filter_a->nbits == filter_b->nbits); + Assert(filter_a->nhashes == filter_b->nhashes); + Assert((filter_a->nbits > 0) && (filter_a->nbits % 8 == 0)); + + nbytes = (filter_a->nbits) / 8; + + /* simply OR the bitmaps */ + for (i = 0; i < nbytes; i++) + filter_a->data[i] |= filter_b->data[i]; + + PG_RETURN_VOID(); +} + +/* + * Cache and return inclusion opclass support procedure + * + * Return the procedure corresponding to the given function support number + * or null if it does not exist. + */ +static FmgrInfo * +bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum) +{ + BloomOpaque *opaque; + uint16 basenum = procnum - PROCNUM_BASE; + + /* + * We cache these in the opaque struct, to avoid repetitive syscache + * lookups. + */ + opaque = (BloomOpaque *) bdesc->bd_info[attno - 1]->oi_opaque; + + /* + * If we already searched for this proc and didn't find it, don't bother + * searching again. + */ + if (opaque->extra_proc_missing[basenum]) + return NULL; + + if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid) + { + if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno, + procnum))) + { + fmgr_info_copy(&opaque->extra_procinfos[basenum], + index_getprocinfo(bdesc->bd_index, attno, procnum), + bdesc->bd_context); + } + else + { + opaque->extra_proc_missing[basenum] = true; + return NULL; + } + } + + return &opaque->extra_procinfos[basenum]; +} + +Datum +brin_bloom_options(PG_FUNCTION_ARGS) +{ + local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); + + init_local_reloptions(relopts, sizeof(BloomOptions)); + + add_local_real_reloption(relopts, "n_distinct_per_range", + "number of distinct items expected in a BRIN page range", + BLOOM_DEFAULT_NDISTINCT_PER_RANGE, + -1.0, INT_MAX, offsetof(BloomOptions, nDistinctPerRange)); + + add_local_real_reloption(relopts, "false_positive_rate", + "desired false-positive rate for the bloom filters", + BLOOM_DEFAULT_FALSE_POSITIVE_RATE, + BLOOM_MIN_FALSE_POSITIVE_RATE, + BLOOM_MAX_FALSE_POSITIVE_RATE, + offsetof(BloomOptions, falsePositiveRate)); + + PG_RETURN_VOID(); +} + +/* + * brin_bloom_summary_in + * - input routine for type brin_bloom_summary. + * + * brin_bloom_summary is only used internally to represent summaries + * in BRIN bloom indexes, so it has no operations of its own, and we + * disallow input too. + */ +Datum +brin_bloom_summary_in(PG_FUNCTION_ARGS) +{ + /* + * brin_bloom_summary stores the data in binary form and parsing text + * input is not needed, so disallow this. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + + +/* + * brin_bloom_summary_out + * - output routine for type brin_bloom_summary. + * + * BRIN bloom summaries are serialized into a bytea value, but we want + * to output something nicer humans can understand. + */ +Datum +brin_bloom_summary_out(PG_FUNCTION_ARGS) +{ + BloomFilter *filter; + StringInfoData str; + + /* detoast the data to get value with a full 4B header */ + filter = (BloomFilter *) PG_DETOAST_DATUM(PG_GETARG_BYTEA_PP(0)); + + initStringInfo(&str); + appendStringInfoChar(&str, '{'); + + appendStringInfo(&str, "mode: hashed nhashes: %u nbits: %u nbits_set: %u", + filter->nhashes, filter->nbits, filter->nbits_set); + + appendStringInfoChar(&str, '}'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * brin_bloom_summary_recv + * - binary input routine for type brin_bloom_summary. + */ +Datum +brin_bloom_summary_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * brin_bloom_summary_send + * - binary output routine for type brin_bloom_summary. + * + * BRIN bloom summaries are serialized in a bytea value (although the + * type is named differently), so let's just send that. + */ +Datum +brin_bloom_summary_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} |