summaryrefslogtreecommitdiffstats
path: root/src/backend/access/tablesample
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/tablesample')
-rw-r--r--src/backend/access/tablesample/Makefile20
-rw-r--r--src/backend/access/tablesample/bernoulli.c229
-rw-r--r--src/backend/access/tablesample/system.c257
-rw-r--r--src/backend/access/tablesample/tablesample.c40
4 files changed, 546 insertions, 0 deletions
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
new file mode 100644
index 0000000..01641e5
--- /dev/null
+++ b/src/backend/access/tablesample/Makefile
@@ -0,0 +1,20 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for access/tablesample
+#
+# IDENTIFICATION
+# src/backend/access/tablesample/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/tablesample
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = \
+ bernoulli.o \
+ system.o \
+ tablesample.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
new file mode 100644
index 0000000..ae6e4f5
--- /dev/null
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -0,0 +1,229 @@
+/*-------------------------------------------------------------------------
+ *
+ * bernoulli.c
+ * support routines for BERNOULLI tablesample method
+ *
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate TID together with
+ * the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/bernoulli.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "optimizer/optimizer.h"
+#include "utils/builtins.h"
+
+
+/* Private state */
+typedef struct
+{
+ uint64 cutoff; /* select tuples with hash less than this */
+ uint32 seed; /* random seed */
+ OffsetNumber lt; /* last tuple returned from current block */
+} BernoulliSamplerData;
+
+
+static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void bernoulli_initsamplescan(SampleScanState *node,
+ int eflags);
+static void bernoulli_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+
+
+/*
+ * Create a TsmRoutine descriptor for the BERNOULLI method.
+ */
+Datum
+tsm_bernoulli_handler(PG_FUNCTION_ARGS)
+{
+ TsmRoutine *tsm = makeNode(TsmRoutine);
+
+ tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+ tsm->repeatable_across_queries = true;
+ tsm->repeatable_across_scans = true;
+ tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
+ tsm->InitSampleScan = bernoulli_initsamplescan;
+ tsm->BeginSampleScan = bernoulli_beginsamplescan;
+ tsm->NextSampleBlock = NULL;
+ tsm->NextSampleTuple = bernoulli_nextsampletuple;
+ tsm->EndSampleScan = NULL;
+
+ PG_RETURN_POINTER(tsm);
+}
+
+/*
+ * Sample size estimation.
+ */
+static void
+bernoulli_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
+{
+ Node *pctnode;
+ float4 samplefract;
+
+ /* Try to extract an estimate for the sample percentage */
+ pctnode = (Node *) linitial(paramexprs);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, Const) &&
+ !((Const *) pctnode)->constisnull)
+ {
+ samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+ samplefract /= 100.0f;
+ else
+ {
+ /* Default samplefract if the value is bogus */
+ samplefract = 0.1f;
+ }
+ }
+ else
+ {
+ /* Default samplefract if we didn't obtain a non-null Const */
+ samplefract = 0.1f;
+ }
+
+ /* We'll visit all pages of the baserel */
+ *pages = baserel->pages;
+
+ *tuples = clamp_row_est(baserel->tuples * samplefract);
+}
+
+/*
+ * Initialize during executor setup.
+ */
+static void
+bernoulli_initsamplescan(SampleScanState *node, int eflags)
+{
+ node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
+}
+
+/*
+ * Examine parameters and prepare for a sample scan.
+ */
+static void
+bernoulli_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
+{
+ BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+ double percent = DatumGetFloat4(params[0]);
+ double dcutoff;
+
+ if (percent < 0 || percent > 100 || isnan(percent))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample percentage must be between 0 and 100")));
+
+ /*
+ * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+ * store that as a uint64, of course. Note that this gives strictly
+ * correct behavior at the limits of zero or one probability.
+ */
+ dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+ sampler->cutoff = (uint64) dcutoff;
+ sampler->seed = seed;
+ sampler->lt = InvalidOffsetNumber;
+
+ /*
+ * Use bulkread, since we're scanning all pages. But pagemode visibility
+ * checking is a win only at larger sampling fractions. The 25% cutoff
+ * here is based on very limited experimentation.
+ */
+ node->use_bulkread = true;
+ node->use_pagemode = (percent >= 25);
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists). The reason is that we do the coinflip for every tuple
+ * offset in the table. Since all tuples have the same probability of being
+ * returned, it doesn't matter if we do extra coinflips for invisible tuples.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+bernoulli_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
+{
+ BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+ OffsetNumber tupoffset = sampler->lt;
+ uint32 hashinput[3];
+
+ /* Advance to first/next tuple in block */
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /*
+ * We compute the hash by applying hash_any to an array of 3 uint32's
+ * containing the block, offset, and seed. This is efficient to set up,
+ * and with the current implementation of hash_any, it gives
+ * machine-independent results, which is a nice property for regression
+ * testing.
+ *
+ * These words in the hash input are the same throughout the block:
+ */
+ hashinput[0] = blockno;
+ hashinput[2] = sampler->seed;
+
+ /*
+ * Loop over tuple offsets until finding suitable TID or reaching end of
+ * block.
+ */
+ for (; tupoffset <= maxoffset; tupoffset++)
+ {
+ uint32 hash;
+
+ hashinput[1] = tupoffset;
+
+ hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+ (int) sizeof(hashinput)));
+ if (hash < sampler->cutoff)
+ break;
+ }
+
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ return tupoffset;
+}
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
new file mode 100644
index 0000000..b0869e5
--- /dev/null
+++ b/src/backend/access/tablesample/system.c
@@ -0,0 +1,257 @@
+/*-------------------------------------------------------------------------
+ *
+ * system.c
+ * support routines for SYSTEM tablesample method
+ *
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate block number together
+ * with the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/system.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "optimizer/optimizer.h"
+#include "utils/builtins.h"
+
+
+/* Private state */
+typedef struct
+{
+ uint64 cutoff; /* select blocks with hash less than this */
+ uint32 seed; /* random seed */
+ BlockNumber nextblock; /* next block to consider sampling */
+ OffsetNumber lt; /* last tuple returned from current block */
+} SystemSamplerData;
+
+
+static void system_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void system_initsamplescan(SampleScanState *node,
+ int eflags);
+static void system_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
+static OffsetNumber system_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+
+
+/*
+ * Create a TsmRoutine descriptor for the SYSTEM method.
+ */
+Datum
+tsm_system_handler(PG_FUNCTION_ARGS)
+{
+ TsmRoutine *tsm = makeNode(TsmRoutine);
+
+ tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+ tsm->repeatable_across_queries = true;
+ tsm->repeatable_across_scans = true;
+ tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
+ tsm->InitSampleScan = system_initsamplescan;
+ tsm->BeginSampleScan = system_beginsamplescan;
+ tsm->NextSampleBlock = system_nextsampleblock;
+ tsm->NextSampleTuple = system_nextsampletuple;
+ tsm->EndSampleScan = NULL;
+
+ PG_RETURN_POINTER(tsm);
+}
+
+/*
+ * Sample size estimation.
+ */
+static void
+system_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
+{
+ Node *pctnode;
+ float4 samplefract;
+
+ /* Try to extract an estimate for the sample percentage */
+ pctnode = (Node *) linitial(paramexprs);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, Const) &&
+ !((Const *) pctnode)->constisnull)
+ {
+ samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+ samplefract /= 100.0f;
+ else
+ {
+ /* Default samplefract if the value is bogus */
+ samplefract = 0.1f;
+ }
+ }
+ else
+ {
+ /* Default samplefract if we didn't obtain a non-null Const */
+ samplefract = 0.1f;
+ }
+
+ /* We'll visit a sample of the pages ... */
+ *pages = clamp_row_est(baserel->pages * samplefract);
+
+ /* ... and hopefully get a representative number of tuples from them */
+ *tuples = clamp_row_est(baserel->tuples * samplefract);
+}
+
+/*
+ * Initialize during executor setup.
+ */
+static void
+system_initsamplescan(SampleScanState *node, int eflags)
+{
+ node->tsm_state = palloc0(sizeof(SystemSamplerData));
+}
+
+/*
+ * Examine parameters and prepare for a sample scan.
+ */
+static void
+system_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
+{
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ double percent = DatumGetFloat4(params[0]);
+ double dcutoff;
+
+ if (percent < 0 || percent > 100 || isnan(percent))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample percentage must be between 0 and 100")));
+
+ /*
+ * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+ * store that as a uint64, of course. Note that this gives strictly
+ * correct behavior at the limits of zero or one probability.
+ */
+ dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+ sampler->cutoff = (uint64) dcutoff;
+ sampler->seed = seed;
+ sampler->nextblock = 0;
+ sampler->lt = InvalidOffsetNumber;
+
+ /*
+ * Bulkread buffer access strategy probably makes sense unless we're
+ * scanning a very small fraction of the table. The 1% cutoff here is a
+ * guess. We should use pagemode visibility checking, since we scan all
+ * tuples on each selected page.
+ */
+ node->use_bulkread = (percent >= 1);
+ node->use_pagemode = true;
+}
+
+/*
+ * Select next block to sample.
+ */
+static BlockNumber
+system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
+{
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ BlockNumber nextblock = sampler->nextblock;
+ uint32 hashinput[2];
+
+ /*
+ * We compute the hash by applying hash_any to an array of 2 uint32's
+ * containing the block number and seed. This is efficient to set up, and
+ * with the current implementation of hash_any, it gives
+ * machine-independent results, which is a nice property for regression
+ * testing.
+ *
+ * These words in the hash input are the same throughout the block:
+ */
+ hashinput[1] = sampler->seed;
+
+ /*
+ * Loop over block numbers until finding suitable block or reaching end of
+ * relation.
+ */
+ for (; nextblock < nblocks; nextblock++)
+ {
+ uint32 hash;
+
+ hashinput[0] = nextblock;
+
+ hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+ (int) sizeof(hashinput)));
+ if (hash < sampler->cutoff)
+ break;
+ }
+
+ if (nextblock < nblocks)
+ {
+ /* Found a suitable block; remember where we should start next time */
+ sampler->nextblock = nextblock + 1;
+ return nextblock;
+ }
+
+ /* Done, but let's reset nextblock to 0 for safety. */
+ sampler->nextblock = 0;
+ return InvalidBlockNumber;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists); nodeSamplescan.c will deal with that.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+system_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
+{
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ OffsetNumber tupoffset = sampler->lt;
+
+ /* Advance to next possible offset on page */
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /* Done? */
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ return tupoffset;
+}
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
new file mode 100644
index 0000000..02f2a95
--- /dev/null
+++ b/src/backend/access/tablesample/tablesample.c
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.c
+ * Support functions for TABLESAMPLE feature
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/tablesample.c
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/tsmapi.h"
+
+
+/*
+ * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler.
+ *
+ * This is a convenience routine that's just meant to check for errors.
+ */
+TsmRoutine *
+GetTsmRoutine(Oid tsmhandler)
+{
+ Datum datum;
+ TsmRoutine *routine;
+
+ datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL));
+ routine = (TsmRoutine *) DatumGetPointer(datum);
+
+ if (routine == NULL || !IsA(routine, TsmRoutine))
+ elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct",
+ tsmhandler);
+
+ return routine;
+}