summaryrefslogtreecommitdiffstats
path: root/src/backend/executor/execGrouping.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor/execGrouping.c')
-rw-r--r--src/backend/executor/execGrouping.c560
1 files changed, 560 insertions, 0 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
new file mode 100644
index 0000000..c11427a
--- /dev/null
+++ b/src/backend/executor/execGrouping.c
@@ -0,0 +1,560 @@
+/*-------------------------------------------------------------------------
+ *
+ * execGrouping.c
+ * executor utility routines for grouping, hashing, and aggregation
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/execGrouping.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/parallel.h"
+#include "common/hashfn.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+
+static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
+static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
+ const MinimalTuple tuple);
+static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
+ TupleTableSlot *slot,
+ bool *isnew, uint32 hash);
+
+/*
+ * Define parameters for tuple hash table code generation. The interface is
+ * *also* declared in execnodes.h (to generate the types, which are externally
+ * visible).
+ */
+#define SH_PREFIX tuplehash
+#define SH_ELEMENT_TYPE TupleHashEntryData
+#define SH_KEY_TYPE MinimalTuple
+#define SH_KEY firstTuple
+#define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
+#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
+#define SH_SCOPE extern
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) a->hash
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+
+/*****************************************************************************
+ * Utility routines for grouping tuples together
+ *****************************************************************************/
+
+/*
+ * execTuplesMatchPrepare
+ * Build expression that can be evaluated using ExecQual(), returning
+ * whether an ExprContext's inner/outer tuples are NOT DISTINCT
+ */
+ExprState *
+execTuplesMatchPrepare(TupleDesc desc,
+ int numCols,
+ const AttrNumber *keyColIdx,
+ const Oid *eqOperators,
+ const Oid *collations,
+ PlanState *parent)
+{
+ Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
+ int i;
+ ExprState *expr;
+
+ if (numCols == 0)
+ return NULL;
+
+ /* lookup equality functions */
+ for (i = 0; i < numCols; i++)
+ eqFunctions[i] = get_opcode(eqOperators[i]);
+
+ /* build actual expression */
+ expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
+ numCols, keyColIdx, eqFunctions, collations,
+ parent);
+
+ return expr;
+}
+
+/*
+ * execTuplesHashPrepare
+ * Look up the equality and hashing functions needed for a TupleHashTable.
+ *
+ * This is similar to execTuplesMatchPrepare, but we also need to find the
+ * hash functions associated with the equality operators. *eqFunctions and
+ * *hashFunctions receive the palloc'd result arrays.
+ *
+ * Note: we expect that the given operators are not cross-type comparisons.
+ */
+void
+execTuplesHashPrepare(int numCols,
+ const Oid *eqOperators,
+ Oid **eqFuncOids,
+ FmgrInfo **hashFunctions)
+{
+ int i;
+
+ *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
+ *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+
+ for (i = 0; i < numCols; i++)
+ {
+ Oid eq_opr = eqOperators[i];
+ Oid eq_function;
+ Oid left_hash_function;
+ Oid right_hash_function;
+
+ eq_function = get_opcode(eq_opr);
+ if (!get_op_hash_functions(eq_opr,
+ &left_hash_function, &right_hash_function))
+ elog(ERROR, "could not find hash function for hash operator %u",
+ eq_opr);
+ /* We're not supporting cross-type cases here */
+ Assert(left_hash_function == right_hash_function);
+ (*eqFuncOids)[i] = eq_function;
+ fmgr_info(right_hash_function, &(*hashFunctions)[i]);
+ }
+}
+
+
+/*****************************************************************************
+ * Utility routines for all-in-memory hash tables
+ *
+ * These routines build hash tables for grouping tuples together (eg, for
+ * hash aggregation). There is one entry for each not-distinct set of tuples
+ * presented.
+ *****************************************************************************/
+
+/*
+ * Construct an empty TupleHashTable
+ *
+ * numCols, keyColIdx: identify the tuple fields to use as lookup key
+ * eqfunctions: equality comparison functions to use
+ * hashfunctions: datatype-specific hashing functions to use
+ * nbuckets: initial estimate of hashtable size
+ * additionalsize: size of data stored in ->additional
+ * metacxt: memory context for long-lived allocation, but not per-entry data
+ * tablecxt: memory context in which to store table entries
+ * tempcxt: short-lived context for evaluation hash and comparison functions
+ *
+ * The function arrays may be made with execTuplesHashPrepare(). Note they
+ * are not cross-type functions, but expect to see the table datatype(s)
+ * on both sides.
+ *
+ * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
+ * storage that will live as long as the hashtable does.
+ */
+TupleHashTable
+BuildTupleHashTableExt(PlanState *parent,
+ TupleDesc inputDesc,
+ int numCols, AttrNumber *keyColIdx,
+ const Oid *eqfuncoids,
+ FmgrInfo *hashfunctions,
+ Oid *collations,
+ long nbuckets, Size additionalsize,
+ MemoryContext metacxt,
+ MemoryContext tablecxt,
+ MemoryContext tempcxt,
+ bool use_variable_hash_iv)
+{
+ TupleHashTable hashtable;
+ Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
+ Size hash_mem_limit;
+ MemoryContext oldcontext;
+ bool allow_jit;
+
+ Assert(nbuckets > 0);
+
+ /* Limit initial table size request to not more than hash_mem */
+ hash_mem_limit = get_hash_memory_limit() / entrysize;
+ if (nbuckets > hash_mem_limit)
+ nbuckets = hash_mem_limit;
+
+ oldcontext = MemoryContextSwitchTo(metacxt);
+
+ hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
+
+ hashtable->numCols = numCols;
+ hashtable->keyColIdx = keyColIdx;
+ hashtable->tab_hash_funcs = hashfunctions;
+ hashtable->tab_collations = collations;
+ hashtable->tablecxt = tablecxt;
+ hashtable->tempcxt = tempcxt;
+ hashtable->entrysize = entrysize;
+ hashtable->tableslot = NULL; /* will be made on first lookup */
+ hashtable->inputslot = NULL;
+ hashtable->in_hash_funcs = NULL;
+ hashtable->cur_eq_func = NULL;
+
+ /*
+ * If parallelism is in use, even if the leader backend is performing the
+ * scan itself, we don't want to create the hashtable exactly the same way
+ * in all workers. As hashtables are iterated over in keyspace-order,
+ * doing so in all processes in the same way is likely to lead to
+ * "unbalanced" hashtables when the table size initially is
+ * underestimated.
+ */
+ if (use_variable_hash_iv)
+ hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
+ else
+ hashtable->hash_iv = 0;
+
+ hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
+
+ /*
+ * We copy the input tuple descriptor just for safety --- we assume all
+ * input tuples will have equivalent descriptors.
+ */
+ hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
+ &TTSOpsMinimalTuple);
+
+ /*
+ * If the old reset interface is used (i.e. BuildTupleHashTable, rather
+ * than BuildTupleHashTableExt), allowing JIT would lead to the generated
+ * functions to a) live longer than the query b) be re-generated each time
+ * the table is being reset. Therefore prevent JIT from being used in that
+ * case, by not providing a parent node (which prevents accessing the
+ * JitContext in the EState).
+ */
+ allow_jit = metacxt != tablecxt;
+
+ /* build comparator for all columns */
+ /* XXX: should we support non-minimal tuples for the inputslot? */
+ hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
+ &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
+ numCols,
+ keyColIdx, eqfuncoids, collations,
+ allow_jit ? parent : NULL);
+
+ /*
+ * While not pretty, it's ok to not shut down this context, but instead
+ * rely on the containing memory context being reset, as
+ * ExecBuildGroupingEqual() only builds a very simple expression calling
+ * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
+ */
+ hashtable->exprcontext = CreateStandaloneExprContext();
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return hashtable;
+}
+
+/*
+ * BuildTupleHashTable is a backwards-compatibilty wrapper for
+ * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
+ * tablecxt. Note that hashtables created this way cannot be reset leak-free
+ * with ResetTupleHashTable().
+ */
+TupleHashTable
+BuildTupleHashTable(PlanState *parent,
+ TupleDesc inputDesc,
+ int numCols, AttrNumber *keyColIdx,
+ const Oid *eqfuncoids,
+ FmgrInfo *hashfunctions,
+ Oid *collations,
+ long nbuckets, Size additionalsize,
+ MemoryContext tablecxt,
+ MemoryContext tempcxt,
+ bool use_variable_hash_iv)
+{
+ return BuildTupleHashTableExt(parent,
+ inputDesc,
+ numCols, keyColIdx,
+ eqfuncoids,
+ hashfunctions,
+ collations,
+ nbuckets, additionalsize,
+ tablecxt,
+ tablecxt,
+ tempcxt,
+ use_variable_hash_iv);
+}
+
+/*
+ * Reset contents of the hashtable to be empty, preserving all the non-content
+ * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
+ * also be reset, otherwise there will be leaks.
+ */
+void
+ResetTupleHashTable(TupleHashTable hashtable)
+{
+ tuplehash_reset(hashtable->hashtab);
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple. The tuple must be the same type as the hashtable entries.
+ *
+ * If isnew is NULL, we do not create new entries; we return NULL if no
+ * match is found.
+ *
+ * If hash is not NULL, we set it to the calculated hash value. This allows
+ * callers access to the hash value even if no entry is returned.
+ *
+ * If isnew isn't NULL, then a new entry is created if no existing entry
+ * matches. On return, *isnew is true if the entry is newly created,
+ * false if it existed already. ->additional_data in the new entry has
+ * been zeroed.
+ */
+TupleHashEntry
+LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 *hash)
+{
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+ uint32 local_hash;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ /* set up data needed by hash and match functions */
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->cur_eq_func = hashtable->tab_eq_func;
+
+ local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+ entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
+
+ if (hash != NULL)
+ *hash = local_hash;
+
+ Assert(entry == NULL || entry->hash == local_hash);
+
+ MemoryContextSwitchTo(oldContext);
+
+ return entry;
+}
+
+/*
+ * Compute the hash value for a tuple
+ */
+uint32
+TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
+{
+ MemoryContext oldContext;
+ uint32 hash;
+
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+
+ MemoryContextSwitchTo(oldContext);
+
+ return hash;
+}
+
+/*
+ * A variant of LookupTupleHashEntry for callers that have already computed
+ * the hash value.
+ */
+TupleHashEntry
+LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 hash)
+{
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ /* set up data needed by hash and match functions */
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->cur_eq_func = hashtable->tab_eq_func;
+
+ entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+ Assert(entry == NULL || entry->hash == hash);
+
+ MemoryContextSwitchTo(oldContext);
+
+ return entry;
+}
+
+/*
+ * Search for a hashtable entry matching the given tuple. No entry is
+ * created if there's not a match. This is similar to the non-creating
+ * case of LookupTupleHashEntry, except that it supports cross-type
+ * comparisons, in which the given tuple is not of the same type as the
+ * table entries. The caller must provide the hash functions to use for
+ * the input tuple, as well as the equality functions, since these may be
+ * different from the table's internal functions.
+ */
+TupleHashEntry
+FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+ ExprState *eqcomp,
+ FmgrInfo *hashfunctions)
+{
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+ MinimalTuple key;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ /* Set up data needed by hash and match functions */
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashfunctions;
+ hashtable->cur_eq_func = eqcomp;
+
+ /* Search the hash table */
+ key = NULL; /* flag to reference inputslot */
+ entry = tuplehash_lookup(hashtable->hashtab, key);
+ MemoryContextSwitchTo(oldContext);
+
+ return entry;
+}
+
+/*
+ * If tuple is NULL, use the input slot instead. This convention avoids the
+ * need to materialize virtual input tuples unless they actually need to get
+ * copied into the table.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
+ */
+static uint32
+TupleHashTableHash_internal(struct tuplehash_hash *tb,
+ const MinimalTuple tuple)
+{
+ TupleHashTable hashtable = (TupleHashTable) tb->private_data;
+ int numCols = hashtable->numCols;
+ AttrNumber *keyColIdx = hashtable->keyColIdx;
+ uint32 hashkey = hashtable->hash_iv;
+ TupleTableSlot *slot;
+ FmgrInfo *hashfunctions;
+ int i;
+
+ if (tuple == NULL)
+ {
+ /* Process the current input tuple for the table */
+ slot = hashtable->inputslot;
+ hashfunctions = hashtable->in_hash_funcs;
+ }
+ else
+ {
+ /*
+ * Process a tuple already stored in the table.
+ *
+ * (this case never actually occurs due to the way simplehash.h is
+ * used, as the hash-value is stored in the entries)
+ */
+ slot = hashtable->tableslot;
+ ExecStoreMinimalTuple(tuple, slot, false);
+ hashfunctions = hashtable->tab_hash_funcs;
+ }
+
+ for (i = 0; i < numCols; i++)
+ {
+ AttrNumber att = keyColIdx[i];
+ Datum attr;
+ bool isNull;
+
+ /* rotate hashkey left 1 bit at each step */
+ hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+ attr = slot_getattr(slot, att, &isNull);
+
+ if (!isNull) /* treat nulls as having hash key 0 */
+ {
+ uint32 hkey;
+
+ hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+ hashtable->tab_collations[i],
+ attr));
+ hashkey ^= hkey;
+ }
+ }
+
+ /*
+ * The way hashes are combined above, among each other and with the IV,
+ * doesn't lead to good bit perturbation. As the IV's goal is to lead to
+ * achieve that, perform a round of hashing of the combined hash -
+ * resulting in near perfect perturbation.
+ */
+ return murmurhash32(hashkey);
+}
+
+/*
+ * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
+ * so that we can avoid switching the memory context multiple times for
+ * LookupTupleHashEntry.
+ *
+ * NB: This function may or may not change the memory context. Caller is
+ * expected to change it back.
+ */
+static inline TupleHashEntry
+LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 hash)
+{
+ TupleHashEntryData *entry;
+ bool found;
+ MinimalTuple key;
+
+ key = NULL; /* flag to reference inputslot */
+
+ if (isnew)
+ {
+ entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
+
+ if (found)
+ {
+ /* found pre-existing entry */
+ *isnew = false;
+ }
+ else
+ {
+ /* created new entry */
+ *isnew = true;
+ /* zero caller data */
+ entry->additional = NULL;
+ MemoryContextSwitchTo(hashtable->tablecxt);
+ /* Copy the first tuple into the table context */
+ entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+ }
+ }
+ else
+ {
+ entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
+ }
+
+ return entry;
+}
+
+/*
+ * See whether two tuples (presumably of the same hash value) match
+ */
+static int
+TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
+{
+ TupleTableSlot *slot1;
+ TupleTableSlot *slot2;
+ TupleHashTable hashtable = (TupleHashTable) tb->private_data;
+ ExprContext *econtext = hashtable->exprcontext;
+
+ /*
+ * We assume that simplehash.h will only ever call us with the first
+ * argument being an actual table entry, and the second argument being
+ * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
+ * could be supported too, but is not currently required.
+ */
+ Assert(tuple1 != NULL);
+ slot1 = hashtable->tableslot;
+ ExecStoreMinimalTuple(tuple1, slot1, false);
+ Assert(tuple2 == NULL);
+ slot2 = hashtable->inputslot;
+
+ /* For crosstype comparisons, the inputslot must be first */
+ econtext->ecxt_innertuple = slot2;
+ econtext->ecxt_outertuple = slot1;
+ return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
+}