Diffstat (limited to 'src/backend/executor/nodeSetOp.c')
-rw-r--r--  src/backend/executor/nodeSetOp.c  651
1 files changed, 651 insertions, 0 deletions
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
new file mode 100644
index 0000000..aad7ac0
--- /dev/null
+++ b/src/backend/executor/nodeSetOp.c
@@ -0,0 +1,651 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSetOp.c
+ * Routines to handle INTERSECT and EXCEPT selection
+ *
+ * The input of a SetOp node consists of tuples from two relations,
+ * which have been combined into one dataset, with a junk attribute added
+ * that shows which relation each tuple came from. In SETOP_SORTED mode,
+ * the input has furthermore been sorted according to all the grouping
+ * columns (ie, all the non-junk attributes). The SetOp node scans each
+ * group of identical tuples to determine how many came from each input
+ * relation. Then it is a simple matter to emit the output demanded by the
+ * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the input is delivered in no particular order,
+ * except that we know all the tuples from one input relation will come before
+ * all the tuples of the other. The planner guarantees that the first input
+ * relation is the left-hand one for EXCEPT, and tries to make the smaller
+ * input relation come first for INTERSECT. We build a hash table in memory
+ * with one entry for each group of identical tuples, and count the number of
+ * tuples in the group from each relation. After seeing all the input, we
+ * scan the hashtable and generate the correct output using those counts.
+ * We can avoid making hashtable entries for any tuples appearing only in the
+ * second input relation, since they cannot result in any output.
+ *
+ * This node type is not used for UNION or UNION ALL, since those can be
+ * implemented more cheaply (there's no need for the junk attribute to
+ * identify the source relation).
+ *
+ * Note that SetOp does no qual checking or projection.  The delivered
+ * output tuples are just copies of the first-to-arrive tuple in each
+ * input group.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeSetOp.c
+ *
+ *-------------------------------------------------------------------------
+ */
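+
+/*
+ * A small worked example of these rules: if the left input holds {A, A, B}
+ * and the right input holds {A, B, B}, the per-group counts
+ * (numLeft, numRight) are (2, 1) for A and (1, 2) for B.  INTERSECT then
+ * emits {A, B}; INTERSECT ALL emits one A and one B (the smaller count in
+ * each group); EXCEPT emits nothing; and EXCEPT ALL emits a single A
+ * (numLeft minus numRight, never going below zero).
+ */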
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "executor/executor.h"
+#include "executor/nodeSetOp.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+
+/*
+ * SetOpStatePerGroupData - per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
+ * the plan state node. In SETOP_HASHED mode, the hash table contains one
+ * of these for each tuple group.
+ */
+typedef struct SetOpStatePerGroupData
+{
+ long numLeft; /* number of left-input dups in group */
+ long numRight; /* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+
+static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static void setop_fill_hash_table(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
+
+
+/*
+ * Initialize state for a new group of input values.
+ */
+static inline void
+initialize_counts(SetOpStatePerGroup pergroup)
+{
+ pergroup->numLeft = pergroup->numRight = 0;
+}
+
+/*
+ * Advance the appropriate counter for one input tuple.
+ */
+static inline void
+advance_counts(SetOpStatePerGroup pergroup, int flag)
+{
+ if (flag)
+ pergroup->numRight++;
+ else
+ pergroup->numLeft++;
+}
+
+/*
+ * Fetch the "flag" column from an input tuple.
+ * This is an integer column with value 0 for left side, 1 for right side.
+ */
+static int
+fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
+{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
+ int flag;
+ bool isNull;
+
+ flag = DatumGetInt32(slot_getattr(inputslot,
+ node->flagColIdx,
+ &isNull));
+ Assert(!isNull);
+ Assert(flag == 0 || flag == 1);
+ return flag;
+}
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(SetOpState *setopstate)
+{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
+ ExprContext *econtext = setopstate->ps.ps_ExprContext;
+ TupleDesc desc = ExecGetResultType(outerPlanState(setopstate));
+
+ Assert(node->strategy == SETOP_HASHED);
+ Assert(node->numGroups > 0);
+
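+	/*
+	 * In the call below, node->numGroups is the planner's estimate of the
+	 * number of distinct groups, used to size the table initially, and the
+	 * zero requests no extra per-entry space: the per-group counts are
+	 * instead allocated separately and hung off entry->additional in
+	 * setop_fill_hash_table.
+	 */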
+ setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
+ desc,
+ node->numCols,
+ node->dupColIdx,
+ setopstate->eqfuncoids,
+ setopstate->hashfunctions,
+ node->dupCollations,
+ node->numGroups,
+ 0,
+ setopstate->ps.state->es_query_cxt,
+ setopstate->tableContext,
+ econtext->ecxt_per_tuple_memory,
+ false);
+}
+
+/*
+ * We've completed processing a tuple group. Decide how many copies (if any)
+ * of its representative row to emit, and store the count into numOutput.
+ * This logic is straight from the SQL92 specification.
+ */
+static void
+set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+ SetOp *plannode = (SetOp *) setopstate->ps.plan;
+
+ switch (plannode->cmd)
+ {
+ case SETOPCMD_INTERSECT:
+ if (pergroup->numLeft > 0 && pergroup->numRight > 0)
+ setopstate->numOutput = 1;
+ else
+ setopstate->numOutput = 0;
+ break;
+ case SETOPCMD_INTERSECT_ALL:
+ setopstate->numOutput =
+ (pergroup->numLeft < pergroup->numRight) ?
+ pergroup->numLeft : pergroup->numRight;
+ break;
+ case SETOPCMD_EXCEPT:
+ if (pergroup->numLeft > 0 && pergroup->numRight == 0)
+ setopstate->numOutput = 1;
+ else
+ setopstate->numOutput = 0;
+ break;
+ case SETOPCMD_EXCEPT_ALL:
+ setopstate->numOutput =
+ (pergroup->numLeft < pergroup->numRight) ?
+ 0 : (pergroup->numLeft - pergroup->numRight);
+ break;
+ default:
+ elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
+ break;
+ }
+}
+
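+/*
+ * The count computed by set_output_count is consumed one row per call: the
+ * first copy of a group's representative tuple is returned directly by
+ * setop_retrieve_direct or setop_retrieve_hash_table, and any remaining
+ * copies required by the ALL variants are handed out by later ExecSetOp
+ * calls via the numOutput > 0 check at the top of that function.
+ */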
+
+/* ----------------------------------------------------------------
+ * ExecSetOp
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot * /* return: a tuple or NULL */
+ExecSetOp(PlanState *pstate)
+{
+ SetOpState *node = castNode(SetOpState, pstate);
+ SetOp *plannode = (SetOp *) node->ps.plan;
+ TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * If the previously-returned tuple needs to be returned more than once,
+ * keep returning it.
+ */
+ if (node->numOutput > 0)
+ {
+ node->numOutput--;
+ return resultTupleSlot;
+ }
+
+ /* Otherwise, we're done if we are out of groups */
+ if (node->setop_done)
+ return NULL;
+
+ /* Fetch the next tuple group according to the correct strategy */
+ if (plannode->strategy == SETOP_HASHED)
+ {
+ if (!node->table_filled)
+ setop_fill_hash_table(node);
+ return setop_retrieve_hash_table(node);
+ }
+ else
+ return setop_retrieve_direct(node);
+}
+
+/*
+ * ExecSetOp for non-hashed case
+ */
+static TupleTableSlot *
+setop_retrieve_direct(SetOpState *setopstate)
+{
+ PlanState *outerPlan;
+ SetOpStatePerGroup pergroup;
+ TupleTableSlot *outerslot;
+ TupleTableSlot *resultTupleSlot;
+ ExprContext *econtext = setopstate->ps.ps_ExprContext;
+
+ /*
+ * get state info from node
+ */
+ outerPlan = outerPlanState(setopstate);
+ pergroup = (SetOpStatePerGroup) setopstate->pergroup;
+ resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+ /*
+ * We loop retrieving groups until we find one we should return
+ */
+ while (!setopstate->setop_done)
+ {
+ /*
+ * If we don't already have the first tuple of the new group, fetch it
+ * from the outer plan.
+ */
+ if (setopstate->grp_firstTuple == NULL)
+ {
+ outerslot = ExecProcNode(outerPlan);
+ if (!TupIsNull(outerslot))
+ {
+ /* Make a copy of the first input tuple */
+ setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
+ }
+ else
+ {
+ /* outer plan produced no tuples at all */
+ setopstate->setop_done = true;
+ return NULL;
+ }
+ }
+
+ /*
+ * Store the copied first input tuple in the tuple table slot reserved
+ * for it. The tuple will be deleted when it is cleared from the
+ * slot.
+ */
+ ExecStoreHeapTuple(setopstate->grp_firstTuple,
+ resultTupleSlot,
+ true);
+ setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
+
+ /* Initialize working state for a new input tuple group */
+ initialize_counts(pergroup);
+
+ /* Count the first input tuple */
+ advance_counts(pergroup,
+ fetch_tuple_flag(setopstate, resultTupleSlot));
+
+ /*
+ * Scan the outer plan until we exhaust it or cross a group boundary.
+ */
+ for (;;)
+ {
+ outerslot = ExecProcNode(outerPlan);
+ if (TupIsNull(outerslot))
+ {
+ /* no more outer-plan tuples available */
+ setopstate->setop_done = true;
+ break;
+ }
+
+ /*
+ * Check whether we've crossed a group boundary.
+ */
+ econtext->ecxt_outertuple = resultTupleSlot;
+ econtext->ecxt_innertuple = outerslot;
+
+ if (!ExecQualAndReset(setopstate->eqfunction, econtext))
+ {
+ /*
+ * Save the first input tuple of the next group.
+ */
+ setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
+ break;
+ }
+
+ /* Still in same group, so count this tuple */
+ advance_counts(pergroup,
+ fetch_tuple_flag(setopstate, outerslot));
+ }
+
+ /*
+ * Done scanning input tuple group. See if we should emit any copies
+ * of result tuple, and if so return the first copy.
+ */
+ set_output_count(setopstate, pergroup);
+
+ if (setopstate->numOutput > 0)
+ {
+ setopstate->numOutput--;
+ return resultTupleSlot;
+ }
+ }
+
+ /* No more groups */
+ ExecClearTuple(resultTupleSlot);
+ return NULL;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read input and build hash table
+ */
+static void
+setop_fill_hash_table(SetOpState *setopstate)
+{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
+ PlanState *outerPlan;
+ int firstFlag;
+ bool in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+ ExprContext *econtext = setopstate->ps.ps_ExprContext;
+
+ /*
+ * get state info from node
+ */
+ outerPlan = outerPlanState(setopstate);
+ firstFlag = node->firstFlag;
+ /* verify planner didn't mess up */
+ Assert(firstFlag == 0 ||
+ (firstFlag == 1 &&
+ (node->cmd == SETOPCMD_INTERSECT ||
+ node->cmd == SETOPCMD_INTERSECT_ALL)));
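+	/*
+	 * That is, the first-arriving input is normally the left one (flag 0);
+	 * only for INTERSECT or INTERSECT ALL may the planner have swapped the
+	 * inputs so that the smaller relation is the one we build the hash
+	 * table from.
+	 */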
+
+ /*
+ * Process each outer-plan tuple, and then fetch the next one, until we
+ * exhaust the outer plan.
+ */
+ in_first_rel = true;
+ for (;;)
+ {
+ TupleTableSlot *outerslot;
+ int flag;
+ TupleHashEntryData *entry;
+ bool isnew;
+
+ outerslot = ExecProcNode(outerPlan);
+ if (TupIsNull(outerslot))
+ break;
+
+ /* Identify whether it's left or right input */
+ flag = fetch_tuple_flag(setopstate, outerslot);
+
+ if (flag == firstFlag)
+ {
+ /* (still) in first input relation */
+ Assert(in_first_rel);
+
+ /* Find or build hashtable entry for this tuple's group */
+ entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+ &isnew, NULL);
+
+ /* If new tuple group, initialize counts */
+ if (isnew)
+ {
+ entry->additional = (SetOpStatePerGroup)
+ MemoryContextAlloc(setopstate->hashtable->tablecxt,
+ sizeof(SetOpStatePerGroupData));
+ initialize_counts((SetOpStatePerGroup) entry->additional);
+ }
+
+ /* Advance the counts */
+ advance_counts((SetOpStatePerGroup) entry->additional, flag);
+ }
+ else
+ {
+ /* reached second relation */
+ in_first_rel = false;
+
+ /* For tuples not seen previously, do not make hashtable entry */
+ entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+ NULL, NULL);
+
+ /* Advance the counts if entry is already present */
+ if (entry)
+ advance_counts((SetOpStatePerGroup) entry->additional, flag);
+ }
+
+ /* Must reset expression context after each hashtable lookup */
+ ResetExprContext(econtext);
+ }
+
+ setopstate->table_filled = true;
+ /* Initialize to walk the hash table */
+ ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
+}
+
+/*
+ * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+setop_retrieve_hash_table(SetOpState *setopstate)
+{
+ TupleHashEntryData *entry;
+ TupleTableSlot *resultTupleSlot;
+
+ /*
+ * get state info from node
+ */
+ resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+ /*
+ * We loop retrieving groups until we find one we should return
+ */
+ while (!setopstate->setop_done)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Find the next entry in the hash table
+ */
+ entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
+ if (entry == NULL)
+ {
+ /* No more entries in hashtable, so done */
+ setopstate->setop_done = true;
+ return NULL;
+ }
+
+ /*
+ * See if we should emit any copies of this tuple, and if so return
+ * the first copy.
+ */
+ set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+
+ if (setopstate->numOutput > 0)
+ {
+ setopstate->numOutput--;
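+			/*
+			 * The hash table retains ownership of firstTuple, so the slot
+			 * must not free it (hence the "false" shouldFree argument).
+			 */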
+ return ExecStoreMinimalTuple(entry->firstTuple,
+ resultTupleSlot,
+ false);
+ }
+ }
+
+ /* No more groups */
+ ExecClearTuple(resultTupleSlot);
+ return NULL;
+}
+
+/* ----------------------------------------------------------------
+ * ExecInitSetOp
+ *
+ * This initializes the setop node state structures and
+ * the node's subplan.
+ * ----------------------------------------------------------------
+ */
+SetOpState *
+ExecInitSetOp(SetOp *node, EState *estate, int eflags)
+{
+ SetOpState *setopstate;
+ TupleDesc outerDesc;
+
+ /* check for unsupported flags */
+ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
+
+ /*
+ * create state structure
+ */
+ setopstate = makeNode(SetOpState);
+ setopstate->ps.plan = (Plan *) node;
+ setopstate->ps.state = estate;
+ setopstate->ps.ExecProcNode = ExecSetOp;
+
+ setopstate->eqfuncoids = NULL;
+ setopstate->hashfunctions = NULL;
+ setopstate->setop_done = false;
+ setopstate->numOutput = 0;
+ setopstate->pergroup = NULL;
+ setopstate->grp_firstTuple = NULL;
+ setopstate->hashtable = NULL;
+ setopstate->tableContext = NULL;
+
+ /*
+ * create expression context
+ */
+ ExecAssignExprContext(estate, &setopstate->ps);
+
+ /*
+ * If hashing, we also need a longer-lived context to store the hash
+ * table. The table can't just be kept in the per-query context because
+ * we want to be able to throw it away in ExecReScanSetOp.
+ */
+ if (node->strategy == SETOP_HASHED)
+ setopstate->tableContext =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "SetOp hash table",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * initialize child nodes
+ *
+ * If we are hashing then the child plan does not need to handle REWIND
+ * efficiently; see ExecReScanSetOp.
+ */
+ if (node->strategy == SETOP_HASHED)
+ eflags &= ~EXEC_FLAG_REWIND;
+ outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
+ outerDesc = ExecGetResultType(outerPlanState(setopstate));
+
+ /*
+ * Initialize result slot and type. Setop nodes do no projections, so
+ * initialize projection info for this node appropriately.
+ */
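+	/*
+	 * (The slot type must match what we will store into it: minimal tuples
+	 * fetched from the hash table in SETOP_HASHED mode, versus heap-tuple
+	 * copies of each group's first input tuple in SETOP_SORTED mode.)
+	 */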
+ ExecInitResultTupleSlotTL(&setopstate->ps,
+ node->strategy == SETOP_HASHED ?
+ &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
+ setopstate->ps.ps_ProjInfo = NULL;
+
+ /*
+	 * Precompute fmgr lookup data for inner loop.  We need both equality and
+	 * hashing functions if we are hashing, but only equality if not.
+ */
+ if (node->strategy == SETOP_HASHED)
+ execTuplesHashPrepare(node->numCols,
+ node->dupOperators,
+ &setopstate->eqfuncoids,
+ &setopstate->hashfunctions);
+ else
+ setopstate->eqfunction =
+ execTuplesMatchPrepare(outerDesc,
+ node->numCols,
+ node->dupColIdx,
+ node->dupOperators,
+ node->dupCollations,
+ &setopstate->ps);
+
+ if (node->strategy == SETOP_HASHED)
+ {
+ build_hash_table(setopstate);
+ setopstate->table_filled = false;
+ }
+ else
+ {
+ setopstate->pergroup =
+ (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
+ }
+
+ return setopstate;
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndSetOp
+ *
+ * This shuts down the subplan and frees resources allocated
+ * to this node.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndSetOp(SetOpState *node)
+{
+ /* clean up tuple table */
+ ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
+ /* free subsidiary stuff including hashtable */
+ if (node->tableContext)
+ MemoryContextDelete(node->tableContext);
+ ExecFreeExprContext(&node->ps);
+
+ ExecEndNode(outerPlanState(node));
+}
+
+
+void
+ExecReScanSetOp(SetOpState *node)
+{
+ ExecClearTuple(node->ps.ps_ResultTupleSlot);
+ node->setop_done = false;
+ node->numOutput = 0;
+
+ if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+ {
+ /*
+ * In the hashed case, if we haven't yet built the hash table then we
+ * can just return; nothing done yet, so nothing to undo. If subnode's
+ * chgParam is not NULL then it will be re-scanned by ExecProcNode,
+ * else no reason to re-scan it at all.
+ */
+ if (!node->table_filled)
+ return;
+
+ /*
+ * If we do have the hash table and the subplan does not have any
+ * parameter changes, then we can just rescan the existing hash table;
+ * no need to build it again.
+ */
+ if (node->ps.lefttree->chgParam == NULL)
+ {
+ ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ return;
+ }
+ }
+
+ /* Release first tuple of group, if we have made a copy */
+ if (node->grp_firstTuple != NULL)
+ {
+ heap_freetuple(node->grp_firstTuple);
+ node->grp_firstTuple = NULL;
+ }
+
+ /* Release any hashtable storage */
+ if (node->tableContext)
+ MemoryContextResetAndDeleteChildren(node->tableContext);
+
+ /* And rebuild empty hashtable if needed */
+ if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+ {
+ ResetTupleHashTable(node->hashtable);
+ node->table_filled = false;
+ }
+
+ /*
+	 * If chgParam of the subnode is not null, then the plan will be
+	 * re-scanned by the first ExecProcNode call.
+ */
+ if (node->ps.lefttree->chgParam == NULL)
+ ExecReScan(node->ps.lefttree);
+}