Diffstat (limited to 'src/backend/executor/nodeSetOp.c')
-rw-r--r--	src/backend/executor/nodeSetOp.c	651
1 file changed, 651 insertions, 0 deletions
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
new file mode 100644
index 0000000..aad7ac0
--- /dev/null
+++ b/src/backend/executor/nodeSetOp.c
@@ -0,0 +1,651 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSetOp.c
+ *	  Routines to handle INTERSECT and EXCEPT selection
+ *
+ * The input of a SetOp node consists of tuples from two relations,
+ * which have been combined into one dataset, with a junk attribute added
+ * that shows which relation each tuple came from.  In SETOP_SORTED mode,
+ * the input has furthermore been sorted according to all the grouping
+ * columns (ie, all the non-junk attributes).  The SetOp node scans each
+ * group of identical tuples to determine how many came from each input
+ * relation.  Then it is a simple matter to emit the output demanded by the
+ * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the input is delivered in no particular order,
+ * except that we know all the tuples from one input relation will come before
+ * all the tuples of the other.  The planner guarantees that the first input
+ * relation is the left-hand one for EXCEPT, and tries to make the smaller
+ * input relation come first for INTERSECT.  We build a hash table in memory
+ * with one entry for each group of identical tuples, and count the number of
+ * tuples in the group from each relation.  After seeing all the input, we
+ * scan the hashtable and generate the correct output using those counts.
+ * We can avoid making hashtable entries for any tuples appearing only in the
+ * second input relation, since they cannot result in any output.
+ *
+ * This node type is not used for UNION or UNION ALL, since those can be
+ * implemented more cheaply (there's no need for the junk attribute to
+ * identify the source relation).
+ *
+ * Note that SetOp does no qual checking nor projection.  The delivered
+ * output tuples are just copies of the first-to-arrive tuple in each
+ * input group.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/executor/nodeSetOp.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "executor/executor.h"
+#include "executor/nodeSetOp.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+
+/*
+ * SetOpStatePerGroupData - per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
+ * the plan state node.  In SETOP_HASHED mode, the hash table contains one
+ * of these for each tuple group.
+ */
+typedef struct SetOpStatePerGroupData
+{
+	long		numLeft;		/* number of left-input dups in group */
+	long		numRight;		/* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+
+static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static void setop_fill_hash_table(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
+
+
+/*
+ * Initialize state for a new group of input values.
+ */
+static inline void
+initialize_counts(SetOpStatePerGroup pergroup)
+{
+	pergroup->numLeft = pergroup->numRight = 0;
+}
+
+/*
+ * Advance the appropriate counter for one input tuple.
+ */
+static inline void
+advance_counts(SetOpStatePerGroup pergroup, int flag)
+{
+	if (flag)
+		pergroup->numRight++;
+	else
+		pergroup->numLeft++;
+}
+
+/*
+ * Fetch the "flag" column from an input tuple.
+ * This is an integer column with value 0 for left side, 1 for right side.
+ */
+static int
+fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
+{
+	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	int			flag;
+	bool		isNull;
+
+	flag = DatumGetInt32(slot_getattr(inputslot,
+									  node->flagColIdx,
+									  &isNull));
+	Assert(!isNull);
+	Assert(flag == 0 || flag == 1);
+	return flag;
+}
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(SetOpState *setopstate)
+{
+	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));
+
+	Assert(node->strategy == SETOP_HASHED);
+	Assert(node->numGroups > 0);
+
+	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
+												   desc,
+												   node->numCols,
+												   node->dupColIdx,
+												   setopstate->eqfuncoids,
+												   setopstate->hashfunctions,
+												   node->dupCollations,
+												   node->numGroups,
+												   0,
+												   setopstate->ps.state->es_query_cxt,
+												   setopstate->tableContext,
+												   econtext->ecxt_per_tuple_memory,
+												   false);
+}
+
+/*
+ * We've completed processing a tuple group.  Decide how many copies (if any)
+ * of its representative row to emit, and store the count into numOutput.
+ * This logic is straight from the SQL92 specification.
+ */
+static void
+set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;
+
+	switch (plannode->cmd)
+	{
+		case SETOPCMD_INTERSECT:
+			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
+				setopstate->numOutput = 1;
+			else
+				setopstate->numOutput = 0;
+			break;
+		case SETOPCMD_INTERSECT_ALL:
+			setopstate->numOutput =
+				(pergroup->numLeft < pergroup->numRight) ?
+				pergroup->numLeft : pergroup->numRight;
+			break;
+		case SETOPCMD_EXCEPT:
+			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
+				setopstate->numOutput = 1;
+			else
+				setopstate->numOutput = 0;
+			break;
+		case SETOPCMD_EXCEPT_ALL:
+			setopstate->numOutput =
+				(pergroup->numLeft < pergroup->numRight) ?
+				0 : (pergroup->numLeft - pergroup->numRight);
+			break;
+		default:
+			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
+			break;
+	}
+}
+
+
+/* ----------------------------------------------------------------
+ *		ExecSetOp
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *			/* return: a tuple or NULL */
+ExecSetOp(PlanState *pstate)
+{
+	SetOpState *node = castNode(SetOpState, pstate);
+	SetOp	   *plannode = (SetOp *) node->ps.plan;
+	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
+
+	CHECK_FOR_INTERRUPTS();
+
+	/*
+	 * If the previously-returned tuple needs to be returned more than once,
+	 * keep returning it.
+	 */
+	if (node->numOutput > 0)
+	{
+		node->numOutput--;
+		return resultTupleSlot;
+	}
+
+	/* Otherwise, we're done if we are out of groups */
+	if (node->setop_done)
+		return NULL;
+
+	/* Fetch the next tuple group according to the correct strategy */
+	if (plannode->strategy == SETOP_HASHED)
+	{
+		if (!node->table_filled)
+			setop_fill_hash_table(node);
+		return setop_retrieve_hash_table(node);
+	}
+	else
+		return setop_retrieve_direct(node);
+}
+
+/*
+ * ExecSetOp for non-hashed case
+ */
+static TupleTableSlot *
+setop_retrieve_direct(SetOpState *setopstate)
+{
+	PlanState  *outerPlan;
+	SetOpStatePerGroup pergroup;
+	TupleTableSlot *outerslot;
+	TupleTableSlot *resultTupleSlot;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+
+	/*
+	 * get state info from node
+	 */
+	outerPlan = outerPlanState(setopstate);
+	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
+	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+	/*
+	 * We loop retrieving groups until we find one we should return
+	 */
+	while (!setopstate->setop_done)
+	{
+		/*
+		 * If we don't already have the first tuple of the new group, fetch
+		 * it from the outer plan.
+		 */
+		if (setopstate->grp_firstTuple == NULL)
+		{
+			outerslot = ExecProcNode(outerPlan);
+			if (!TupIsNull(outerslot))
+			{
+				/* Make a copy of the first input tuple */
+				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
+			}
+			else
+			{
+				/* outer plan produced no tuples at all */
+				setopstate->setop_done = true;
+				return NULL;
+			}
+		}
+
+		/*
+		 * Store the copied first input tuple in the tuple table slot
+		 * reserved for it.  The tuple will be deleted when it is cleared
+		 * from the slot.
+		 */
+		ExecStoreHeapTuple(setopstate->grp_firstTuple,
+						   resultTupleSlot,
+						   true);
+		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */
+
+		/* Initialize working state for a new input tuple group */
+		initialize_counts(pergroup);
+
+		/* Count the first input tuple */
+		advance_counts(pergroup,
+					   fetch_tuple_flag(setopstate, resultTupleSlot));
+
+		/*
+		 * Scan the outer plan until we exhaust it or cross a group boundary.
+		 */
+		for (;;)
+		{
+			outerslot = ExecProcNode(outerPlan);
+			if (TupIsNull(outerslot))
+			{
+				/* no more outer-plan tuples available */
+				setopstate->setop_done = true;
+				break;
+			}
+
+			/*
+			 * Check whether we've crossed a group boundary.
+			 */
+			econtext->ecxt_outertuple = resultTupleSlot;
+			econtext->ecxt_innertuple = outerslot;
+
+			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
+			{
+				/*
+				 * Save the first input tuple of the next group.
+				 */
+				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
+				break;
+			}
+
+			/* Still in same group, so count this tuple */
+			advance_counts(pergroup,
+						   fetch_tuple_flag(setopstate, outerslot));
+		}
+
+		/*
+		 * Done scanning input tuple group.  See if we should emit any copies
+		 * of result tuple, and if so return the first copy.
+		 */
+		set_output_count(setopstate, pergroup);
+
+		if (setopstate->numOutput > 0)
+		{
+			setopstate->numOutput--;
+			return resultTupleSlot;
+		}
+	}
+
+	/* No more groups */
+	ExecClearTuple(resultTupleSlot);
+	return NULL;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read input and build hash table
+ */
+static void
+setop_fill_hash_table(SetOpState *setopstate)
+{
+	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	PlanState  *outerPlan;
+	int			firstFlag;
+	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+
+	/*
+	 * get state info from node
+	 */
+	outerPlan = outerPlanState(setopstate);
+	firstFlag = node->firstFlag;
+	/* verify planner didn't mess up */
+	Assert(firstFlag == 0 ||
+		   (firstFlag == 1 &&
+			(node->cmd == SETOPCMD_INTERSECT ||
+			 node->cmd == SETOPCMD_INTERSECT_ALL)));
+
+	/*
+	 * Process each outer-plan tuple, and then fetch the next one, until we
+	 * exhaust the outer plan.
+	 */
+	in_first_rel = true;
+	for (;;)
+	{
+		TupleTableSlot *outerslot;
+		int			flag;
+		TupleHashEntryData *entry;
+		bool		isnew;
+
+		outerslot = ExecProcNode(outerPlan);
+		if (TupIsNull(outerslot))
+			break;
+
+		/* Identify whether it's left or right input */
+		flag = fetch_tuple_flag(setopstate, outerslot);
+
+		if (flag == firstFlag)
+		{
+			/* (still) in first input relation */
+			Assert(in_first_rel);
+
+			/* Find or build hashtable entry for this tuple's group */
+			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+										 &isnew, NULL);
+
+			/* If new tuple group, initialize counts */
+			if (isnew)
+			{
+				entry->additional = (SetOpStatePerGroup)
+					MemoryContextAlloc(setopstate->hashtable->tablecxt,
+									   sizeof(SetOpStatePerGroupData));
+				initialize_counts((SetOpStatePerGroup) entry->additional);
+			}
+
+			/* Advance the counts */
+			advance_counts((SetOpStatePerGroup) entry->additional, flag);
+		}
+		else
+		{
+			/* reached second relation */
+			in_first_rel = false;
+
+			/* For tuples not seen previously, do not make hashtable entry */
+			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+										 NULL, NULL);
+
+			/* Advance the counts if entry is already present */
+			if (entry)
+				advance_counts((SetOpStatePerGroup) entry->additional, flag);
+		}
+
+		/* Must reset expression context after each hashtable lookup */
+		ResetExprContext(econtext);
+	}
+
+	setopstate->table_filled = true;
+	/* Initialize to walk the hash table */
+	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
+}
+
+/*
+ * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+setop_retrieve_hash_table(SetOpState *setopstate)
+{
+	TupleHashEntryData *entry;
+	TupleTableSlot *resultTupleSlot;
+
+	/*
+	 * get state info from node
+	 */
+	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+	/*
+	 * We loop retrieving groups until we find one we should return
+	 */
+	while (!setopstate->setop_done)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Find the next entry in the hash table
+		 */
+		entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
+		if (entry == NULL)
+		{
+			/* No more entries in hashtable, so done */
+			setopstate->setop_done = true;
+			return NULL;
+		}
+
+		/*
+		 * See if we should emit any copies of this tuple, and if so return
+		 * the first copy.
+		 */
+		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+
+		if (setopstate->numOutput > 0)
+		{
+			setopstate->numOutput--;
+			return ExecStoreMinimalTuple(entry->firstTuple,
+										 resultTupleSlot,
+										 false);
+		}
+	}
+
+	/* No more groups */
+	ExecClearTuple(resultTupleSlot);
+	return NULL;
+}
+
+/* ----------------------------------------------------------------
+ *		ExecInitSetOp
+ *
+ *		This initializes the setop node state structures and
+ *		the node's subplan.
+ * ----------------------------------------------------------------
+ */
+SetOpState *
+ExecInitSetOp(SetOp *node, EState *estate, int eflags)
+{
+	SetOpState *setopstate;
+	TupleDesc	outerDesc;
+
+	/* check for unsupported flags */
+	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
+
+	/*
+	 * create state structure
+	 */
+	setopstate = makeNode(SetOpState);
+	setopstate->ps.plan = (Plan *) node;
+	setopstate->ps.state = estate;
+	setopstate->ps.ExecProcNode = ExecSetOp;
+
+	setopstate->eqfuncoids = NULL;
+	setopstate->hashfunctions = NULL;
+	setopstate->setop_done = false;
+	setopstate->numOutput = 0;
+	setopstate->pergroup = NULL;
+	setopstate->grp_firstTuple = NULL;
+	setopstate->hashtable = NULL;
+	setopstate->tableContext = NULL;
+
+	/*
+	 * create expression context
+	 */
+	ExecAssignExprContext(estate, &setopstate->ps);
+
+	/*
+	 * If hashing, we also need a longer-lived context to store the hash
+	 * table.  The table can't just be kept in the per-query context because
+	 * we want to be able to throw it away in ExecReScanSetOp.
+	 */
+	if (node->strategy == SETOP_HASHED)
+		setopstate->tableContext =
+			AllocSetContextCreate(CurrentMemoryContext,
+								  "SetOp hash table",
+								  ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * initialize child nodes
+	 *
+	 * If we are hashing then the child plan does not need to handle REWIND
+	 * efficiently; see ExecReScanSetOp.
+	 */
+	if (node->strategy == SETOP_HASHED)
+		eflags &= ~EXEC_FLAG_REWIND;
+	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
+	outerDesc = ExecGetResultType(outerPlanState(setopstate));
+
+	/*
+	 * Initialize result slot and type.  Setop nodes do no projections, so
+	 * initialize projection info for this node appropriately.
+	 */
+	ExecInitResultTupleSlotTL(&setopstate->ps,
+							  node->strategy == SETOP_HASHED ?
+							  &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
+	setopstate->ps.ps_ProjInfo = NULL;
+
+	/*
+	 * Precompute fmgr lookup data for inner loop.  We need both equality and
+	 * hashing functions to do it by hashing, but only equality if not
+	 * hashing.
+	 */
+	if (node->strategy == SETOP_HASHED)
+		execTuplesHashPrepare(node->numCols,
+							  node->dupOperators,
+							  &setopstate->eqfuncoids,
+							  &setopstate->hashfunctions);
+	else
+		setopstate->eqfunction =
+			execTuplesMatchPrepare(outerDesc,
+								   node->numCols,
+								   node->dupColIdx,
+								   node->dupOperators,
+								   node->dupCollations,
+								   &setopstate->ps);
+
+	if (node->strategy == SETOP_HASHED)
+	{
+		build_hash_table(setopstate);
+		setopstate->table_filled = false;
+	}
+	else
+	{
+		setopstate->pergroup =
+			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
+	}
+
+	return setopstate;
+}
+
+/* ----------------------------------------------------------------
+ *		ExecEndSetOp
+ *
+ *		This shuts down the subplan and frees resources allocated
+ *		to this node.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndSetOp(SetOpState *node)
+{
+	/* clean up tuple table */
+	ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
+	/* free subsidiary stuff including hashtable */
+	if (node->tableContext)
+		MemoryContextDelete(node->tableContext);
+	ExecFreeExprContext(&node->ps);
+
+	ExecEndNode(outerPlanState(node));
+}
+
+
+void
+ExecReScanSetOp(SetOpState *node)
+{
+	ExecClearTuple(node->ps.ps_ResultTupleSlot);
+	node->setop_done = false;
+	node->numOutput = 0;
+
+	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+	{
+		/*
+		 * In the hashed case, if we haven't yet built the hash table then
+		 * we can just return; nothing done yet, so nothing to undo.  If
+		 * subnode's chgParam is not NULL then it will be re-scanned by
+		 * ExecProcNode, else no reason to re-scan it at all.
+		 */
+		if (!node->table_filled)
+			return;
+
+		/*
+		 * If we do have the hash table and the subplan does not have any
+		 * parameter changes, then we can just rescan the existing hash
+		 * table; no need to build it again.
+		 */
+		if (node->ps.lefttree->chgParam == NULL)
+		{
+			ResetTupleHashIterator(node->hashtable, &node->hashiter);
+			return;
+		}
+	}
+
+	/* Release first tuple of group, if we have made a copy */
+	if (node->grp_firstTuple != NULL)
+	{
+		heap_freetuple(node->grp_firstTuple);
+		node->grp_firstTuple = NULL;
+	}
+
+	/* Release any hashtable storage */
+	if (node->tableContext)
+		MemoryContextResetAndDeleteChildren(node->tableContext);
+
+	/* And rebuild empty hashtable if needed */
+	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+	{
+		ResetTupleHashTable(node->hashtable);
+		node->table_filled = false;
+	}
+
+	/*
+	 * if chgParam of subnode is not null then plan will be re-scanned by
+	 * first ExecProcNode.
+	 */
+	if (node->ps.lefttree->chgParam == NULL)
+		ExecReScan(node->ps.lefttree);
+}
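
The emission rules that set_output_count() takes "straight from the SQL92 specification" can be isolated into a small standalone function. The sketch below restates that decision table outside the executor; Cmd and output_count are simplified stand-ins invented here, not the executor's SetOpCmd or its APIs. It checks the rules against a group with three left-input and two right-input duplicates.

#include <stdio.h>

/* Simplified stand-in for the executor's SetOpCmd enum */
typedef enum { CMD_INTERSECT, CMD_INTERSECT_ALL, CMD_EXCEPT, CMD_EXCEPT_ALL } Cmd;

/* Same decision table as set_output_count(): given how many duplicates of a
 * group came from each input, decide how many copies of the row to emit. */
static long
output_count(Cmd cmd, long numLeft, long numRight)
{
	switch (cmd)
	{
		case CMD_INTERSECT:
			return (numLeft > 0 && numRight > 0) ? 1 : 0;
		case CMD_INTERSECT_ALL:
			return (numLeft < numRight) ? numLeft : numRight;	/* min */
		case CMD_EXCEPT:
			return (numLeft > 0 && numRight == 0) ? 1 : 0;
		case CMD_EXCEPT_ALL:
			return (numLeft < numRight) ? 0 : (numLeft - numRight);
	}
	return 0;
}

int
main(void)
{
	/* A group with 3 left-input duplicates and 2 right-input duplicates */
	printf("INTERSECT     -> %ld\n", output_count(CMD_INTERSECT, 3, 2));		/* 1 */
	printf("INTERSECT ALL -> %ld\n", output_count(CMD_INTERSECT_ALL, 3, 2));	/* 2 */
	printf("EXCEPT        -> %ld\n", output_count(CMD_EXCEPT, 3, 2));			/* 0 */
	printf("EXCEPT ALL    -> %ld\n", output_count(CMD_EXCEPT_ALL, 3, 2));		/* 1 */
	return 0;
}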
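To make the SETOP_SORTED control flow concrete, here is a minimal model of the group scan performed by setop_retrieve_direct(): walk a sorted input whose rows carry the junk flag column (0 = left input, 1 = right input), count per-input duplicates within each run of identical values, then emit however many copies the set operation calls for. This is illustration only, under the assumption of a single integer grouping column; the Row struct and hard-coded data are invented, and INTERSECT ALL's min rule stands in for the full decision table above.

#include <stdio.h>

/* One input row: a grouping value plus the junk flag column */
typedef struct { int val; int flag; } Row;

int
main(void)
{
	/* left = {1,2,2,3}, right = {2,3,3}, appended and sorted by val */
	Row		in[] = {{1,0},{2,0},{2,0},{2,1},{3,0},{3,1},{3,1}};
	int		n = sizeof(in) / sizeof(in[0]);
	int		i = 0;

	while (i < n)
	{
		int		val = in[i].val;
		long	numLeft = 0, numRight = 0;

		/* scan one group of identical values, counting per input relation */
		for (; i < n && in[i].val == val; i++)
		{
			if (in[i].flag)
				numRight++;
			else
				numLeft++;
		}

		/* INTERSECT ALL: emit min(numLeft, numRight) copies of the value */
		long	emit = (numLeft < numRight) ? numLeft : numRight;

		for (long k = 0; k < emit; k++)
			printf("%d\n", val);	/* prints 2, then 3 */
	}
	return 0;
}

The hashed strategy computes the same two counts per group; it just accumulates them in per-entry SetOpStatePerGroupData attached to hash table entries instead of relying on sorted order, then applies the identical emission rules while walking the table.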