Diffstat (limited to 'src/backend/executor/nodeSetOp.c')
-rw-r--r--	src/backend/executor/nodeSetOp.c	651
1 file changed, 651 insertions, 0 deletions
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
new file mode 100644
index 0000000..aad7ac0
--- /dev/null
+++ b/src/backend/executor/nodeSetOp.c
@@ -0,0 +1,651 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSetOp.c
+ *	  Routines to handle INTERSECT and EXCEPT selection
+ *
+ * The input of a SetOp node consists of tuples from two relations,
+ * which have been combined into one dataset, with a junk attribute added
+ * that shows which relation each tuple came from.  In SETOP_SORTED mode,
+ * the input has furthermore been sorted according to all the grouping
+ * columns (ie, all the non-junk attributes).  The SetOp node scans each
+ * group of identical tuples to determine how many came from each input
+ * relation.  Then it is a simple matter to emit the output demanded by the
+ * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the input is delivered in no particular order,
+ * except that we know all the tuples from one input relation will come before
+ * all the tuples of the other.  The planner guarantees that the first input
+ * relation is the left-hand one for EXCEPT, and tries to make the smaller
+ * input relation come first for INTERSECT.  We build a hash table in memory
+ * with one entry for each group of identical tuples, and count the number of
+ * tuples in the group from each relation.  After seeing all the input, we
+ * scan the hashtable and generate the correct output using those counts.
+ * We can avoid making hashtable entries for any tuples appearing only in the
+ * second input relation, since they cannot result in any output.
+ *
+ * This node type is not used for UNION or UNION ALL, since those can be
+ * implemented more cheaply (there's no need for the junk attribute to
+ * identify the source relation).
+ *
+ * Note that SetOp does no qual checking nor projection.  The delivered
+ * output tuples are just copies of the first-to-arrive tuple in each
+ * input group.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/executor/nodeSetOp.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "executor/executor.h"
+#include "executor/nodeSetOp.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+
+/*
+ * SetOpStatePerGroupData - per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
+ * the plan state node.  In SETOP_HASHED mode, the hash table contains one
+ * of these for each tuple group.
+ */
+typedef struct SetOpStatePerGroupData
+{
+	long		numLeft;		/* number of left-input dups in group */
+	long		numRight;		/* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+
+static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static void setop_fill_hash_table(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
+
+
+/*
+ * Initialize state for a new group of input values.
+ */
+static inline void
+initialize_counts(SetOpStatePerGroup pergroup)
+{
+	pergroup->numLeft = pergroup->numRight = 0;
+}
+
+/*
+ * Advance the appropriate counter for one input tuple.
+ */
+static inline void
+advance_counts(SetOpStatePerGroup pergroup, int flag)
+{
+	if (flag)
+		pergroup->numRight++;
+	else
+		pergroup->numLeft++;
+}
+
+/*
+ * Fetch the "flag" column from an input tuple.
+ * This is an integer column with value 0 for left side, 1 for right side.
+ */
+static int
+fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
+{
+	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	int			flag;
+	bool		isNull;
+
+	flag = DatumGetInt32(slot_getattr(inputslot,
+									  node->flagColIdx,
+									  &isNull));
+	Assert(!isNull);
+	Assert(flag == 0 || flag == 1);
+	return flag;
+}
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(SetOpState *setopstate)
+{
+	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));
+
+	Assert(node->strategy == SETOP_HASHED);
+	Assert(node->numGroups > 0);
+
+	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
+												   desc,
+												   node->numCols,
+												   node->dupColIdx,
+												   setopstate->eqfuncoids,
+												   setopstate->hashfunctions,
+												   node->dupCollations,
+												   node->numGroups,
+												   0,
+												   setopstate->ps.state->es_query_cxt,
+												   setopstate->tableContext,
+												   econtext->ecxt_per_tuple_memory,
+												   false);
+}
+
+/*
+ * We've completed processing a tuple group.  Decide how many copies (if any)
+ * of its representative row to emit, and store the count into numOutput.
+ * This logic is straight from the SQL92 specification.
+ */
+static void
+set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;
+
+	switch (plannode->cmd)
+	{
+		case SETOPCMD_INTERSECT:
+			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
+				setopstate->numOutput = 1;
+			else
+				setopstate->numOutput = 0;
+			break;
+		case SETOPCMD_INTERSECT_ALL:
+			setopstate->numOutput =
+				(pergroup->numLeft < pergroup->numRight) ?
+				pergroup->numLeft : pergroup->numRight;
+			break;
+		case SETOPCMD_EXCEPT:
+			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
+				setopstate->numOutput = 1;
+			else
+				setopstate->numOutput = 0;
+			break;
+		case SETOPCMD_EXCEPT_ALL:
+			setopstate->numOutput =
+				(pergroup->numLeft < pergroup->numRight) ?
+				0 : (pergroup->numLeft - pergroup->numRight);
+			break;
+		default:
+			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
+			break;
+	}
+}
+
+
+/* ----------------------------------------------------------------
+ *		ExecSetOp
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *			/* return: a tuple or NULL */
+ExecSetOp(PlanState *pstate)
+{
+	SetOpState *node = castNode(SetOpState, pstate);
+	SetOp	   *plannode = (SetOp *) node->ps.plan;
+	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
+
+	CHECK_FOR_INTERRUPTS();
+
+	/*
+	 * If the previously-returned tuple needs to be returned more than once,
+	 * keep returning it.
+	 */
+	if (node->numOutput > 0)
+	{
+		node->numOutput--;
+		return resultTupleSlot;
+	}
+
+	/* Otherwise, we're done if we are out of groups */
+	if (node->setop_done)
+		return NULL;
+
+	/* Fetch the next tuple group according to the correct strategy */
+	if (plannode->strategy == SETOP_HASHED)
+	{
+		if (!node->table_filled)
+			setop_fill_hash_table(node);
+		return setop_retrieve_hash_table(node);
+	}
+	else
+		return setop_retrieve_direct(node);
+}
+
+/*
+ * ExecSetOp for non-hashed case
+ */
+static TupleTableSlot *
+setop_retrieve_direct(SetOpState *setopstate)
+{
+	PlanState  *outerPlan;
+	SetOpStatePerGroup pergroup;
+	TupleTableSlot *outerslot;
+	TupleTableSlot *resultTupleSlot;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+
+	/*
+	 * get state info from node
+	 */
+	outerPlan = outerPlanState(setopstate);
+	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
+	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+	/*
+	 * We loop retrieving groups until we find one we should return
+	 */
+	while (!setopstate->setop_done)
+	{
+		/*
+		 * If we don't already have the first tuple of the new group, fetch
+		 * it from the outer plan.
+		 */
+		if (setopstate->grp_firstTuple == NULL)
+		{
+			outerslot = ExecProcNode(outerPlan);
+			if (!TupIsNull(outerslot))
+			{
+				/* Make a copy of the first input tuple */
+				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
+			}
+			else
+			{
+				/* outer plan produced no tuples at all */
+				setopstate->setop_done = true;
+				return NULL;
+			}
+		}
+
+		/*
+		 * Store the copied first input tuple in the tuple table slot
+		 * reserved for it.  The tuple will be deleted when it is cleared
+		 * from the slot.
+		 */
+		ExecStoreHeapTuple(setopstate->grp_firstTuple,
+						   resultTupleSlot,
+						   true);
+		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */
+
+		/* Initialize working state for a new input tuple group */
+		initialize_counts(pergroup);
+
+		/* Count the first input tuple */
+		advance_counts(pergroup,
+					   fetch_tuple_flag(setopstate, resultTupleSlot));
+
+		/*
+		 * Scan the outer plan until we exhaust it or cross a group boundary.
+		 */
+		for (;;)
+		{
+			outerslot = ExecProcNode(outerPlan);
+			if (TupIsNull(outerslot))
+			{
+				/* no more outer-plan tuples available */
+				setopstate->setop_done = true;
+				break;
+			}
+
+			/*
+			 * Check whether we've crossed a group boundary.
+			 */
+			econtext->ecxt_outertuple = resultTupleSlot;
+			econtext->ecxt_innertuple = outerslot;
+
+			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
+			{
+				/*
+				 * Save the first input tuple of the next group.
+				 */
+				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
+				break;
+			}
+
+			/* Still in same group, so count this tuple */
+			advance_counts(pergroup,
+						   fetch_tuple_flag(setopstate, outerslot));
+		}
+
+		/*
+		 * Done scanning input tuple group.  See if we should emit any copies
+		 * of result tuple, and if so return the first copy.
+		 */
+		set_output_count(setopstate, pergroup);
+
+		if (setopstate->numOutput > 0)
+		{
+			setopstate->numOutput--;
+			return resultTupleSlot;
+		}
+	}
+
+	/* No more groups */
+	ExecClearTuple(resultTupleSlot);
+	return NULL;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read input and build hash table
+ */
+static void
+setop_fill_hash_table(SetOpState *setopstate)
+{
+	SetOp	   *node = (SetOp *) setopstate->ps.plan;
+	PlanState  *outerPlan;
+	int			firstFlag;
+	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
+	ExprContext *econtext = setopstate->ps.ps_ExprContext;
+
+	/*
+	 * get state info from node
+	 */
+	outerPlan = outerPlanState(setopstate);
+	firstFlag = node->firstFlag;
+	/* verify planner didn't mess up */
+	Assert(firstFlag == 0 ||
+		   (firstFlag == 1 &&
+			(node->cmd == SETOPCMD_INTERSECT ||
+			 node->cmd == SETOPCMD_INTERSECT_ALL)));
+
+	/*
+	 * Process each outer-plan tuple, and then fetch the next one, until we
+	 * exhaust the outer plan.
+	 */
+	in_first_rel = true;
+	for (;;)
+	{
+		TupleTableSlot *outerslot;
+		int			flag;
+		TupleHashEntryData *entry;
+		bool		isnew;
+
+		outerslot = ExecProcNode(outerPlan);
+		if (TupIsNull(outerslot))
+			break;
+
+		/* Identify whether it's left or right input */
+		flag = fetch_tuple_flag(setopstate, outerslot);
+
+		if (flag == firstFlag)
+		{
+			/* (still) in first input relation */
+			Assert(in_first_rel);
+
+			/* Find or build hashtable entry for this tuple's group */
+			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+										 &isnew, NULL);
+
+			/* If new tuple group, initialize counts */
+			if (isnew)
+			{
+				entry->additional = (SetOpStatePerGroup)
+					MemoryContextAlloc(setopstate->hashtable->tablecxt,
+									   sizeof(SetOpStatePerGroupData));
+				initialize_counts((SetOpStatePerGroup) entry->additional);
+			}
+
+			/* Advance the counts */
+			advance_counts((SetOpStatePerGroup) entry->additional, flag);
+		}
+		else
+		{
+			/* reached second relation */
+			in_first_rel = false;
+
+			/* For tuples not seen previously, do not make hashtable entry */
+			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
+										 NULL, NULL);
+
+			/* Advance the counts if entry is already present */
+			if (entry)
+				advance_counts((SetOpStatePerGroup) entry->additional, flag);
+		}
+
+		/* Must reset expression context after each hashtable lookup */
+		ResetExprContext(econtext);
+	}
+
+	setopstate->table_filled = true;
+	/* Initialize to walk the hash table */
+	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
+}
+
+/*
+ * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+setop_retrieve_hash_table(SetOpState *setopstate)
+{
+	TupleHashEntryData *entry;
+	TupleTableSlot *resultTupleSlot;
+
+	/*
+	 * get state info from node
+	 */
+	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+	/*
+	 * We loop retrieving groups until we find one we should return
+	 */
+	while (!setopstate->setop_done)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Find the next entry in the hash table
+		 */
+		entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
+		if (entry == NULL)
+		{
+			/* No more entries in hashtable, so done */
+			setopstate->setop_done = true;
+			return NULL;
+		}
+
+		/*
+		 * See if we should emit any copies of this tuple, and if so return
+		 * the first copy.
+		 */
+		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+
+		if (setopstate->numOutput > 0)
+		{
+			setopstate->numOutput--;
+			return ExecStoreMinimalTuple(entry->firstTuple,
+										 resultTupleSlot,
+										 false);
+		}
+	}
+
+	/* No more groups */
+	ExecClearTuple(resultTupleSlot);
+	return NULL;
+}
+
+/* ----------------------------------------------------------------
+ *		ExecInitSetOp
+ *
+ *		This initializes the setop node state structures and
+ *		the node's subplan.
+ * ----------------------------------------------------------------
+ */
+SetOpState *
+ExecInitSetOp(SetOp *node, EState *estate, int eflags)
+{
+	SetOpState *setopstate;
+	TupleDesc	outerDesc;
+
+	/* check for unsupported flags */
+	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
+
+	/*
+	 * create state structure
+	 */
+	setopstate = makeNode(SetOpState);
+	setopstate->ps.plan = (Plan *) node;
+	setopstate->ps.state = estate;
+	setopstate->ps.ExecProcNode = ExecSetOp;
+
+	setopstate->eqfuncoids = NULL;
+	setopstate->hashfunctions = NULL;
+	setopstate->setop_done = false;
+	setopstate->numOutput = 0;
+	setopstate->pergroup = NULL;
+	setopstate->grp_firstTuple = NULL;
+	setopstate->hashtable = NULL;
+	setopstate->tableContext = NULL;
+
+	/*
+	 * create expression context
+	 */
+	ExecAssignExprContext(estate, &setopstate->ps);
+
+	/*
+	 * If hashing, we also need a longer-lived context to store the hash
+	 * table.  The table can't just be kept in the per-query context because
+	 * we want to be able to throw it away in ExecReScanSetOp.
+	 */
+	if (node->strategy == SETOP_HASHED)
+		setopstate->tableContext =
+			AllocSetContextCreate(CurrentMemoryContext,
+								  "SetOp hash table",
+								  ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * initialize child nodes
+	 *
+	 * If we are hashing then the child plan does not need to handle REWIND
+	 * efficiently; see ExecReScanSetOp.
+	 */
+	if (node->strategy == SETOP_HASHED)
+		eflags &= ~EXEC_FLAG_REWIND;
+	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
+	outerDesc = ExecGetResultType(outerPlanState(setopstate));
+
+	/*
+	 * Initialize result slot and type.  Setop nodes do no projections, so
+	 * initialize projection info for this node appropriately.
+	 */
+	ExecInitResultTupleSlotTL(&setopstate->ps,
+							  node->strategy == SETOP_HASHED ?
+							  &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
+	setopstate->ps.ps_ProjInfo = NULL;
+
+	/*
+	 * Precompute fmgr lookup data for inner loop.  We need both equality and
+	 * hashing functions to do it by hashing, but only equality if not
+	 * hashing.
+	 */
+	if (node->strategy == SETOP_HASHED)
+		execTuplesHashPrepare(node->numCols,
+							  node->dupOperators,
+							  &setopstate->eqfuncoids,
+							  &setopstate->hashfunctions);
+	else
+		setopstate->eqfunction =
+			execTuplesMatchPrepare(outerDesc,
+								   node->numCols,
+								   node->dupColIdx,
+								   node->dupOperators,
+								   node->dupCollations,
+								   &setopstate->ps);
+
+	if (node->strategy == SETOP_HASHED)
+	{
+		build_hash_table(setopstate);
+		setopstate->table_filled = false;
+	}
+	else
+	{
+		setopstate->pergroup =
+			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
+	}
+
+	return setopstate;
+}
+
+/* ----------------------------------------------------------------
+ *		ExecEndSetOp
+ *
+ *		This shuts down the subplan and frees resources allocated
+ *		to this node.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndSetOp(SetOpState *node)
+{
+	/* clean up tuple table */
+	ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
+	/* free subsidiary stuff including hashtable */
+	if (node->tableContext)
+		MemoryContextDelete(node->tableContext);
+	ExecFreeExprContext(&node->ps);
+
+	ExecEndNode(outerPlanState(node));
+}
+
+
+void
+ExecReScanSetOp(SetOpState *node)
+{
+	ExecClearTuple(node->ps.ps_ResultTupleSlot);
+	node->setop_done = false;
+	node->numOutput = 0;
+
+	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+	{
+		/*
+		 * In the hashed case, if we haven't yet built the hash table then
+		 * we can just return; nothing done yet, so nothing to undo.  If
+		 * subnode's chgParam is not NULL then it will be re-scanned by
+		 * ExecProcNode, else no reason to re-scan it at all.
+		 */
+		if (!node->table_filled)
+			return;
+
+		/*
+		 * If we do have the hash table and the subplan does not have any
+		 * parameter changes, then we can just rescan the existing hash
+		 * table; no need to build it again.
+		 */
+		if (node->ps.lefttree->chgParam == NULL)
+		{
+			ResetTupleHashIterator(node->hashtable, &node->hashiter);
+			return;
+		}
+	}
+
+	/* Release first tuple of group, if we have made a copy */
+	if (node->grp_firstTuple != NULL)
+	{
+		heap_freetuple(node->grp_firstTuple);
+		node->grp_firstTuple = NULL;
+	}
+
+	/* Release any hashtable storage */
+	if (node->tableContext)
+		MemoryContextResetAndDeleteChildren(node->tableContext);
+
+	/* And rebuild empty hashtable if needed */
+	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+	{
+		ResetTupleHashTable(node->hashtable);
+		node->table_filled = false;
+	}
+
+	/*
+	 * if chgParam of subnode is not null then plan will be re-scanned by
+	 * first ExecProcNode.
+	 */
+	if (node->ps.lefttree->chgParam == NULL)
+		ExecReScan(node->ps.lefttree);
+}
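
The emission rules that set_output_count() takes "straight from the SQL92 specification" can be isolated into a small standalone function. The sketch below restates that decision table outside the executor; Cmd and output_count are simplified stand-ins invented here, not the executor's SetOpCmd or its APIs. It checks the rules against a group with three left-input and two right-input duplicates.

#include <stdio.h>

/* Simplified stand-in for the executor's SetOpCmd enum */
typedef enum { CMD_INTERSECT, CMD_INTERSECT_ALL, CMD_EXCEPT, CMD_EXCEPT_ALL } Cmd;

/* Same decision table as set_output_count(): given how many duplicates of a
 * group came from each input, decide how many copies of the row to emit. */
static long
output_count(Cmd cmd, long numLeft, long numRight)
{
	switch (cmd)
	{
		case CMD_INTERSECT:
			return (numLeft > 0 && numRight > 0) ? 1 : 0;
		case CMD_INTERSECT_ALL:
			return (numLeft < numRight) ? numLeft : numRight;	/* min */
		case CMD_EXCEPT:
			return (numLeft > 0 && numRight == 0) ? 1 : 0;
		case CMD_EXCEPT_ALL:
			return (numLeft < numRight) ? 0 : (numLeft - numRight);
	}
	return 0;
}

int
main(void)
{
	/* A group with 3 left-input duplicates and 2 right-input duplicates */
	printf("INTERSECT     -> %ld\n", output_count(CMD_INTERSECT, 3, 2));		/* 1 */
	printf("INTERSECT ALL -> %ld\n", output_count(CMD_INTERSECT_ALL, 3, 2));	/* 2 */
	printf("EXCEPT        -> %ld\n", output_count(CMD_EXCEPT, 3, 2));			/* 0 */
	printf("EXCEPT ALL    -> %ld\n", output_count(CMD_EXCEPT_ALL, 3, 2));		/* 1 */
	return 0;
}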
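To make the SETOP_SORTED control flow concrete, here is a minimal model of the group scan performed by setop_retrieve_direct(): walk a sorted input whose rows carry the junk flag column (0 = left input, 1 = right input), count per-input duplicates within each run of identical values, then emit however many copies the set operation calls for. This is illustration only, under the assumption of a single integer grouping column; the Row struct and hard-coded data are invented, and INTERSECT ALL's min rule stands in for the full decision table above.

#include <stdio.h>

/* One input row: a grouping value plus the junk flag column */
typedef struct { int val; int flag; } Row;

int
main(void)
{
	/* left = {1,2,2,3}, right = {2,3,3}, appended and sorted by val */
	Row		in[] = {{1,0},{2,0},{2,0},{2,1},{3,0},{3,1},{3,1}};
	int		n = sizeof(in) / sizeof(in[0]);
	int		i = 0;

	while (i < n)
	{
		int		val = in[i].val;
		long	numLeft = 0, numRight = 0;

		/* scan one group of identical values, counting per input relation */
		for (; i < n && in[i].val == val; i++)
		{
			if (in[i].flag)
				numRight++;
			else
				numLeft++;
		}

		/* INTERSECT ALL: emit min(numLeft, numRight) copies of the value */
		long	emit = (numLeft < numRight) ? numLeft : numRight;

		for (long k = 0; k < emit; k++)
			printf("%d\n", val);	/* prints 2, then 3 */
	}
	return 0;
}

The hashed strategy computes the same two counts per group; it just accumulates them in per-entry SetOpStatePerGroupData attached to hash table entries instead of relying on sorted order, then applies the identical emission rules while walking the table.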