diff options
Diffstat (limited to 'src/include/nodes/execnodes.h')
-rw-r--r-- | src/include/nodes/execnodes.h | 2646 |
1 files changed, 2646 insertions, 0 deletions
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h new file mode 100644 index 0000000..3dfac5b --- /dev/null +++ b/src/include/nodes/execnodes.h @@ -0,0 +1,2646 @@ +/*------------------------------------------------------------------------- + * + * execnodes.h + * definitions for executor state nodes + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/nodes/execnodes.h + * + *------------------------------------------------------------------------- + */ +#ifndef EXECNODES_H +#define EXECNODES_H + +#include "access/tupconvert.h" +#include "executor/instrument.h" +#include "fmgr.h" +#include "lib/ilist.h" +#include "lib/pairingheap.h" +#include "nodes/params.h" +#include "nodes/plannodes.h" +#include "nodes/tidbitmap.h" +#include "partitioning/partdefs.h" +#include "storage/condition_variable.h" +#include "utils/hsearch.h" +#include "utils/queryenvironment.h" +#include "utils/reltrigger.h" +#include "utils/sharedtuplestore.h" +#include "utils/snapshot.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" +#include "utils/tuplestore.h" + +struct PlanState; /* forward references in this file */ +struct ParallelHashJoinState; +struct ExecRowMark; +struct ExprState; +struct ExprContext; +struct RangeTblEntry; /* avoid including parsenodes.h here */ +struct ExprEvalStep; /* avoid including execExpr.h everywhere */ +struct CopyMultiInsertBuffer; + + +/* ---------------- + * ExprState node + * + * ExprState is the top-level node for expression evaluation. + * It contains instructions (in ->steps) to evaluate the expression. + * ---------------- + */ +typedef Datum (*ExprStateEvalFunc) (struct ExprState *expression, + struct ExprContext *econtext, + bool *isNull); + +/* Bits in ExprState->flags (see also execExpr.h for private flag bits): */ +/* expression is for use with ExecQual() */ +#define EEO_FLAG_IS_QUAL (1 << 0) + +typedef struct ExprState +{ + NodeTag tag; + + uint8 flags; /* bitmask of EEO_FLAG_* bits, see above */ + + /* + * Storage for result value of a scalar expression, or for individual + * column results within expressions built by ExecBuildProjectionInfo(). + */ +#define FIELDNO_EXPRSTATE_RESNULL 2 + bool resnull; +#define FIELDNO_EXPRSTATE_RESVALUE 3 + Datum resvalue; + + /* + * If projecting a tuple result, this slot holds the result; else NULL. + */ +#define FIELDNO_EXPRSTATE_RESULTSLOT 4 + TupleTableSlot *resultslot; + + /* + * Instructions to compute expression's return value. + */ + struct ExprEvalStep *steps; + + /* + * Function that actually evaluates the expression. This can be set to + * different values depending on the complexity of the expression. + */ + ExprStateEvalFunc evalfunc; + + /* original expression tree, for debugging only */ + Expr *expr; + + /* private state for an evalfunc */ + void *evalfunc_private; + + /* + * XXX: following fields only needed during "compilation" (ExecInitExpr); + * could be thrown away afterwards. + */ + + int steps_len; /* number of steps currently */ + int steps_alloc; /* allocated length of steps array */ + +#define FIELDNO_EXPRSTATE_PARENT 11 + struct PlanState *parent; /* parent PlanState node, if any */ + ParamListInfo ext_params; /* for compiling PARAM_EXTERN nodes */ + + Datum *innermost_caseval; + bool *innermost_casenull; + + Datum *innermost_domainval; + bool *innermost_domainnull; +} ExprState; + + +/* ---------------- + * IndexInfo information + * + * this struct holds the information needed to construct new index + * entries for a particular index. Used for both index_build and + * retail creation of index entries. + * + * NumIndexAttrs total number of columns in this index + * NumIndexKeyAttrs number of key columns in index + * IndexAttrNumbers underlying-rel attribute numbers used as keys + * (zeroes indicate expressions). It also contains + * info about included columns. + * Expressions expr trees for expression entries, or NIL if none + * ExpressionsState exec state for expressions, or NIL if none + * Predicate partial-index predicate, or NIL if none + * PredicateState exec state for predicate, or NIL if none + * ExclusionOps Per-column exclusion operators, or NULL if none + * ExclusionProcs Underlying function OIDs for ExclusionOps + * ExclusionStrats Opclass strategy numbers for ExclusionOps + * UniqueOps These are like Exclusion*, but for unique indexes + * UniqueProcs + * UniqueStrats + * Unique is it a unique index? + * OpclassOptions opclass-specific options, or NULL if none + * ReadyForInserts is it valid for inserts? + * Concurrent are we doing a concurrent index build? + * BrokenHotChain did we detect any broken HOT chains? + * ParallelWorkers # of workers requested (excludes leader) + * Am Oid of index AM + * AmCache private cache area for index AM + * Context memory context holding this IndexInfo + * + * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only + * during index build; they're conventionally zeroed otherwise. + * ---------------- + */ +typedef struct IndexInfo +{ + NodeTag type; + int ii_NumIndexAttrs; /* total number of columns in index */ + int ii_NumIndexKeyAttrs; /* number of key columns in index */ + AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS]; + List *ii_Expressions; /* list of Expr */ + List *ii_ExpressionsState; /* list of ExprState */ + List *ii_Predicate; /* list of Expr */ + ExprState *ii_PredicateState; + Oid *ii_ExclusionOps; /* array with one entry per column */ + Oid *ii_ExclusionProcs; /* array with one entry per column */ + uint16 *ii_ExclusionStrats; /* array with one entry per column */ + Oid *ii_UniqueOps; /* array with one entry per column */ + Oid *ii_UniqueProcs; /* array with one entry per column */ + uint16 *ii_UniqueStrats; /* array with one entry per column */ + Datum *ii_OpclassOptions; /* array with one entry per column */ + bool ii_Unique; + bool ii_ReadyForInserts; + bool ii_Concurrent; + bool ii_BrokenHotChain; + int ii_ParallelWorkers; + Oid ii_Am; + void *ii_AmCache; + MemoryContext ii_Context; +} IndexInfo; + +/* ---------------- + * ExprContext_CB + * + * List of callbacks to be called at ExprContext shutdown. + * ---------------- + */ +typedef void (*ExprContextCallbackFunction) (Datum arg); + +typedef struct ExprContext_CB +{ + struct ExprContext_CB *next; + ExprContextCallbackFunction function; + Datum arg; +} ExprContext_CB; + +/* ---------------- + * ExprContext + * + * This class holds the "current context" information + * needed to evaluate expressions for doing tuple qualifications + * and tuple projections. For example, if an expression refers + * to an attribute in the current inner tuple then we need to know + * what the current inner tuple is and so we look at the expression + * context. + * + * There are two memory contexts associated with an ExprContext: + * * ecxt_per_query_memory is a query-lifespan context, typically the same + * context the ExprContext node itself is allocated in. This context + * can be used for purposes such as storing function call cache info. + * * ecxt_per_tuple_memory is a short-term context for expression results. + * As the name suggests, it will typically be reset once per tuple, + * before we begin to evaluate expressions for that tuple. Each + * ExprContext normally has its very own per-tuple memory context. + * + * CurrentMemoryContext should be set to ecxt_per_tuple_memory before + * calling ExecEvalExpr() --- see ExecEvalExprSwitchContext(). + * ---------------- + */ +typedef struct ExprContext +{ + NodeTag type; + + /* Tuples that Var nodes in expression may refer to */ +#define FIELDNO_EXPRCONTEXT_SCANTUPLE 1 + TupleTableSlot *ecxt_scantuple; +#define FIELDNO_EXPRCONTEXT_INNERTUPLE 2 + TupleTableSlot *ecxt_innertuple; +#define FIELDNO_EXPRCONTEXT_OUTERTUPLE 3 + TupleTableSlot *ecxt_outertuple; + + /* Memory contexts for expression evaluation --- see notes above */ + MemoryContext ecxt_per_query_memory; + MemoryContext ecxt_per_tuple_memory; + + /* Values to substitute for Param nodes in expression */ + ParamExecData *ecxt_param_exec_vals; /* for PARAM_EXEC params */ + ParamListInfo ecxt_param_list_info; /* for other param types */ + + /* + * Values to substitute for Aggref nodes in the expressions of an Agg + * node, or for WindowFunc nodes within a WindowAgg node. + */ +#define FIELDNO_EXPRCONTEXT_AGGVALUES 8 + Datum *ecxt_aggvalues; /* precomputed values for aggs/windowfuncs */ +#define FIELDNO_EXPRCONTEXT_AGGNULLS 9 + bool *ecxt_aggnulls; /* null flags for aggs/windowfuncs */ + + /* Value to substitute for CaseTestExpr nodes in expression */ +#define FIELDNO_EXPRCONTEXT_CASEDATUM 10 + Datum caseValue_datum; +#define FIELDNO_EXPRCONTEXT_CASENULL 11 + bool caseValue_isNull; + + /* Value to substitute for CoerceToDomainValue nodes in expression */ +#define FIELDNO_EXPRCONTEXT_DOMAINDATUM 12 + Datum domainValue_datum; +#define FIELDNO_EXPRCONTEXT_DOMAINNULL 13 + bool domainValue_isNull; + + /* Link to containing EState (NULL if a standalone ExprContext) */ + struct EState *ecxt_estate; + + /* Functions to call back when ExprContext is shut down or rescanned */ + ExprContext_CB *ecxt_callbacks; +} ExprContext; + +/* + * Set-result status used when evaluating functions potentially returning a + * set. + */ +typedef enum +{ + ExprSingleResult, /* expression does not return a set */ + ExprMultipleResult, /* this result is an element of a set */ + ExprEndResult /* there are no more elements in the set */ +} ExprDoneCond; + +/* + * Return modes for functions returning sets. Note values must be chosen + * as separate bits so that a bitmask can be formed to indicate supported + * modes. SFRM_Materialize_Random and SFRM_Materialize_Preferred are + * auxiliary flags about SFRM_Materialize mode, rather than separate modes. + */ +typedef enum +{ + SFRM_ValuePerCall = 0x01, /* one value returned per call */ + SFRM_Materialize = 0x02, /* result set instantiated in Tuplestore */ + SFRM_Materialize_Random = 0x04, /* Tuplestore needs randomAccess */ + SFRM_Materialize_Preferred = 0x08 /* caller prefers Tuplestore */ +} SetFunctionReturnMode; + +/* + * When calling a function that might return a set (multiple rows), + * a node of this type is passed as fcinfo->resultinfo to allow + * return status to be passed back. A function returning set should + * raise an error if no such resultinfo is provided. + */ +typedef struct ReturnSetInfo +{ + NodeTag type; + /* values set by caller: */ + ExprContext *econtext; /* context function is being called in */ + TupleDesc expectedDesc; /* tuple descriptor expected by caller */ + int allowedModes; /* bitmask: return modes caller can handle */ + /* result status from function (but pre-initialized by caller): */ + SetFunctionReturnMode returnMode; /* actual return mode */ + ExprDoneCond isDone; /* status for ValuePerCall mode */ + /* fields filled by function in Materialize return mode: */ + Tuplestorestate *setResult; /* holds the complete returned tuple set */ + TupleDesc setDesc; /* actual descriptor for returned tuples */ +} ReturnSetInfo; + +/* ---------------- + * ProjectionInfo node information + * + * This is all the information needed to perform projections --- + * that is, form new tuples by evaluation of targetlist expressions. + * Nodes which need to do projections create one of these. + * + * The target tuple slot is kept in ProjectionInfo->pi_state.resultslot. + * ExecProject() evaluates the tlist, forms a tuple, and stores it + * in the given slot. Note that the result will be a "virtual" tuple + * unless ExecMaterializeSlot() is then called to force it to be + * converted to a physical tuple. The slot must have a tupledesc + * that matches the output of the tlist! + * ---------------- + */ +typedef struct ProjectionInfo +{ + NodeTag type; + /* instructions to evaluate projection */ + ExprState pi_state; + /* expression context in which to evaluate expression */ + ExprContext *pi_exprContext; +} ProjectionInfo; + +/* ---------------- + * JunkFilter + * + * This class is used to store information regarding junk attributes. + * A junk attribute is an attribute in a tuple that is needed only for + * storing intermediate information in the executor, and does not belong + * in emitted tuples. For example, when we do an UPDATE query, + * the planner adds a "junk" entry to the targetlist so that the tuples + * returned to ExecutePlan() contain an extra attribute: the ctid of + * the tuple to be updated. This is needed to do the update, but we + * don't want the ctid to be part of the stored new tuple! So, we + * apply a "junk filter" to remove the junk attributes and form the + * real output tuple. The junkfilter code also provides routines to + * extract the values of the junk attribute(s) from the input tuple. + * + * targetList: the original target list (including junk attributes). + * cleanTupType: the tuple descriptor for the "clean" tuple (with + * junk attributes removed). + * cleanMap: A map with the correspondence between the non-junk + * attribute numbers of the "original" tuple and the + * attribute numbers of the "clean" tuple. + * resultSlot: tuple slot used to hold cleaned tuple. + * ---------------- + */ +typedef struct JunkFilter +{ + NodeTag type; + List *jf_targetList; + TupleDesc jf_cleanTupType; + AttrNumber *jf_cleanMap; + TupleTableSlot *jf_resultSlot; +} JunkFilter; + +/* + * OnConflictSetState + * + * Executor state of an ON CONFLICT DO UPDATE operation. + */ +typedef struct OnConflictSetState +{ + NodeTag type; + + TupleTableSlot *oc_Existing; /* slot to store existing target tuple in */ + TupleTableSlot *oc_ProjSlot; /* CONFLICT ... SET ... projection target */ + ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */ + ExprState *oc_WhereClause; /* state for the WHERE clause */ +} OnConflictSetState; + +/* + * ResultRelInfo + * + * Whenever we update an existing relation, we have to update indexes on the + * relation, and perhaps also fire triggers. ResultRelInfo holds all the + * information needed about a result relation, including indexes. + * + * Normally, a ResultRelInfo refers to a table that is in the query's range + * table; then ri_RangeTableIndex is the RT index and ri_RelationDesc is + * just a copy of the relevant es_relations[] entry. However, in some + * situations we create ResultRelInfos for relations that are not in the + * range table, namely for targets of tuple routing in a partitioned table, + * and when firing triggers in tables other than the target tables (See + * ExecGetTriggerResultRel). In these situations, ri_RangeTableIndex is 0 + * and ri_RelationDesc is a separately-opened relcache pointer that needs to + * be separately closed. + */ +typedef struct ResultRelInfo +{ + NodeTag type; + + /* result relation's range table index, or 0 if not in range table */ + Index ri_RangeTableIndex; + + /* relation descriptor for result relation */ + Relation ri_RelationDesc; + + /* # of indices existing on result relation */ + int ri_NumIndices; + + /* array of relation descriptors for indices */ + RelationPtr ri_IndexRelationDescs; + + /* array of key/attr info for indices */ + IndexInfo **ri_IndexRelationInfo; + + /* + * For UPDATE/DELETE result relations, the attribute number of the row + * identity junk attribute in the source plan's output tuples + */ + AttrNumber ri_RowIdAttNo; + + /* Projection to generate new tuple in an INSERT/UPDATE */ + ProjectionInfo *ri_projectNew; + /* Slot to hold that tuple */ + TupleTableSlot *ri_newTupleSlot; + /* Slot to hold the old tuple being updated */ + TupleTableSlot *ri_oldTupleSlot; + /* Have the projection and the slots above been initialized? */ + bool ri_projectNewInfoValid; + + /* triggers to be fired, if any */ + TriggerDesc *ri_TrigDesc; + + /* cached lookup info for trigger functions */ + FmgrInfo *ri_TrigFunctions; + + /* array of trigger WHEN expr states */ + ExprState **ri_TrigWhenExprs; + + /* optional runtime measurements for triggers */ + Instrumentation *ri_TrigInstrument; + + /* On-demand created slots for triggers / returning processing */ + TupleTableSlot *ri_ReturningSlot; /* for trigger output tuples */ + TupleTableSlot *ri_TrigOldSlot; /* for a trigger's old tuple */ + TupleTableSlot *ri_TrigNewSlot; /* for a trigger's new tuple */ + + /* FDW callback functions, if foreign table */ + struct FdwRoutine *ri_FdwRoutine; + + /* available to save private state of FDW */ + void *ri_FdwState; + + /* true when modifying foreign table directly */ + bool ri_usesFdwDirectModify; + + /* batch insert stuff */ + int ri_NumSlots; /* number of slots in the array */ + int ri_NumSlotsInitialized; /* number of initialized slots */ + int ri_BatchSize; /* max slots inserted in a single batch */ + TupleTableSlot **ri_Slots; /* input tuples for batch insert */ + TupleTableSlot **ri_PlanSlots; + + /* list of WithCheckOption's to be checked */ + List *ri_WithCheckOptions; + + /* list of WithCheckOption expr states */ + List *ri_WithCheckOptionExprs; + + /* array of constraint-checking expr states */ + ExprState **ri_ConstraintExprs; + + /* array of stored generated columns expr states */ + ExprState **ri_GeneratedExprs; + + /* number of stored generated columns we need to compute */ + int ri_NumGeneratedNeeded; + + /* list of RETURNING expressions */ + List *ri_returningList; + + /* for computing a RETURNING list */ + ProjectionInfo *ri_projectReturning; + + /* list of arbiter indexes to use to check conflicts */ + List *ri_onConflictArbiterIndexes; + + /* ON CONFLICT evaluation state */ + OnConflictSetState *ri_onConflict; + + /* partition check expression state (NULL if not set up yet) */ + ExprState *ri_PartitionCheckExpr; + + /* + * Information needed by tuple routing target relations + * + * RootResultRelInfo gives the target relation mentioned in the query, if + * it's a partitioned table. It is not set if the target relation + * mentioned in the query is an inherited table, nor when tuple routing is + * not needed. + * + * RootToPartitionMap and PartitionTupleSlot, initialized by + * ExecInitRoutingInfo, are non-NULL if partition has a different tuple + * format than the root table. + */ + struct ResultRelInfo *ri_RootResultRelInfo; + TupleConversionMap *ri_RootToPartitionMap; + TupleTableSlot *ri_PartitionTupleSlot; + + /* + * Map to convert child result relation tuples to the format of the table + * actually mentioned in the query (called "root"). Computed only if + * needed. A NULL map value indicates that no conversion is needed, so we + * must have a separate flag to show if the map has been computed. + */ + TupleConversionMap *ri_ChildToRootMap; + bool ri_ChildToRootMapValid; + + /* for use by copyfrom.c when performing multi-inserts */ + struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer; +} ResultRelInfo; + +/* ---------------- + * AsyncRequest + * + * State for an asynchronous tuple request. + * ---------------- + */ +typedef struct AsyncRequest +{ + struct PlanState *requestor; /* Node that wants a tuple */ + struct PlanState *requestee; /* Node from which a tuple is wanted */ + int request_index; /* Scratch space for requestor */ + bool callback_pending; /* Callback is needed */ + bool request_complete; /* Request complete, result valid */ + TupleTableSlot *result; /* Result (NULL or an empty slot if no more + * tuples) */ +} AsyncRequest; + +/* ---------------- + * EState information + * + * Working state for an Executor invocation + * ---------------- + */ +typedef struct EState +{ + NodeTag type; + + /* Basic state for all query types: */ + ScanDirection es_direction; /* current scan direction */ + Snapshot es_snapshot; /* time qual to use */ + Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */ + List *es_range_table; /* List of RangeTblEntry */ + Index es_range_table_size; /* size of the range table arrays */ + Relation *es_relations; /* Array of per-range-table-entry Relation + * pointers, or NULL if not yet opened */ + struct ExecRowMark **es_rowmarks; /* Array of per-range-table-entry + * ExecRowMarks, or NULL if none */ + PlannedStmt *es_plannedstmt; /* link to top of plan tree */ + const char *es_sourceText; /* Source text from QueryDesc */ + + JunkFilter *es_junkFilter; /* top-level junk filter, if any */ + + /* If query can insert/delete tuples, the command ID to mark them with */ + CommandId es_output_cid; + + /* Info about target table(s) for insert/update/delete queries: */ + ResultRelInfo **es_result_relations; /* Array of per-range-table-entry + * ResultRelInfo pointers, or NULL + * if not a target table */ + List *es_opened_result_relations; /* List of non-NULL entries in + * es_result_relations in no + * specific order */ + + PartitionDirectory es_partition_directory; /* for PartitionDesc lookup */ + + /* + * The following list contains ResultRelInfos created by the tuple routing + * code for partitions that aren't found in the es_result_relations array. + */ + List *es_tuple_routing_result_relations; + + /* Stuff used for firing triggers: */ + List *es_trig_target_relations; /* trigger-only ResultRelInfos */ + + /* Parameter info: */ + ParamListInfo es_param_list_info; /* values of external params */ + ParamExecData *es_param_exec_vals; /* values of internal params */ + + QueryEnvironment *es_queryEnv; /* query environment */ + + /* Other working state: */ + MemoryContext es_query_cxt; /* per-query context in which EState lives */ + + List *es_tupleTable; /* List of TupleTableSlots */ + + uint64 es_processed; /* # of tuples processed */ + + int es_top_eflags; /* eflags passed to ExecutorStart */ + int es_instrument; /* OR of InstrumentOption flags */ + bool es_finished; /* true when ExecutorFinish is done */ + + List *es_exprcontexts; /* List of ExprContexts within EState */ + + List *es_subplanstates; /* List of PlanState for SubPlans */ + + List *es_auxmodifytables; /* List of secondary ModifyTableStates */ + + /* + * this ExprContext is for per-output-tuple operations, such as constraint + * checks and index-value computations. It will be reset for each output + * tuple. Note that it will be created only if needed. + */ + ExprContext *es_per_tuple_exprcontext; + + /* + * If not NULL, this is an EPQState's EState. This is a field in EState + * both to allow EvalPlanQual aware executor nodes to detect that they + * need to perform EPQ related work, and to provide necessary information + * to do so. + */ + struct EPQState *es_epq_active; + + bool es_use_parallel_mode; /* can we use parallel workers? */ + + /* The per-query shared memory area to use for parallel execution. */ + struct dsa_area *es_query_dsa; + + /* + * JIT information. es_jit_flags indicates whether JIT should be performed + * and with which options. es_jit is created on-demand when JITing is + * performed. + * + * es_jit_worker_instr is the combined, on demand allocated, + * instrumentation from all workers. The leader's instrumentation is kept + * separate, and is combined on demand by ExplainPrintJITSummary(). + */ + int es_jit_flags; + struct JitContext *es_jit; + struct JitInstrumentation *es_jit_worker_instr; +} EState; + + +/* + * ExecRowMark - + * runtime representation of FOR [KEY] UPDATE/SHARE clauses + * + * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we will have an + * ExecRowMark for each non-target relation in the query (except inheritance + * parent RTEs, which can be ignored at runtime). Virtual relations such as + * subqueries-in-FROM will have an ExecRowMark with relation == NULL. See + * PlanRowMark for details about most of the fields. In addition to fields + * directly derived from PlanRowMark, we store an activity flag (to denote + * inactive children of inheritance trees), curCtid, which is used by the + * WHERE CURRENT OF code, and ermExtra, which is available for use by the plan + * node that sources the relation (e.g., for a foreign table the FDW can use + * ermExtra to hold information). + * + * EState->es_rowmarks is an array of these structs, indexed by RT index, + * with NULLs for irrelevant RT indexes. es_rowmarks itself is NULL if + * there are no rowmarks. + */ +typedef struct ExecRowMark +{ + Relation relation; /* opened and suitably locked relation */ + Oid relid; /* its OID (or InvalidOid, if subquery) */ + Index rti; /* its range table index */ + Index prti; /* parent range table index, if child */ + Index rowmarkId; /* unique identifier for resjunk columns */ + RowMarkType markType; /* see enum in nodes/plannodes.h */ + LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */ + LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */ + bool ermActive; /* is this mark relevant for current tuple? */ + ItemPointerData curCtid; /* ctid of currently locked tuple, if any */ + void *ermExtra; /* available for use by relation source node */ +} ExecRowMark; + +/* + * ExecAuxRowMark - + * additional runtime representation of FOR [KEY] UPDATE/SHARE clauses + * + * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to + * deal with. In addition to a pointer to the related entry in es_rowmarks, + * this struct carries the column number(s) of the resjunk columns associated + * with the rowmark (see comments for PlanRowMark for more detail). + */ +typedef struct ExecAuxRowMark +{ + ExecRowMark *rowmark; /* related entry in es_rowmarks */ + AttrNumber ctidAttNo; /* resno of ctid junk attribute, if any */ + AttrNumber toidAttNo; /* resno of tableoid junk attribute, if any */ + AttrNumber wholeAttNo; /* resno of whole-row junk attribute, if any */ +} ExecAuxRowMark; + + +/* ---------------------------------------------------------------- + * Tuple Hash Tables + * + * All-in-memory tuple hash tables are used for a number of purposes. + * + * Note: tab_hash_funcs are for the key datatype(s) stored in the table, + * and tab_eq_funcs are non-cross-type equality operators for those types. + * Normally these are the only functions used, but FindTupleHashEntry() + * supports searching a hashtable using cross-data-type hashing. For that, + * the caller must supply hash functions for the LHS datatype as well as + * the cross-type equality operators to use. in_hash_funcs and cur_eq_func + * are set to point to the caller's function arrays while doing such a search. + * During LookupTupleHashEntry(), they point to tab_hash_funcs and + * tab_eq_func respectively. + * ---------------------------------------------------------------- + */ +typedef struct TupleHashEntryData *TupleHashEntry; +typedef struct TupleHashTableData *TupleHashTable; + +typedef struct TupleHashEntryData +{ + MinimalTuple firstTuple; /* copy of first tuple in this group */ + void *additional; /* user data */ + uint32 status; /* hash status */ + uint32 hash; /* hash value (cached) */ +} TupleHashEntryData; + +/* define parameters necessary to generate the tuple hash table interface */ +#define SH_PREFIX tuplehash +#define SH_ELEMENT_TYPE TupleHashEntryData +#define SH_KEY_TYPE MinimalTuple +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +typedef struct TupleHashTableData +{ + tuplehash_hash *hashtab; /* underlying hash table */ + int numCols; /* number of columns in lookup key */ + AttrNumber *keyColIdx; /* attr numbers of key columns */ + FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ + ExprState *tab_eq_func; /* comparator for table datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ + MemoryContext tablecxt; /* memory context containing table */ + MemoryContext tempcxt; /* context for function evaluations */ + Size entrysize; /* actual size to make each hash entry */ + TupleTableSlot *tableslot; /* slot for referencing table entries */ + /* The following fields are set transiently for each table search: */ + TupleTableSlot *inputslot; /* current input tuple's slot */ + FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ + ExprState *cur_eq_func; /* comparator for input vs. table */ + uint32 hash_iv; /* hash-function IV */ + ExprContext *exprcontext; /* expression context */ +} TupleHashTableData; + +typedef tuplehash_iterator TupleHashIterator; + +/* + * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan. + * Use ResetTupleHashIterator if the table can be frozen (in this case no + * explicit scan termination is needed). + */ +#define InitTupleHashIterator(htable, iter) \ + tuplehash_start_iterate(htable->hashtab, iter) +#define TermTupleHashIterator(iter) \ + ((void) 0) +#define ResetTupleHashIterator(htable, iter) \ + InitTupleHashIterator(htable, iter) +#define ScanTupleHashTable(htable, iter) \ + tuplehash_iterate(htable->hashtab, iter) + + +/* ---------------------------------------------------------------- + * Expression State Nodes + * + * Formerly, there was a separate executor expression state node corresponding + * to each node in a planned expression tree. That's no longer the case; for + * common expression node types, all the execution info is embedded into + * step(s) in a single ExprState node. But we still have a few executor state + * node types for selected expression node types, mostly those in which info + * has to be shared with other parts of the execution state tree. + * ---------------------------------------------------------------- + */ + +/* ---------------- + * WindowFuncExprState node + * ---------------- + */ +typedef struct WindowFuncExprState +{ + NodeTag type; + WindowFunc *wfunc; /* expression plan node */ + List *args; /* ExprStates for argument expressions */ + ExprState *aggfilter; /* FILTER expression */ + int wfuncno; /* ID number for wfunc within its plan node */ +} WindowFuncExprState; + + +/* ---------------- + * SetExprState node + * + * State for evaluating a potentially set-returning expression (like FuncExpr + * or OpExpr). In some cases, like some of the expressions in ROWS FROM(...) + * the expression might not be a SRF, but nonetheless it uses the same + * machinery as SRFs; it will be treated as a SRF returning a single row. + * ---------------- + */ +typedef struct SetExprState +{ + NodeTag type; + Expr *expr; /* expression plan node */ + List *args; /* ExprStates for argument expressions */ + + /* + * In ROWS FROM, functions can be inlined, removing the FuncExpr normally + * inside. In such a case this is the compiled expression (which cannot + * return a set), which'll be evaluated using regular ExecEvalExpr(). + */ + ExprState *elidedFuncState; + + /* + * Function manager's lookup info for the target function. If func.fn_oid + * is InvalidOid, we haven't initialized it yet (nor any of the following + * fields, except funcReturnsSet). + */ + FmgrInfo func; + + /* + * For a set-returning function (SRF) that returns a tuplestore, we keep + * the tuplestore here and dole out the result rows one at a time. The + * slot holds the row currently being returned. + */ + Tuplestorestate *funcResultStore; + TupleTableSlot *funcResultSlot; + + /* + * In some cases we need to compute a tuple descriptor for the function's + * output. If so, it's stored here. + */ + TupleDesc funcResultDesc; + bool funcReturnsTuple; /* valid when funcResultDesc isn't NULL */ + + /* + * Remember whether the function is declared to return a set. This is set + * by ExecInitExpr, and is valid even before the FmgrInfo is set up. + */ + bool funcReturnsSet; + + /* + * setArgsValid is true when we are evaluating a set-returning function + * that uses value-per-call mode and we are in the middle of a call + * series; we want to pass the same argument values to the function again + * (and again, until it returns ExprEndResult). This indicates that + * fcinfo_data already contains valid argument data. + */ + bool setArgsValid; + + /* + * Flag to remember whether we have registered a shutdown callback for + * this SetExprState. We do so only if funcResultStore or setArgsValid + * has been set at least once (since all the callback is for is to release + * the tuplestore or clear setArgsValid). + */ + bool shutdown_reg; /* a shutdown callback is registered */ + + /* + * Call parameter structure for the function. This has been initialized + * (by InitFunctionCallInfoData) if func.fn_oid is valid. It also saves + * argument values between calls, when setArgsValid is true. + */ + FunctionCallInfo fcinfo; +} SetExprState; + +/* ---------------- + * SubPlanState node + * ---------------- + */ +typedef struct SubPlanState +{ + NodeTag type; + SubPlan *subplan; /* expression plan node */ + struct PlanState *planstate; /* subselect plan's state tree */ + struct PlanState *parent; /* parent plan node's state tree */ + ExprState *testexpr; /* state of combining expression */ + List *args; /* states of argument expression(s) */ + HeapTuple curTuple; /* copy of most recent tuple from subplan */ + Datum curArray; /* most recent array from ARRAY() subplan */ + /* these are used when hashing the subselect's output: */ + TupleDesc descRight; /* subselect desc after projection */ + ProjectionInfo *projLeft; /* for projecting lefthand exprs */ + ProjectionInfo *projRight; /* for projecting subselect output */ + TupleHashTable hashtable; /* hash table for no-nulls subselect rows */ + TupleHashTable hashnulls; /* hash table for rows with null(s) */ + bool havehashrows; /* true if hashtable is not empty */ + bool havenullrows; /* true if hashnulls is not empty */ + MemoryContext hashtablecxt; /* memory context containing hash tables */ + MemoryContext hashtempcxt; /* temp memory context for hash tables */ + ExprContext *innerecontext; /* econtext for computing inner tuples */ + int numCols; /* number of columns being hashed */ + /* each of the remaining fields is an array of length numCols: */ + AttrNumber *keyColIdx; /* control data for hash tables */ + Oid *tab_eq_funcoids; /* equality func oids for table + * datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ + FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ + FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */ + FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ + FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */ + ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */ +} SubPlanState; + +/* + * DomainConstraintState - one item to check during CoerceToDomain + * + * Note: we consider this to be part of an ExprState tree, so we give it + * a name following the xxxState convention. But there's no directly + * associated plan-tree node. + */ +typedef enum DomainConstraintType +{ + DOM_CONSTRAINT_NOTNULL, + DOM_CONSTRAINT_CHECK +} DomainConstraintType; + +typedef struct DomainConstraintState +{ + NodeTag type; + DomainConstraintType constrainttype; /* constraint type */ + char *name; /* name of constraint (for error msgs) */ + Expr *check_expr; /* for CHECK, a boolean expression */ + ExprState *check_exprstate; /* check_expr's eval state, or NULL */ +} DomainConstraintState; + + +/* ---------------------------------------------------------------- + * Executor State Trees + * + * An executing query has a PlanState tree paralleling the Plan tree + * that describes the plan. + * ---------------------------------------------------------------- + */ + +/* ---------------- + * ExecProcNodeMtd + * + * This is the method called by ExecProcNode to return the next tuple + * from an executor node. It returns NULL, or an empty TupleTableSlot, + * if no more tuples are available. + * ---------------- + */ +typedef TupleTableSlot *(*ExecProcNodeMtd) (struct PlanState *pstate); + +/* ---------------- + * PlanState node + * + * We never actually instantiate any PlanState nodes; this is just the common + * abstract superclass for all PlanState-type nodes. + * ---------------- + */ +typedef struct PlanState +{ + NodeTag type; + + Plan *plan; /* associated Plan node */ + + EState *state; /* at execution time, states of individual + * nodes point to one EState for the whole + * top-level plan */ + + ExecProcNodeMtd ExecProcNode; /* function to return next tuple */ + ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a + * wrapper */ + + Instrumentation *instrument; /* Optional runtime stats for this node */ + WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */ + + /* Per-worker JIT instrumentation */ + struct SharedJitInstrumentation *worker_jit_instrument; + + /* + * Common structural data for all Plan types. These links to subsidiary + * state trees parallel links in the associated plan tree (except for the + * subPlan list, which does not exist in the plan tree). + */ + ExprState *qual; /* boolean qual condition */ + struct PlanState *lefttree; /* input plan tree(s) */ + struct PlanState *righttree; + + List *initPlan; /* Init SubPlanState nodes (un-correlated expr + * subselects) */ + List *subPlan; /* SubPlanState nodes in my expressions */ + + /* + * State for management of parameter-change-driven rescanning + */ + Bitmapset *chgParam; /* set of IDs of changed Params */ + + /* + * Other run-time state needed by most if not all node types. + */ + TupleDesc ps_ResultTupleDesc; /* node's return type */ + TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */ + ExprContext *ps_ExprContext; /* node's expression-evaluation context */ + ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */ + + bool async_capable; /* true if node is async-capable */ + + /* + * Scanslot's descriptor if known. This is a bit of a hack, but otherwise + * it's hard for expression compilation to optimize based on the + * descriptor, without encoding knowledge about all executor nodes. + */ + TupleDesc scandesc; + + /* + * Define the slot types for inner, outer and scanslots for expression + * contexts with this state as a parent. If *opsset is set, then + * *opsfixed indicates whether *ops is guaranteed to be the type of slot + * used. That means that every slot in the corresponding + * ExprContext.ecxt_*tuple will point to a slot of that type, while + * evaluating the expression. If *opsfixed is false, but *ops is set, + * that indicates the most likely type of slot. + * + * The scan* fields are set by ExecInitScanTupleSlot(). If that's not + * called, nodes can initialize the fields themselves. + * + * If outer/inneropsset is false, the information is inferred on-demand + * using ExecGetResultSlotOps() on ->righttree/lefttree, using the + * corresponding node's resultops* fields. + * + * The result* fields are automatically set when ExecInitResultSlot is + * used (be it directly or when the slot is created by + * ExecAssignScanProjectionInfo() / + * ExecConditionalAssignProjectionInfo()). If no projection is necessary + * ExecConditionalAssignProjectionInfo() defaults those fields to the scan + * operations. + */ + const TupleTableSlotOps *scanops; + const TupleTableSlotOps *outerops; + const TupleTableSlotOps *innerops; + const TupleTableSlotOps *resultops; + bool scanopsfixed; + bool outeropsfixed; + bool inneropsfixed; + bool resultopsfixed; + bool scanopsset; + bool outeropsset; + bool inneropsset; + bool resultopsset; +} PlanState; + +/* ---------------- + * these are defined to avoid confusion problems with "left" + * and "right" and "inner" and "outer". The convention is that + * the "left" plan is the "outer" plan and the "right" plan is + * the inner plan, but these make the code more readable. + * ---------------- + */ +#define innerPlanState(node) (((PlanState *)(node))->righttree) +#define outerPlanState(node) (((PlanState *)(node))->lefttree) + +/* Macros for inline access to certain instrumentation counters */ +#define InstrCountTuples2(node, delta) \ + do { \ + if (((PlanState *)(node))->instrument) \ + ((PlanState *)(node))->instrument->ntuples2 += (delta); \ + } while (0) +#define InstrCountFiltered1(node, delta) \ + do { \ + if (((PlanState *)(node))->instrument) \ + ((PlanState *)(node))->instrument->nfiltered1 += (delta); \ + } while(0) +#define InstrCountFiltered2(node, delta) \ + do { \ + if (((PlanState *)(node))->instrument) \ + ((PlanState *)(node))->instrument->nfiltered2 += (delta); \ + } while(0) + +/* + * EPQState is state for executing an EvalPlanQual recheck on a candidate + * tuples e.g. in ModifyTable or LockRows. + * + * To execute EPQ a separate EState is created (stored in ->recheckestate), + * which shares some resources, like the rangetable, with the main query's + * EState (stored in ->parentestate). The (sub-)tree of the plan that needs to + * be rechecked (in ->plan), is separately initialized (into + * ->recheckplanstate), but shares plan nodes with the corresponding nodes in + * the main query. The scan nodes in that separate executor tree are changed + * to return only the current tuple of interest for the respective + * table. Those tuples are either provided by the caller (using + * EvalPlanQualSlot), and/or found using the rowmark mechanism (non-locking + * rowmarks by the EPQ machinery itself, locking ones by the caller). + * + * While the plan to be checked may be changed using EvalPlanQualSetPlan(), + * all such plans need to share the same EState. + */ +typedef struct EPQState +{ + /* Initialized at EvalPlanQualInit() time: */ + + EState *parentestate; /* main query's EState */ + int epqParam; /* ID of Param to force scan node re-eval */ + + /* + * Tuples to be substituted by scan nodes. They need to set up, before + * calling EvalPlanQual()/EvalPlanQualNext(), into the slot returned by + * EvalPlanQualSlot(scanrelid). The array is indexed by scanrelid - 1. + */ + List *tuple_table; /* tuple table for relsubs_slot */ + TupleTableSlot **relsubs_slot; + + /* + * Initialized by EvalPlanQualInit(), may be changed later with + * EvalPlanQualSetPlan(): + */ + + Plan *plan; /* plan tree to be executed */ + List *arowMarks; /* ExecAuxRowMarks (non-locking only) */ + + + /* + * The original output tuple to be rechecked. Set by + * EvalPlanQualSetSlot(), before EvalPlanQualNext() or EvalPlanQual() may + * be called. + */ + TupleTableSlot *origslot; + + + /* Initialized or reset by EvalPlanQualBegin(): */ + + EState *recheckestate; /* EState for EPQ execution, see above */ + + /* + * Rowmarks that can be fetched on-demand using + * EvalPlanQualFetchRowMark(), indexed by scanrelid - 1. Only non-locking + * rowmarks. + */ + ExecAuxRowMark **relsubs_rowmark; + + /* + * True if a relation's EPQ tuple has been fetched for relation, indexed + * by scanrelid - 1. + */ + bool *relsubs_done; + + PlanState *recheckplanstate; /* EPQ specific exec nodes, for ->plan */ +} EPQState; + + +/* ---------------- + * ResultState information + * ---------------- + */ +typedef struct ResultState +{ + PlanState ps; /* its first field is NodeTag */ + ExprState *resconstantqual; + bool rs_done; /* are we done? */ + bool rs_checkqual; /* do we need to check the qual? */ +} ResultState; + +/* ---------------- + * ProjectSetState information + * + * Note: at least one of the "elems" will be a SetExprState; the rest are + * regular ExprStates. + * ---------------- + */ +typedef struct ProjectSetState +{ + PlanState ps; /* its first field is NodeTag */ + Node **elems; /* array of expression states */ + ExprDoneCond *elemdone; /* array of per-SRF is-done states */ + int nelems; /* length of elemdone[] array */ + bool pending_srf_tuples; /* still evaluating srfs in tlist? */ + MemoryContext argcontext; /* context for SRF arguments */ +} ProjectSetState; + +/* ---------------- + * ModifyTableState information + * ---------------- + */ +typedef struct ModifyTableState +{ + PlanState ps; /* its first field is NodeTag */ + CmdType operation; /* INSERT, UPDATE, or DELETE */ + bool canSetTag; /* do we set the command tag/es_processed? */ + bool mt_done; /* are we done? */ + int mt_nrels; /* number of entries in resultRelInfo[] */ + ResultRelInfo *resultRelInfo; /* info about target relation(s) */ + + /* + * Target relation mentioned in the original statement, used to fire + * statement-level triggers and as the root for tuple routing. (This + * might point to one of the resultRelInfo[] entries, but it can also be a + * distinct struct.) + */ + ResultRelInfo *rootResultRelInfo; + + EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */ + bool fireBSTriggers; /* do we need to fire stmt triggers? */ + + /* + * These fields are used for inherited UPDATE and DELETE, to track which + * target relation a given tuple is from. If there are a lot of target + * relations, we use a hash table to translate table OIDs to + * resultRelInfo[] indexes; otherwise mt_resultOidHash is NULL. + */ + int mt_resultOidAttno; /* resno of "tableoid" junk attr */ + Oid mt_lastResultOid; /* last-seen value of tableoid */ + int mt_lastResultIndex; /* corresponding index in resultRelInfo[] */ + HTAB *mt_resultOidHash; /* optional hash table to speed lookups */ + + /* + * Slot for storing tuples in the root partitioned table's rowtype during + * an UPDATE of a partitioned table. + */ + TupleTableSlot *mt_root_tuple_slot; + + /* Tuple-routing support info */ + struct PartitionTupleRouting *mt_partition_tuple_routing; + + /* controls transition table population for specified operation */ + struct TransitionCaptureState *mt_transition_capture; + + /* controls transition table population for INSERT...ON CONFLICT UPDATE */ + struct TransitionCaptureState *mt_oc_transition_capture; +} ModifyTableState; + +/* ---------------- + * AppendState information + * + * nplans how many plans are in the array + * whichplan which synchronous plan is being executed (0 .. n-1) + * or a special negative value. See nodeAppend.c. + * prune_state details required to allow partitions to be + * eliminated from the scan, or NULL if not possible. + * valid_subplans for runtime pruning, valid synchronous appendplans + * indexes to scan. + * ---------------- + */ + +struct AppendState; +typedef struct AppendState AppendState; +struct ParallelAppendState; +typedef struct ParallelAppendState ParallelAppendState; +struct PartitionPruneState; + +struct AppendState +{ + PlanState ps; /* its first field is NodeTag */ + PlanState **appendplans; /* array of PlanStates for my inputs */ + int as_nplans; + int as_whichplan; + bool as_begun; /* false means need to initialize */ + Bitmapset *as_asyncplans; /* asynchronous plans indexes */ + int as_nasyncplans; /* # of asynchronous plans */ + AsyncRequest **as_asyncrequests; /* array of AsyncRequests */ + TupleTableSlot **as_asyncresults; /* unreturned results of async plans */ + int as_nasyncresults; /* # of valid entries in as_asyncresults */ + bool as_syncdone; /* true if all synchronous plans done in + * asynchronous mode, else false */ + int as_nasyncremain; /* # of remaining asynchronous plans */ + Bitmapset *as_needrequest; /* asynchronous plans needing a new request */ + struct WaitEventSet *as_eventset; /* WaitEventSet used to configure file + * descriptor wait events */ + int as_first_partial_plan; /* Index of 'appendplans' containing + * the first partial plan */ + ParallelAppendState *as_pstate; /* parallel coordination info */ + Size pstate_len; /* size of parallel coordination info */ + struct PartitionPruneState *as_prune_state; + Bitmapset *as_valid_subplans; + Bitmapset *as_valid_asyncplans; /* valid asynchronous plans indexes */ + bool (*choose_next_subplan) (AppendState *); +}; + +/* ---------------- + * MergeAppendState information + * + * nplans how many plans are in the array + * nkeys number of sort key columns + * sortkeys sort keys in SortSupport representation + * slots current output tuple of each subplan + * heap heap of active tuples + * initialized true if we have fetched first tuple from each subplan + * prune_state details required to allow partitions to be + * eliminated from the scan, or NULL if not possible. + * valid_subplans for runtime pruning, valid mergeplans indexes to + * scan. + * ---------------- + */ +typedef struct MergeAppendState +{ + PlanState ps; /* its first field is NodeTag */ + PlanState **mergeplans; /* array of PlanStates for my inputs */ + int ms_nplans; + int ms_nkeys; + SortSupport ms_sortkeys; /* array of length ms_nkeys */ + TupleTableSlot **ms_slots; /* array of length ms_nplans */ + struct binaryheap *ms_heap; /* binary heap of slot indices */ + bool ms_initialized; /* are subplans started? */ + struct PartitionPruneState *ms_prune_state; + Bitmapset *ms_valid_subplans; +} MergeAppendState; + +/* ---------------- + * RecursiveUnionState information + * + * RecursiveUnionState is used for performing a recursive union. + * + * recursing T when we're done scanning the non-recursive term + * intermediate_empty T if intermediate_table is currently empty + * working_table working table (to be scanned by recursive term) + * intermediate_table current recursive output (next generation of WT) + * ---------------- + */ +typedef struct RecursiveUnionState +{ + PlanState ps; /* its first field is NodeTag */ + bool recursing; + bool intermediate_empty; + Tuplestorestate *working_table; + Tuplestorestate *intermediate_table; + /* Remaining fields are unused in UNION ALL case */ + Oid *eqfuncoids; /* per-grouping-field equality fns */ + FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ + MemoryContext tempContext; /* short-term context for comparisons */ + TupleHashTable hashtable; /* hash table for tuples already seen */ + MemoryContext tableContext; /* memory context containing hash table */ +} RecursiveUnionState; + +/* ---------------- + * BitmapAndState information + * ---------------- + */ +typedef struct BitmapAndState +{ + PlanState ps; /* its first field is NodeTag */ + PlanState **bitmapplans; /* array of PlanStates for my inputs */ + int nplans; /* number of input plans */ +} BitmapAndState; + +/* ---------------- + * BitmapOrState information + * ---------------- + */ +typedef struct BitmapOrState +{ + PlanState ps; /* its first field is NodeTag */ + PlanState **bitmapplans; /* array of PlanStates for my inputs */ + int nplans; /* number of input plans */ +} BitmapOrState; + +/* ---------------------------------------------------------------- + * Scan State Information + * ---------------------------------------------------------------- + */ + +/* ---------------- + * ScanState information + * + * ScanState extends PlanState for node types that represent + * scans of an underlying relation. It can also be used for nodes + * that scan the output of an underlying plan node --- in that case, + * only ScanTupleSlot is actually useful, and it refers to the tuple + * retrieved from the subplan. + * + * currentRelation relation being scanned (NULL if none) + * currentScanDesc current scan descriptor for scan (NULL if none) + * ScanTupleSlot pointer to slot in tuple table holding scan tuple + * ---------------- + */ +typedef struct ScanState +{ + PlanState ps; /* its first field is NodeTag */ + Relation ss_currentRelation; + struct TableScanDescData *ss_currentScanDesc; + TupleTableSlot *ss_ScanTupleSlot; +} ScanState; + +/* ---------------- + * SeqScanState information + * ---------------- + */ +typedef struct SeqScanState +{ + ScanState ss; /* its first field is NodeTag */ + Size pscan_len; /* size of parallel heap scan descriptor */ +} SeqScanState; + +/* ---------------- + * SampleScanState information + * ---------------- + */ +typedef struct SampleScanState +{ + ScanState ss; + List *args; /* expr states for TABLESAMPLE params */ + ExprState *repeatable; /* expr state for REPEATABLE expr */ + /* use struct pointer to avoid including tsmapi.h here */ + struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */ + void *tsm_state; /* tablesample method can keep state here */ + bool use_bulkread; /* use bulkread buffer access strategy? */ + bool use_pagemode; /* use page-at-a-time visibility checking? */ + bool begun; /* false means need to call BeginSampleScan */ + uint32 seed; /* random seed */ + int64 donetuples; /* number of tuples already returned */ + bool haveblock; /* has a block for sampling been determined */ + bool done; /* exhausted all tuples? */ +} SampleScanState; + +/* + * These structs store information about index quals that don't have simple + * constant right-hand sides. See comments for ExecIndexBuildScanKeys() + * for discussion. + */ +typedef struct +{ + struct ScanKeyData *scan_key; /* scankey to put value into */ + ExprState *key_expr; /* expr to evaluate to get value */ + bool key_toastable; /* is expr's result a toastable datatype? */ +} IndexRuntimeKeyInfo; + +typedef struct +{ + struct ScanKeyData *scan_key; /* scankey to put value into */ + ExprState *array_expr; /* expr to evaluate to get array value */ + int next_elem; /* next array element to use */ + int num_elems; /* number of elems in current array value */ + Datum *elem_values; /* array of num_elems Datums */ + bool *elem_nulls; /* array of num_elems is-null flags */ +} IndexArrayKeyInfo; + +/* ---------------- + * IndexScanState information + * + * indexqualorig execution state for indexqualorig expressions + * indexorderbyorig execution state for indexorderbyorig expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * + * ReorderQueue tuples that need reordering due to re-check + * ReachedEnd have we fetched all tuples from index already? + * OrderByValues values of ORDER BY exprs of last fetched tuple + * OrderByNulls null flags for OrderByValues + * SortSupport for reordering ORDER BY exprs + * OrderByTypByVals is the datatype of order by expression pass-by-value? + * OrderByTypLens typlens of the datatypes of order by expressions + * PscanLen size of parallel index scan descriptor + * ---------------- + */ +typedef struct IndexScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *indexqualorig; + List *indexorderbyorig; + struct ScanKeyData *iss_ScanKeys; + int iss_NumScanKeys; + struct ScanKeyData *iss_OrderByKeys; + int iss_NumOrderByKeys; + IndexRuntimeKeyInfo *iss_RuntimeKeys; + int iss_NumRuntimeKeys; + bool iss_RuntimeKeysReady; + ExprContext *iss_RuntimeContext; + Relation iss_RelationDesc; + struct IndexScanDescData *iss_ScanDesc; + + /* These are needed for re-checking ORDER BY expr ordering */ + pairingheap *iss_ReorderQueue; + bool iss_ReachedEnd; + Datum *iss_OrderByValues; + bool *iss_OrderByNulls; + SortSupport iss_SortSupport; + bool *iss_OrderByTypByVals; + int16 *iss_OrderByTypLens; + Size iss_PscanLen; +} IndexScanState; + +/* ---------------- + * IndexOnlyScanState information + * + * recheckqual execution state for recheckqual expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * TableSlot slot for holding tuples fetched from the table + * VMBuffer buffer in use for visibility map testing, if any + * PscanLen size of parallel index-only scan descriptor + * ---------------- + */ +typedef struct IndexOnlyScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *recheckqual; + struct ScanKeyData *ioss_ScanKeys; + int ioss_NumScanKeys; + struct ScanKeyData *ioss_OrderByKeys; + int ioss_NumOrderByKeys; + IndexRuntimeKeyInfo *ioss_RuntimeKeys; + int ioss_NumRuntimeKeys; + bool ioss_RuntimeKeysReady; + ExprContext *ioss_RuntimeContext; + Relation ioss_RelationDesc; + struct IndexScanDescData *ioss_ScanDesc; + TupleTableSlot *ioss_TableSlot; + Buffer ioss_VMBuffer; + Size ioss_PscanLen; +} IndexOnlyScanState; + +/* ---------------- + * BitmapIndexScanState information + * + * result bitmap to return output into, or NULL + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * ArrayKeys info about Skeys that come from ScalarArrayOpExprs + * NumArrayKeys number of ArrayKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * ---------------- + */ +typedef struct BitmapIndexScanState +{ + ScanState ss; /* its first field is NodeTag */ + TIDBitmap *biss_result; + struct ScanKeyData *biss_ScanKeys; + int biss_NumScanKeys; + IndexRuntimeKeyInfo *biss_RuntimeKeys; + int biss_NumRuntimeKeys; + IndexArrayKeyInfo *biss_ArrayKeys; + int biss_NumArrayKeys; + bool biss_RuntimeKeysReady; + ExprContext *biss_RuntimeContext; + Relation biss_RelationDesc; + struct IndexScanDescData *biss_ScanDesc; +} BitmapIndexScanState; + +/* ---------------- + * SharedBitmapState information + * + * BM_INITIAL TIDBitmap creation is not yet started, so first worker + * to see this state will set the state to BM_INPROGRESS + * and that process will be responsible for creating + * TIDBitmap. + * BM_INPROGRESS TIDBitmap creation is in progress; workers need to + * sleep until it's finished. + * BM_FINISHED TIDBitmap creation is done, so now all workers can + * proceed to iterate over TIDBitmap. + * ---------------- + */ +typedef enum +{ + BM_INITIAL, + BM_INPROGRESS, + BM_FINISHED +} SharedBitmapState; + +/* ---------------- + * ParallelBitmapHeapState information + * tbmiterator iterator for scanning current pages + * prefetch_iterator iterator for prefetching ahead of current page + * mutex mutual exclusion for the prefetching variable + * and state + * prefetch_pages # pages prefetch iterator is ahead of current + * prefetch_target current target prefetch distance + * state current state of the TIDBitmap + * cv conditional wait variable + * phs_snapshot_data snapshot data shared to workers + * ---------------- + */ +typedef struct ParallelBitmapHeapState +{ + dsa_pointer tbmiterator; + dsa_pointer prefetch_iterator; + slock_t mutex; + int prefetch_pages; + int prefetch_target; + SharedBitmapState state; + ConditionVariable cv; + char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; +} ParallelBitmapHeapState; + +/* ---------------- + * BitmapHeapScanState information + * + * bitmapqualorig execution state for bitmapqualorig expressions + * tbm bitmap obtained from child index scan(s) + * tbmiterator iterator for scanning current pages + * tbmres current-page data + * can_skip_fetch can we potentially skip tuple fetches in this scan? + * return_empty_tuples number of empty tuples to return + * vmbuffer buffer for visibility-map lookups + * pvmbuffer ditto, for prefetched pages + * exact_pages total number of exact pages retrieved + * lossy_pages total number of lossy pages retrieved + * prefetch_iterator iterator for prefetching ahead of current page + * prefetch_pages # pages prefetch iterator is ahead of current + * prefetch_target current target prefetch distance + * prefetch_maximum maximum value for prefetch_target + * pscan_len size of the shared memory for parallel bitmap + * initialized is node is ready to iterate + * shared_tbmiterator shared iterator + * shared_prefetch_iterator shared iterator for prefetching + * pstate shared state for parallel bitmap scan + * ---------------- + */ +typedef struct BitmapHeapScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *bitmapqualorig; + TIDBitmap *tbm; + TBMIterator *tbmiterator; + TBMIterateResult *tbmres; + bool can_skip_fetch; + int return_empty_tuples; + Buffer vmbuffer; + Buffer pvmbuffer; + long exact_pages; + long lossy_pages; + TBMIterator *prefetch_iterator; + int prefetch_pages; + int prefetch_target; + int prefetch_maximum; + Size pscan_len; + bool initialized; + TBMSharedIterator *shared_tbmiterator; + TBMSharedIterator *shared_prefetch_iterator; + ParallelBitmapHeapState *pstate; +} BitmapHeapScanState; + +/* ---------------- + * TidScanState information + * + * tidexprs list of TidExpr structs (see nodeTidscan.c) + * isCurrentOf scan has a CurrentOfExpr qual + * NumTids number of tids in this scan + * TidPtr index of currently fetched tid + * TidList evaluated item pointers (array of size NumTids) + * htup currently-fetched tuple, if any + * ---------------- + */ +typedef struct TidScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *tss_tidexprs; + bool tss_isCurrentOf; + int tss_NumTids; + int tss_TidPtr; + ItemPointerData *tss_TidList; + HeapTupleData tss_htup; +} TidScanState; + +/* ---------------- + * TidRangeScanState information + * + * trss_tidexprs list of TidOpExpr structs (see nodeTidrangescan.c) + * trss_mintid the lowest TID in the scan range + * trss_maxtid the highest TID in the scan range + * trss_inScan is a scan currently in progress? + * ---------------- + */ +typedef struct TidRangeScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *trss_tidexprs; + ItemPointerData trss_mintid; + ItemPointerData trss_maxtid; + bool trss_inScan; +} TidRangeScanState; + +/* ---------------- + * SubqueryScanState information + * + * SubqueryScanState is used for scanning a sub-query in the range table. + * ScanTupleSlot references the current output tuple of the sub-query. + * ---------------- + */ +typedef struct SubqueryScanState +{ + ScanState ss; /* its first field is NodeTag */ + PlanState *subplan; +} SubqueryScanState; + +/* ---------------- + * FunctionScanState information + * + * Function nodes are used to scan the results of a + * function appearing in FROM (typically a function returning set). + * + * eflags node's capability flags + * ordinality is this scan WITH ORDINALITY? + * simple true if we have 1 function and no ordinality + * ordinal current ordinal column value + * nfuncs number of functions being executed + * funcstates per-function execution states (private in + * nodeFunctionscan.c) + * argcontext memory context to evaluate function arguments in + * ---------------- + */ +struct FunctionScanPerFuncState; + +typedef struct FunctionScanState +{ + ScanState ss; /* its first field is NodeTag */ + int eflags; + bool ordinality; + bool simple; + int64 ordinal; + int nfuncs; + struct FunctionScanPerFuncState *funcstates; /* array of length nfuncs */ + MemoryContext argcontext; +} FunctionScanState; + +/* ---------------- + * ValuesScanState information + * + * ValuesScan nodes are used to scan the results of a VALUES list + * + * rowcontext per-expression-list context + * exprlists array of expression lists being evaluated + * exprstatelists array of expression state lists, for SubPlans only + * array_len size of above arrays + * curr_idx current array index (0-based) + * + * Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection + * expressions attached to the node. We create a second ExprContext, + * rowcontext, in which to build the executor expression state for each + * Values sublist. Resetting this context lets us get rid of expression + * state for each row, avoiding major memory leakage over a long values list. + * However, that doesn't work for sublists containing SubPlans, because a + * SubPlan has to be connected up to the outer plan tree to work properly. + * Therefore, for only those sublists containing SubPlans, we do expression + * state construction at executor start, and store those pointers in + * exprstatelists[]. NULL entries in that array correspond to simple + * subexpressions that are handled as described above. + * ---------------- + */ +typedef struct ValuesScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprContext *rowcontext; + List **exprlists; + List **exprstatelists; + int array_len; + int curr_idx; +} ValuesScanState; + +/* ---------------- + * TableFuncScanState node + * + * Used in table-expression functions like XMLTABLE. + * ---------------- + */ +typedef struct TableFuncScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *docexpr; /* state for document expression */ + ExprState *rowexpr; /* state for row-generating expression */ + List *colexprs; /* state for column-generating expression */ + List *coldefexprs; /* state for column default expressions */ + List *ns_names; /* same as TableFunc.ns_names */ + List *ns_uris; /* list of states of namespace URI exprs */ + Bitmapset *notnulls; /* nullability flag for each output column */ + void *opaque; /* table builder private space */ + const struct TableFuncRoutine *routine; /* table builder methods */ + FmgrInfo *in_functions; /* input function for each column */ + Oid *typioparams; /* typioparam for each column */ + int64 ordinal; /* row number to be output next */ + MemoryContext perTableCxt; /* per-table context */ + Tuplestorestate *tupstore; /* output tuple store */ +} TableFuncScanState; + +/* ---------------- + * CteScanState information + * + * CteScan nodes are used to scan a CommonTableExpr query. + * + * Multiple CteScan nodes can read out from the same CTE query. We use + * a tuplestore to hold rows that have been read from the CTE query but + * not yet consumed by all readers. + * ---------------- + */ +typedef struct CteScanState +{ + ScanState ss; /* its first field is NodeTag */ + int eflags; /* capability flags to pass to tuplestore */ + int readptr; /* index of my tuplestore read pointer */ + PlanState *cteplanstate; /* PlanState for the CTE query itself */ + /* Link to the "leader" CteScanState (possibly this same node) */ + struct CteScanState *leader; + /* The remaining fields are only valid in the "leader" CteScanState */ + Tuplestorestate *cte_table; /* rows already read from the CTE query */ + bool eof_cte; /* reached end of CTE query? */ +} CteScanState; + +/* ---------------- + * NamedTuplestoreScanState information + * + * NamedTuplestoreScan nodes are used to scan a Tuplestore created and + * named prior to execution of the query. An example is a transition + * table for an AFTER trigger. + * + * Multiple NamedTuplestoreScan nodes can read out from the same Tuplestore. + * ---------------- + */ +typedef struct NamedTuplestoreScanState +{ + ScanState ss; /* its first field is NodeTag */ + int readptr; /* index of my tuplestore read pointer */ + TupleDesc tupdesc; /* format of the tuples in the tuplestore */ + Tuplestorestate *relation; /* the rows */ +} NamedTuplestoreScanState; + +/* ---------------- + * WorkTableScanState information + * + * WorkTableScan nodes are used to scan the work table created by + * a RecursiveUnion node. We locate the RecursiveUnion node + * during executor startup. + * ---------------- + */ +typedef struct WorkTableScanState +{ + ScanState ss; /* its first field is NodeTag */ + RecursiveUnionState *rustate; +} WorkTableScanState; + +/* ---------------- + * ForeignScanState information + * + * ForeignScan nodes are used to scan foreign-data tables. + * ---------------- + */ +typedef struct ForeignScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *fdw_recheck_quals; /* original quals not in ss.ps.qual */ + Size pscan_len; /* size of parallel coordination information */ + ResultRelInfo *resultRelInfo; /* result rel info, if UPDATE or DELETE */ + /* use struct pointer to avoid including fdwapi.h here */ + struct FdwRoutine *fdwroutine; + void *fdw_state; /* foreign-data wrapper can keep state here */ +} ForeignScanState; + +/* ---------------- + * CustomScanState information + * + * CustomScan nodes are used to execute custom code within executor. + * + * Core code must avoid assuming that the CustomScanState is only as large as + * the structure declared here; providers are allowed to make it the first + * element in a larger structure, and typically would need to do so. The + * struct is actually allocated by the CreateCustomScanState method associated + * with the plan node. Any additional fields can be initialized there, or in + * the BeginCustomScan method. + * ---------------- + */ +struct CustomExecMethods; + +typedef struct CustomScanState +{ + ScanState ss; + uint32 flags; /* mask of CUSTOMPATH_* flags, see + * nodes/extensible.h */ + List *custom_ps; /* list of child PlanState nodes, if any */ + Size pscan_len; /* size of parallel coordination information */ + const struct CustomExecMethods *methods; +} CustomScanState; + +/* ---------------------------------------------------------------- + * Join State Information + * ---------------------------------------------------------------- + */ + +/* ---------------- + * JoinState information + * + * Superclass for state nodes of join plans. + * ---------------- + */ +typedef struct JoinState +{ + PlanState ps; + JoinType jointype; + bool single_match; /* True if we should skip to next outer tuple + * after finding one inner match */ + ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */ +} JoinState; + +/* ---------------- + * NestLoopState information + * + * NeedNewOuter true if need new outer tuple on next call + * MatchedOuter true if found a join match for current outer tuple + * NullInnerTupleSlot prepared null tuple for left outer joins + * ---------------- + */ +typedef struct NestLoopState +{ + JoinState js; /* its first field is NodeTag */ + bool nl_NeedNewOuter; + bool nl_MatchedOuter; + TupleTableSlot *nl_NullInnerTupleSlot; +} NestLoopState; + +/* ---------------- + * MergeJoinState information + * + * NumClauses number of mergejoinable join clauses + * Clauses info for each mergejoinable clause + * JoinState current state of ExecMergeJoin state machine + * SkipMarkRestore true if we may skip Mark and Restore operations + * ExtraMarks true to issue extra Mark operations on inner scan + * ConstFalseJoin true if we have a constant-false joinqual + * FillOuter true if should emit unjoined outer tuples anyway + * FillInner true if should emit unjoined inner tuples anyway + * MatchedOuter true if found a join match for current outer tuple + * MatchedInner true if found a join match for current inner tuple + * OuterTupleSlot slot in tuple table for cur outer tuple + * InnerTupleSlot slot in tuple table for cur inner tuple + * MarkedTupleSlot slot in tuple table for marked tuple + * NullOuterTupleSlot prepared null tuple for right outer joins + * NullInnerTupleSlot prepared null tuple for left outer joins + * OuterEContext workspace for computing outer tuple's join values + * InnerEContext workspace for computing inner tuple's join values + * ---------------- + */ +/* private in nodeMergejoin.c: */ +typedef struct MergeJoinClauseData *MergeJoinClause; + +typedef struct MergeJoinState +{ + JoinState js; /* its first field is NodeTag */ + int mj_NumClauses; + MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */ + int mj_JoinState; + bool mj_SkipMarkRestore; + bool mj_ExtraMarks; + bool mj_ConstFalseJoin; + bool mj_FillOuter; + bool mj_FillInner; + bool mj_MatchedOuter; + bool mj_MatchedInner; + TupleTableSlot *mj_OuterTupleSlot; + TupleTableSlot *mj_InnerTupleSlot; + TupleTableSlot *mj_MarkedTupleSlot; + TupleTableSlot *mj_NullOuterTupleSlot; + TupleTableSlot *mj_NullInnerTupleSlot; + ExprContext *mj_OuterEContext; + ExprContext *mj_InnerEContext; +} MergeJoinState; + +/* ---------------- + * HashJoinState information + * + * hashclauses original form of the hashjoin condition + * hj_OuterHashKeys the outer hash keys in the hashjoin condition + * hj_HashOperators the join operators in the hashjoin condition + * hj_HashTable hash table for the hashjoin + * (NULL if table not built yet) + * hj_CurHashValue hash value for current outer tuple + * hj_CurBucketNo regular bucket# for current outer tuple + * hj_CurSkewBucketNo skew bucket# for current outer tuple + * hj_CurTuple last inner tuple matched to current outer + * tuple, or NULL if starting search + * (hj_CurXXX variables are undefined if + * OuterTupleSlot is empty!) + * hj_OuterTupleSlot tuple slot for outer tuples + * hj_HashTupleSlot tuple slot for inner (hashed) tuples + * hj_NullOuterTupleSlot prepared null tuple for right/full outer joins + * hj_NullInnerTupleSlot prepared null tuple for left/full outer joins + * hj_FirstOuterTupleSlot first tuple retrieved from outer plan + * hj_JoinState current state of ExecHashJoin state machine + * hj_MatchedOuter true if found a join match for current outer + * hj_OuterNotEmpty true if outer relation known not empty + * ---------------- + */ + +/* these structs are defined in executor/hashjoin.h: */ +typedef struct HashJoinTupleData *HashJoinTuple; +typedef struct HashJoinTableData *HashJoinTable; + +typedef struct HashJoinState +{ + JoinState js; /* its first field is NodeTag */ + ExprState *hashclauses; + List *hj_OuterHashKeys; /* list of ExprState nodes */ + List *hj_HashOperators; /* list of operator OIDs */ + List *hj_Collations; + HashJoinTable hj_HashTable; + uint32 hj_CurHashValue; + int hj_CurBucketNo; + int hj_CurSkewBucketNo; + HashJoinTuple hj_CurTuple; + TupleTableSlot *hj_OuterTupleSlot; + TupleTableSlot *hj_HashTupleSlot; + TupleTableSlot *hj_NullOuterTupleSlot; + TupleTableSlot *hj_NullInnerTupleSlot; + TupleTableSlot *hj_FirstOuterTupleSlot; + int hj_JoinState; + bool hj_MatchedOuter; + bool hj_OuterNotEmpty; +} HashJoinState; + + +/* ---------------------------------------------------------------- + * Materialization State Information + * ---------------------------------------------------------------- + */ + +/* ---------------- + * MaterialState information + * + * materialize nodes are used to materialize the results + * of a subplan into a temporary file. + * + * ss.ss_ScanTupleSlot refers to output of underlying plan. + * ---------------- + */ +typedef struct MaterialState +{ + ScanState ss; /* its first field is NodeTag */ + int eflags; /* capability flags to pass to tuplestore */ + bool eof_underlying; /* reached end of underlying plan? */ + Tuplestorestate *tuplestorestate; +} MaterialState; + +struct MemoizeEntry; +struct MemoizeTuple; +struct MemoizeKey; + +typedef struct MemoizeInstrumentation +{ + uint64 cache_hits; /* number of rescans where we've found the + * scan parameter values to be cached */ + uint64 cache_misses; /* number of rescans where we've not found the + * scan parameter values to be cached. */ + uint64 cache_evictions; /* number of cache entries removed due to + * the need to free memory */ + uint64 cache_overflows; /* number of times we've had to bypass the + * cache when filling it due to not being + * able to free enough space to store the + * current scan's tuples. */ + uint64 mem_peak; /* peak memory usage in bytes */ +} MemoizeInstrumentation; + +/* ---------------- + * Shared memory container for per-worker memoize information + * ---------------- + */ +typedef struct SharedMemoizeInfo +{ + int num_workers; + MemoizeInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedMemoizeInfo; + +/* ---------------- + * MemoizeState information + * + * memoize nodes are used to cache recent and commonly seen results from + * a parameterized scan. + * ---------------- + */ +typedef struct MemoizeState +{ + ScanState ss; /* its first field is NodeTag */ + int mstatus; /* value of ExecMemoize state machine */ + int nkeys; /* number of cache keys */ + struct memoize_hash *hashtable; /* hash table for cache entries */ + TupleDesc hashkeydesc; /* tuple descriptor for cache keys */ + TupleTableSlot *tableslot; /* min tuple slot for existing cache entries */ + TupleTableSlot *probeslot; /* virtual slot used for hash lookups */ + ExprState *cache_eq_expr; /* Compare exec params to hash key */ + ExprState **param_exprs; /* exprs containing the parameters to this + * node */ + FmgrInfo *hashfunctions; /* lookup data for hash funcs nkeys in size */ + Oid *collations; /* collation for comparisons nkeys in size */ + uint64 mem_used; /* bytes of memory used by cache */ + uint64 mem_limit; /* memory limit in bytes for the cache */ + MemoryContext tableContext; /* memory context to store cache data */ + dlist_head lru_list; /* least recently used entry list */ + struct MemoizeTuple *last_tuple; /* Used to point to the last tuple + * returned during a cache hit and the + * tuple we last stored when + * populating the cache. */ + struct MemoizeEntry *entry; /* the entry that 'last_tuple' belongs to or + * NULL if 'last_tuple' is NULL. */ + bool singlerow; /* true if the cache entry is to be marked as + * complete after caching the first tuple. */ + bool binary_mode; /* true when cache key should be compared bit + * by bit, false when using hash equality ops */ + MemoizeInstrumentation stats; /* execution statistics */ + SharedMemoizeInfo *shared_info; /* statistics for parallel workers */ + Bitmapset *keyparamids; /* Param->paramids of expressions belonging to + * param_exprs */ +} MemoizeState; + +/* ---------------- + * When performing sorting by multiple keys, it's possible that the input + * dataset is already sorted on a prefix of those keys. We call these + * "presorted keys". + * PresortedKeyData represents information about one such key. + * ---------------- + */ +typedef struct PresortedKeyData +{ + FmgrInfo flinfo; /* comparison function info */ + FunctionCallInfo fcinfo; /* comparison function call info */ + OffsetNumber attno; /* attribute number in tuple */ +} PresortedKeyData; + +/* ---------------- + * Shared memory container for per-worker sort information + * ---------------- + */ +typedef struct SharedSortInfo +{ + int num_workers; + TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedSortInfo; + +/* ---------------- + * SortState information + * ---------------- + */ +typedef struct SortState +{ + ScanState ss; /* its first field is NodeTag */ + bool randomAccess; /* need random access to sort output? */ + bool bounded; /* is the result set bounded? */ + int64 bound; /* if bounded, how many tuples are needed */ + bool sort_Done; /* sort completed yet? */ + bool bounded_Done; /* value of bounded we did the sort with */ + int64 bound_Done; /* value of bound we did the sort with */ + void *tuplesortstate; /* private state of tuplesort.c */ + bool am_worker; /* are we a worker? */ + SharedSortInfo *shared_info; /* one entry per worker */ +} SortState; + +/* ---------------- + * Instrumentation information for IncrementalSort + * ---------------- + */ +typedef struct IncrementalSortGroupInfo +{ + int64 groupCount; + int64 maxDiskSpaceUsed; + int64 totalDiskSpaceUsed; + int64 maxMemorySpaceUsed; + int64 totalMemorySpaceUsed; + bits32 sortMethods; /* bitmask of TuplesortMethod */ +} IncrementalSortGroupInfo; + +typedef struct IncrementalSortInfo +{ + IncrementalSortGroupInfo fullsortGroupInfo; + IncrementalSortGroupInfo prefixsortGroupInfo; +} IncrementalSortInfo; + +/* ---------------- + * Shared memory container for per-worker incremental sort information + * ---------------- + */ +typedef struct SharedIncrementalSortInfo +{ + int num_workers; + IncrementalSortInfo sinfo[FLEXIBLE_ARRAY_MEMBER]; +} SharedIncrementalSortInfo; + +/* ---------------- + * IncrementalSortState information + * ---------------- + */ +typedef enum +{ + INCSORT_LOADFULLSORT, + INCSORT_LOADPREFIXSORT, + INCSORT_READFULLSORT, + INCSORT_READPREFIXSORT, +} IncrementalSortExecutionStatus; + +typedef struct IncrementalSortState +{ + ScanState ss; /* its first field is NodeTag */ + bool bounded; /* is the result set bounded? */ + int64 bound; /* if bounded, how many tuples are needed */ + bool outerNodeDone; /* finished fetching tuples from outer node */ + int64 bound_Done; /* value of bound we did the sort with */ + IncrementalSortExecutionStatus execution_status; + int64 n_fullsort_remaining; + Tuplesortstate *fullsort_state; /* private state of tuplesort.c */ + Tuplesortstate *prefixsort_state; /* private state of tuplesort.c */ + /* the keys by which the input path is already sorted */ + PresortedKeyData *presorted_keys; + + IncrementalSortInfo incsort_info; + + /* slot for pivot tuple defining values of presorted keys within group */ + TupleTableSlot *group_pivot; + TupleTableSlot *transfer_tuple; + bool am_worker; /* are we a worker? */ + SharedIncrementalSortInfo *shared_info; /* one entry per worker */ +} IncrementalSortState; + +/* --------------------- + * GroupState information + * --------------------- + */ +typedef struct GroupState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *eqfunction; /* equality function */ + bool grp_done; /* indicates completion of Group scan */ +} GroupState; + +/* --------------------- + * per-worker aggregate information + * --------------------- + */ +typedef struct AggregateInstrumentation +{ + Size hash_mem_peak; /* peak hash table memory usage */ + uint64 hash_disk_used; /* kB of disk space used */ + int hash_batches_used; /* batches used during entire execution */ +} AggregateInstrumentation; + +/* ---------------- + * Shared memory container for per-worker aggregate information + * ---------------- + */ +typedef struct SharedAggInfo +{ + int num_workers; + AggregateInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedAggInfo; + +/* --------------------- + * AggState information + * + * ss.ss_ScanTupleSlot refers to output of underlying plan. + * + * Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and + * ecxt_aggnulls arrays, which hold the computed agg values for the current + * input group during evaluation of an Agg node's output tuple(s). We + * create a second ExprContext, tmpcontext, in which to evaluate input + * expressions and run the aggregate transition functions. + * --------------------- + */ +/* these structs are private in nodeAgg.c: */ +typedef struct AggStatePerAggData *AggStatePerAgg; +typedef struct AggStatePerTransData *AggStatePerTrans; +typedef struct AggStatePerGroupData *AggStatePerGroup; +typedef struct AggStatePerPhaseData *AggStatePerPhase; +typedef struct AggStatePerHashData *AggStatePerHash; + +typedef struct AggState +{ + ScanState ss; /* its first field is NodeTag */ + List *aggs; /* all Aggref nodes in targetlist & quals */ + int numaggs; /* length of list (could be zero!) */ + int numtrans; /* number of pertrans items */ + AggStrategy aggstrategy; /* strategy mode */ + AggSplit aggsplit; /* agg-splitting mode, see nodes.h */ + AggStatePerPhase phase; /* pointer to current phase data */ + int numphases; /* number of phases (including phase 0) */ + int current_phase; /* current phase number */ + AggStatePerAgg peragg; /* per-Aggref information */ + AggStatePerTrans pertrans; /* per-Trans state information */ + ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */ + ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ + ExprContext *tmpcontext; /* econtext for input expressions */ +#define FIELDNO_AGGSTATE_CURAGGCONTEXT 14 + ExprContext *curaggcontext; /* currently active aggcontext */ + AggStatePerAgg curperagg; /* currently active aggregate, if any */ +#define FIELDNO_AGGSTATE_CURPERTRANS 16 + AggStatePerTrans curpertrans; /* currently active trans state, if any */ + bool input_done; /* indicates end of input */ + bool agg_done; /* indicates completion of Agg scan */ + int projected_set; /* The last projected grouping set */ +#define FIELDNO_AGGSTATE_CURRENT_SET 20 + int current_set; /* The current grouping set being evaluated */ + Bitmapset *grouped_cols; /* grouped cols in current projection */ + List *all_grouped_cols; /* list of all grouped cols in DESC order */ + Bitmapset *colnos_needed; /* all columns needed from the outer plan */ + int max_colno_needed; /* highest colno needed from outer plan */ + bool all_cols_needed; /* are all cols from outer plan needed? */ + /* These fields are for grouping set phase data */ + int maxsets; /* The max number of sets in any phase */ + AggStatePerPhase phases; /* array of all phases */ + Tuplesortstate *sort_in; /* sorted input to phases > 1 */ + Tuplesortstate *sort_out; /* input is copied here for next phase */ + TupleTableSlot *sort_slot; /* slot for sort results */ + /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ + AggStatePerGroup *pergroups; /* grouping set indexed array of per-group + * pointers */ + HeapTuple grp_firstTuple; /* copy of first tuple of current group */ + /* these fields are used in AGG_HASHED and AGG_MIXED modes: */ + bool table_filled; /* hash table filled yet? */ + int num_hashes; + MemoryContext hash_metacxt; /* memory for hash table itself */ + struct HashTapeInfo *hash_tapeinfo; /* metadata for spill tapes */ + struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set, + * exists only during first pass */ + TupleTableSlot *hash_spill_rslot; /* for reading spill files */ + TupleTableSlot *hash_spill_wslot; /* for writing spill files */ + List *hash_batches; /* hash batches remaining to be processed */ + bool hash_ever_spilled; /* ever spilled during this execution? */ + bool hash_spill_mode; /* we hit a limit during the current batch + * and we must not create new groups */ + Size hash_mem_limit; /* limit before spilling hash table */ + uint64 hash_ngroups_limit; /* limit before spilling hash table */ + int hash_planned_partitions; /* number of partitions planned + * for first pass */ + double hashentrysize; /* estimate revised during execution */ + Size hash_mem_peak; /* peak hash table memory usage */ + uint64 hash_ngroups_current; /* number of groups currently in + * memory in all hash tables */ + uint64 hash_disk_used; /* kB of disk space used */ + int hash_batches_used; /* batches used during entire execution */ + + AggStatePerHash perhash; /* array of per-hashtable data */ + AggStatePerGroup *hash_pergroup; /* grouping set indexed array of + * per-group pointers */ + + /* support for evaluation of agg input expressions: */ +#define FIELDNO_AGGSTATE_ALL_PERGROUPS 53 + AggStatePerGroup *all_pergroups; /* array of first ->pergroups, than + * ->hash_pergroup */ + ProjectionInfo *combinedproj; /* projection machinery */ + SharedAggInfo *shared_info; /* one entry per worker */ +} AggState; + +/* ---------------- + * WindowAggState information + * ---------------- + */ +/* these structs are private in nodeWindowAgg.c: */ +typedef struct WindowStatePerFuncData *WindowStatePerFunc; +typedef struct WindowStatePerAggData *WindowStatePerAgg; + +typedef struct WindowAggState +{ + ScanState ss; /* its first field is NodeTag */ + + /* these fields are filled in by ExecInitExpr: */ + List *funcs; /* all WindowFunc nodes in targetlist */ + int numfuncs; /* total number of window functions */ + int numaggs; /* number that are plain aggregates */ + + WindowStatePerFunc perfunc; /* per-window-function information */ + WindowStatePerAgg peragg; /* per-plain-aggregate information */ + ExprState *partEqfunction; /* equality funcs for partition columns */ + ExprState *ordEqfunction; /* equality funcs for ordering columns */ + Tuplestorestate *buffer; /* stores rows of current partition */ + int current_ptr; /* read pointer # for current row */ + int framehead_ptr; /* read pointer # for frame head, if used */ + int frametail_ptr; /* read pointer # for frame tail, if used */ + int grouptail_ptr; /* read pointer # for group tail, if used */ + int64 spooled_rows; /* total # of rows in buffer */ + int64 currentpos; /* position of current row in partition */ + int64 frameheadpos; /* current frame head position */ + int64 frametailpos; /* current frame tail position (frame end+1) */ + /* use struct pointer to avoid including windowapi.h here */ + struct WindowObjectData *agg_winobj; /* winobj for aggregate fetches */ + int64 aggregatedbase; /* start row for current aggregates */ + int64 aggregatedupto; /* rows before this one are aggregated */ + + int frameOptions; /* frame_clause options, see WindowDef */ + ExprState *startOffset; /* expression for starting bound offset */ + ExprState *endOffset; /* expression for ending bound offset */ + Datum startOffsetValue; /* result of startOffset evaluation */ + Datum endOffsetValue; /* result of endOffset evaluation */ + + /* these fields are used with RANGE offset PRECEDING/FOLLOWING: */ + FmgrInfo startInRangeFunc; /* in_range function for startOffset */ + FmgrInfo endInRangeFunc; /* in_range function for endOffset */ + Oid inRangeColl; /* collation for in_range tests */ + bool inRangeAsc; /* use ASC sort order for in_range tests? */ + bool inRangeNullsFirst; /* nulls sort first for in_range tests? */ + + /* these fields are used in GROUPS mode: */ + int64 currentgroup; /* peer group # of current row in partition */ + int64 frameheadgroup; /* peer group # of frame head row */ + int64 frametailgroup; /* peer group # of frame tail row */ + int64 groupheadpos; /* current row's peer group head position */ + int64 grouptailpos; /* " " " " tail position (group end+1) */ + + MemoryContext partcontext; /* context for partition-lifespan data */ + MemoryContext aggcontext; /* shared context for aggregate working data */ + MemoryContext curaggcontext; /* current aggregate's working data */ + ExprContext *tmpcontext; /* short-term evaluation context */ + + bool all_first; /* true if the scan is starting */ + bool all_done; /* true if the scan is finished */ + bool partition_spooled; /* true if all tuples in current partition + * have been spooled into tuplestore */ + bool more_partitions; /* true if there's more partitions after + * this one */ + bool framehead_valid; /* true if frameheadpos is known up to + * date for current row */ + bool frametail_valid; /* true if frametailpos is known up to + * date for current row */ + bool grouptail_valid; /* true if grouptailpos is known up to + * date for current row */ + + TupleTableSlot *first_part_slot; /* first tuple of current or next + * partition */ + TupleTableSlot *framehead_slot; /* first tuple of current frame */ + TupleTableSlot *frametail_slot; /* first tuple after current frame */ + + /* temporary slots for tuples fetched back from tuplestore */ + TupleTableSlot *agg_row_slot; + TupleTableSlot *temp_slot_1; + TupleTableSlot *temp_slot_2; +} WindowAggState; + +/* ---------------- + * UniqueState information + * + * Unique nodes are used "on top of" sort nodes to discard + * duplicate tuples returned from the sort phase. Basically + * all it does is compare the current tuple from the subplan + * with the previously fetched tuple (stored in its result slot). + * If the two are identical in all interesting fields, then + * we just fetch another tuple from the sort and try again. + * ---------------- + */ +typedef struct UniqueState +{ + PlanState ps; /* its first field is NodeTag */ + ExprState *eqfunction; /* tuple equality qual */ +} UniqueState; + +/* ---------------- + * GatherState information + * + * Gather nodes launch 1 or more parallel workers, run a subplan + * in those workers, and collect the results. + * ---------------- + */ +typedef struct GatherState +{ + PlanState ps; /* its first field is NodeTag */ + bool initialized; /* workers launched? */ + bool need_to_scan_locally; /* need to read from local plan? */ + int64 tuples_needed; /* tuple bound, see ExecSetTupleBound */ + /* these fields are set up once: */ + TupleTableSlot *funnel_slot; + struct ParallelExecutorInfo *pei; + /* all remaining fields are reinitialized during a rescan: */ + int nworkers_launched; /* original number of workers */ + int nreaders; /* number of still-active workers */ + int nextreader; /* next one to try to read from */ + struct TupleQueueReader **reader; /* array with nreaders active entries */ +} GatherState; + +/* ---------------- + * GatherMergeState information + * + * Gather merge nodes launch 1 or more parallel workers, run a + * subplan which produces sorted output in each worker, and then + * merge the results into a single sorted stream. + * ---------------- + */ +struct GMReaderTupleBuffer; /* private in nodeGatherMerge.c */ + +typedef struct GatherMergeState +{ + PlanState ps; /* its first field is NodeTag */ + bool initialized; /* workers launched? */ + bool gm_initialized; /* gather_merge_init() done? */ + bool need_to_scan_locally; /* need to read from local plan? */ + int64 tuples_needed; /* tuple bound, see ExecSetTupleBound */ + /* these fields are set up once: */ + TupleDesc tupDesc; /* descriptor for subplan result tuples */ + int gm_nkeys; /* number of sort columns */ + SortSupport gm_sortkeys; /* array of length gm_nkeys */ + struct ParallelExecutorInfo *pei; + /* all remaining fields are reinitialized during a rescan */ + /* (but the arrays are not reallocated, just cleared) */ + int nworkers_launched; /* original number of workers */ + int nreaders; /* number of active workers */ + TupleTableSlot **gm_slots; /* array with nreaders+1 entries */ + struct TupleQueueReader **reader; /* array with nreaders active entries */ + struct GMReaderTupleBuffer *gm_tuple_buffers; /* nreaders tuple buffers */ + struct binaryheap *gm_heap; /* binary heap of slot indices */ +} GatherMergeState; + +/* ---------------- + * Values displayed by EXPLAIN ANALYZE + * ---------------- + */ +typedef struct HashInstrumentation +{ + int nbuckets; /* number of buckets at end of execution */ + int nbuckets_original; /* planned number of buckets */ + int nbatch; /* number of batches at end of execution */ + int nbatch_original; /* planned number of batches */ + Size space_peak; /* peak memory usage in bytes */ +} HashInstrumentation; + +/* ---------------- + * Shared memory container for per-worker hash information + * ---------------- + */ +typedef struct SharedHashInfo +{ + int num_workers; + HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedHashInfo; + +/* ---------------- + * HashState information + * ---------------- + */ +typedef struct HashState +{ + PlanState ps; /* its first field is NodeTag */ + HashJoinTable hashtable; /* hash table for the hashjoin */ + List *hashkeys; /* list of ExprState nodes */ + + /* + * In a parallelized hash join, the leader retains a pointer to the + * shared-memory stats area in its shared_info field, and then copies the + * shared-memory info back to local storage before DSM shutdown. The + * shared_info field remains NULL in workers, or in non-parallel joins. + */ + SharedHashInfo *shared_info; + + /* + * If we are collecting hash stats, this points to an initially-zeroed + * collection area, which could be either local storage or in shared + * memory; either way it's for just one process. + */ + HashInstrumentation *hinstrument; + + /* Parallel hash state. */ + struct ParallelHashJoinState *parallel_state; +} HashState; + +/* ---------------- + * SetOpState information + * + * Even in "sorted" mode, SetOp nodes are more complex than a simple + * Unique, since we have to count how many duplicates to return. But + * we also support hashing, so this is really more like a cut-down + * form of Agg. + * ---------------- + */ +/* this struct is private in nodeSetOp.c: */ +typedef struct SetOpStatePerGroupData *SetOpStatePerGroup; + +typedef struct SetOpState +{ + PlanState ps; /* its first field is NodeTag */ + ExprState *eqfunction; /* equality comparator */ + Oid *eqfuncoids; /* per-grouping-field equality fns */ + FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ + bool setop_done; /* indicates completion of output scan */ + long numOutput; /* number of dups left to output */ + /* these fields are used in SETOP_SORTED mode: */ + SetOpStatePerGroup pergroup; /* per-group working state */ + HeapTuple grp_firstTuple; /* copy of first tuple of current group */ + /* these fields are used in SETOP_HASHED mode: */ + TupleHashTable hashtable; /* hash table with one entry per group */ + MemoryContext tableContext; /* memory context containing hash table */ + bool table_filled; /* hash table filled yet? */ + TupleHashIterator hashiter; /* for iterating through hash table */ +} SetOpState; + +/* ---------------- + * LockRowsState information + * + * LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking. + * ---------------- + */ +typedef struct LockRowsState +{ + PlanState ps; /* its first field is NodeTag */ + List *lr_arowMarks; /* List of ExecAuxRowMarks */ + EPQState lr_epqstate; /* for evaluating EvalPlanQual rechecks */ +} LockRowsState; + +/* ---------------- + * LimitState information + * + * Limit nodes are used to enforce LIMIT/OFFSET clauses. + * They just select the desired subrange of their subplan's output. + * + * offset is the number of initial tuples to skip (0 does nothing). + * count is the number of tuples to return after skipping the offset tuples. + * If no limit count was specified, count is undefined and noCount is true. + * When lstate == LIMIT_INITIAL, offset/count/noCount haven't been set yet. + * ---------------- + */ +typedef enum +{ + LIMIT_INITIAL, /* initial state for LIMIT node */ + LIMIT_RESCAN, /* rescan after recomputing parameters */ + LIMIT_EMPTY, /* there are no returnable rows */ + LIMIT_INWINDOW, /* have returned a row in the window */ + LIMIT_WINDOWEND_TIES, /* have returned a tied row */ + LIMIT_SUBPLANEOF, /* at EOF of subplan (within window) */ + LIMIT_WINDOWEND, /* stepped off end of window */ + LIMIT_WINDOWSTART /* stepped off beginning of window */ +} LimitStateCond; + +typedef struct LimitState +{ + PlanState ps; /* its first field is NodeTag */ + ExprState *limitOffset; /* OFFSET parameter, or NULL if none */ + ExprState *limitCount; /* COUNT parameter, or NULL if none */ + LimitOption limitOption; /* limit specification type */ + int64 offset; /* current OFFSET value */ + int64 count; /* current COUNT, if any */ + bool noCount; /* if true, ignore count */ + LimitStateCond lstate; /* state machine status, as above */ + int64 position; /* 1-based index of last tuple returned */ + TupleTableSlot *subSlot; /* tuple last obtained from subplan */ + ExprState *eqfunction; /* tuple equality qual in case of WITH TIES + * option */ + TupleTableSlot *last_slot; /* slot for evaluation of ties */ +} LimitState; + +#endif /* EXECNODES_H */ |