diff options
Diffstat (limited to 'src/backend/executor/nodeTidscan.c')
-rw-r--r-- | src/backend/executor/nodeTidscan.c | 558 |
1 files changed, 558 insertions, 0 deletions
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c new file mode 100644 index 0000000..4116d1f --- /dev/null +++ b/src/backend/executor/nodeTidscan.c @@ -0,0 +1,558 @@ +/*------------------------------------------------------------------------- + * + * nodeTidscan.c + * Routines to support direct tid scans of relations + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeTidscan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * + * ExecTidScan scans a relation using tids + * ExecInitTidScan creates and initializes state info. + * ExecReScanTidScan rescans the tid relation. + * ExecEndTidScan releases all storage. + */ +#include "postgres.h" + +#include "access/sysattr.h" +#include "access/tableam.h" +#include "catalog/pg_type.h" +#include "executor/execdebug.h" +#include "executor/nodeTidscan.h" +#include "lib/qunique.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "storage/bufmgr.h" +#include "utils/array.h" +#include "utils/rel.h" + + +#define IsCTIDVar(node) \ + ((node) != NULL && \ + IsA((node), Var) && \ + ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ + ((Var *) (node))->varlevelsup == 0) + +/* one element in tss_tidexprs */ +typedef struct TidExpr +{ + ExprState *exprstate; /* ExprState for a TID-yielding subexpr */ + bool isarray; /* if true, it yields tid[] not just tid */ + CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */ +} TidExpr; + +static void TidExprListCreate(TidScanState *tidstate); +static void TidListEval(TidScanState *tidstate); +static int itemptr_comparator(const void *a, const void *b); +static TupleTableSlot *TidNext(TidScanState *node); + + +/* + * Extract the qual subexpressions that yield TIDs to search for, + * and compile them into ExprStates if they're ordinary expressions. + * + * CURRENT OF is a special case that we can't compile usefully; + * just drop it into the TidExpr list as-is. + */ +static void +TidExprListCreate(TidScanState *tidstate) +{ + TidScan *node = (TidScan *) tidstate->ss.ps.plan; + ListCell *l; + + tidstate->tss_tidexprs = NIL; + tidstate->tss_isCurrentOf = false; + + foreach(l, node->tidquals) + { + Expr *expr = (Expr *) lfirst(l); + TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr)); + + if (is_opclause(expr)) + { + Node *arg1; + Node *arg2; + + arg1 = get_leftop(expr); + arg2 = get_rightop(expr); + if (IsCTIDVar(arg1)) + tidexpr->exprstate = ExecInitExpr((Expr *) arg2, + &tidstate->ss.ps); + else if (IsCTIDVar(arg2)) + tidexpr->exprstate = ExecInitExpr((Expr *) arg1, + &tidstate->ss.ps); + else + elog(ERROR, "could not identify CTID variable"); + tidexpr->isarray = false; + } + else if (expr && IsA(expr, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr; + + Assert(IsCTIDVar(linitial(saex->args))); + tidexpr->exprstate = ExecInitExpr(lsecond(saex->args), + &tidstate->ss.ps); + tidexpr->isarray = true; + } + else if (expr && IsA(expr, CurrentOfExpr)) + { + CurrentOfExpr *cexpr = (CurrentOfExpr *) expr; + + tidexpr->cexpr = cexpr; + tidstate->tss_isCurrentOf = true; + } + else + elog(ERROR, "could not identify CTID expression"); + + tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr); + } + + /* CurrentOfExpr could never appear OR'd with something else */ + Assert(list_length(tidstate->tss_tidexprs) == 1 || + !tidstate->tss_isCurrentOf); +} + +/* + * Compute the list of TIDs to be visited, by evaluating the expressions + * for them. + * + * (The result is actually an array, not a list.) + */ +static void +TidListEval(TidScanState *tidstate) +{ + ExprContext *econtext = tidstate->ss.ps.ps_ExprContext; + TableScanDesc scan; + ItemPointerData *tidList; + int numAllocTids; + int numTids; + ListCell *l; + + /* + * Start scan on-demand - initializing a scan isn't free (e.g. heap stats + * the size of the table), so it makes sense to delay that until needed - + * the node might never get executed. + */ + if (tidstate->ss.ss_currentScanDesc == NULL) + tidstate->ss.ss_currentScanDesc = + table_beginscan_tid(tidstate->ss.ss_currentRelation, + tidstate->ss.ps.state->es_snapshot); + scan = tidstate->ss.ss_currentScanDesc; + + /* + * We initialize the array with enough slots for the case that all quals + * are simple OpExprs or CurrentOfExprs. If there are any + * ScalarArrayOpExprs, we may have to enlarge the array. + */ + numAllocTids = list_length(tidstate->tss_tidexprs); + tidList = (ItemPointerData *) + palloc(numAllocTids * sizeof(ItemPointerData)); + numTids = 0; + + foreach(l, tidstate->tss_tidexprs) + { + TidExpr *tidexpr = (TidExpr *) lfirst(l); + ItemPointer itemptr; + bool isNull; + + if (tidexpr->exprstate && !tidexpr->isarray) + { + itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, + econtext, + &isNull)); + if (isNull) + continue; + + /* + * We silently discard any TIDs that the AM considers invalid + * (E.g. for heap, they could be out of range at the time of scan + * start. Since we hold at least AccessShareLock on the table, it + * won't be possible for someone to truncate away the blocks we + * intend to visit.). + */ + if (!table_tuple_tid_valid(scan, itemptr)) + continue; + + if (numTids >= numAllocTids) + { + numAllocTids *= 2; + tidList = (ItemPointerData *) + repalloc(tidList, + numAllocTids * sizeof(ItemPointerData)); + } + tidList[numTids++] = *itemptr; + } + else if (tidexpr->exprstate && tidexpr->isarray) + { + Datum arraydatum; + ArrayType *itemarray; + Datum *ipdatums; + bool *ipnulls; + int ndatums; + int i; + + arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate, + econtext, + &isNull); + if (isNull) + continue; + itemarray = DatumGetArrayTypeP(arraydatum); + deconstruct_array(itemarray, + TIDOID, sizeof(ItemPointerData), false, TYPALIGN_SHORT, + &ipdatums, &ipnulls, &ndatums); + if (numTids + ndatums > numAllocTids) + { + numAllocTids = numTids + ndatums; + tidList = (ItemPointerData *) + repalloc(tidList, + numAllocTids * sizeof(ItemPointerData)); + } + for (i = 0; i < ndatums; i++) + { + if (ipnulls[i]) + continue; + + itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]); + + if (!table_tuple_tid_valid(scan, itemptr)) + continue; + + tidList[numTids++] = *itemptr; + } + pfree(ipdatums); + pfree(ipnulls); + } + else + { + ItemPointerData cursor_tid; + + Assert(tidexpr->cexpr); + if (execCurrentOf(tidexpr->cexpr, econtext, + RelationGetRelid(tidstate->ss.ss_currentRelation), + &cursor_tid)) + { + if (numTids >= numAllocTids) + { + numAllocTids *= 2; + tidList = (ItemPointerData *) + repalloc(tidList, + numAllocTids * sizeof(ItemPointerData)); + } + tidList[numTids++] = cursor_tid; + } + } + } + + /* + * Sort the array of TIDs into order, and eliminate duplicates. + * Eliminating duplicates is necessary since we want OR semantics across + * the list. Sorting makes it easier to detect duplicates, and as a bonus + * ensures that we will visit the heap in the most efficient way. + */ + if (numTids > 1) + { + /* CurrentOfExpr could never appear OR'd with something else */ + Assert(!tidstate->tss_isCurrentOf); + + qsort((void *) tidList, numTids, sizeof(ItemPointerData), + itemptr_comparator); + numTids = qunique(tidList, numTids, sizeof(ItemPointerData), + itemptr_comparator); + } + + tidstate->tss_TidList = tidList; + tidstate->tss_NumTids = numTids; + tidstate->tss_TidPtr = -1; +} + +/* + * qsort comparator for ItemPointerData items + */ +static int +itemptr_comparator(const void *a, const void *b) +{ + const ItemPointerData *ipa = (const ItemPointerData *) a; + const ItemPointerData *ipb = (const ItemPointerData *) b; + BlockNumber ba = ItemPointerGetBlockNumber(ipa); + BlockNumber bb = ItemPointerGetBlockNumber(ipb); + OffsetNumber oa = ItemPointerGetOffsetNumber(ipa); + OffsetNumber ob = ItemPointerGetOffsetNumber(ipb); + + if (ba < bb) + return -1; + if (ba > bb) + return 1; + if (oa < ob) + return -1; + if (oa > ob) + return 1; + return 0; +} + +/* ---------------------------------------------------------------- + * TidNext + * + * Retrieve a tuple from the TidScan node's currentRelation + * using the tids in the TidScanState information. + * + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +TidNext(TidScanState *node) +{ + EState *estate; + ScanDirection direction; + Snapshot snapshot; + TableScanDesc scan; + Relation heapRelation; + TupleTableSlot *slot; + ItemPointerData *tidList; + int numTids; + bool bBackward; + + /* + * extract necessary information from tid scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + snapshot = estate->es_snapshot; + heapRelation = node->ss.ss_currentRelation; + slot = node->ss.ss_ScanTupleSlot; + + /* + * First time through, compute the list of TIDs to be visited + */ + if (node->tss_TidList == NULL) + TidListEval(node); + + scan = node->ss.ss_currentScanDesc; + tidList = node->tss_TidList; + numTids = node->tss_NumTids; + + /* + * Initialize or advance scan position, depending on direction. + */ + bBackward = ScanDirectionIsBackward(direction); + if (bBackward) + { + if (node->tss_TidPtr < 0) + { + /* initialize for backward scan */ + node->tss_TidPtr = numTids - 1; + } + else + node->tss_TidPtr--; + } + else + { + if (node->tss_TidPtr < 0) + { + /* initialize for forward scan */ + node->tss_TidPtr = 0; + } + else + node->tss_TidPtr++; + } + + while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) + { + ItemPointerData tid = tidList[node->tss_TidPtr]; + + /* + * For WHERE CURRENT OF, the tuple retrieved from the cursor might + * since have been updated; if so, we should fetch the version that is + * current according to our snapshot. + */ + if (node->tss_isCurrentOf) + table_tuple_get_latest_tid(scan, &tid); + + if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) + return slot; + + /* Bad TID or failed snapshot qual; try next */ + if (bBackward) + node->tss_TidPtr--; + else + node->tss_TidPtr++; + + CHECK_FOR_INTERRUPTS(); + } + + /* + * if we get here it means the tid scan failed so we are at the end of the + * scan.. + */ + return ExecClearTuple(slot); +} + +/* + * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +TidRecheck(TidScanState *node, TupleTableSlot *slot) +{ + /* + * XXX shouldn't we check here to make sure tuple matches TID list? In + * runtime-key case this is not certain, is it? However, in the WHERE + * CURRENT OF case it might not match anyway ... + */ + return true; +} + + +/* ---------------------------------------------------------------- + * ExecTidScan(node) + * + * Scans the relation using tids and returns + * the next qualifying tuple in the direction specified. + * We call the ExecScan() routine and pass it the appropriate + * access method functions. + * + * Conditions: + * -- the "cursor" maintained by the AMI is positioned at the tuple + * returned previously. + * + * Initial States: + * -- the relation indicated is opened for scanning so that the + * "cursor" is positioned before the first qualifying tuple. + * -- tss_TidPtr is -1. + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +ExecTidScan(PlanState *pstate) +{ + TidScanState *node = castNode(TidScanState, pstate); + + return ExecScan(&node->ss, + (ExecScanAccessMtd) TidNext, + (ExecScanRecheckMtd) TidRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanTidScan(node) + * ---------------------------------------------------------------- + */ +void +ExecReScanTidScan(TidScanState *node) +{ + if (node->tss_TidList) + pfree(node->tss_TidList); + node->tss_TidList = NULL; + node->tss_NumTids = 0; + node->tss_TidPtr = -1; + + /* not really necessary, but seems good form */ + if (node->ss.ss_currentScanDesc) + table_rescan(node->ss.ss_currentScanDesc, NULL); + + ExecScanReScan(&node->ss); +} + +/* ---------------------------------------------------------------- + * ExecEndTidScan + * + * Releases any storage allocated through C routines. + * Returns nothing. + * ---------------------------------------------------------------- + */ +void +ExecEndTidScan(TidScanState *node) +{ + if (node->ss.ss_currentScanDesc) + table_endscan(node->ss.ss_currentScanDesc); + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clear out tuple table slots + */ + if (node->ss.ps.ps_ResultTupleSlot) + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); +} + +/* ---------------------------------------------------------------- + * ExecInitTidScan + * + * Initializes the tid scan's state information, creates + * scan keys, and opens the base and tid relations. + * + * Parameters: + * node: TidScan node produced by the planner. + * estate: the execution state initialized in InitPlan. + * ---------------------------------------------------------------- + */ +TidScanState * +ExecInitTidScan(TidScan *node, EState *estate, int eflags) +{ + TidScanState *tidstate; + Relation currentRelation; + + /* + * create state structure + */ + tidstate = makeNode(TidScanState); + tidstate->ss.ps.plan = (Plan *) node; + tidstate->ss.ps.state = estate; + tidstate->ss.ps.ExecProcNode = ExecTidScan; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &tidstate->ss.ps); + + /* + * mark tid list as not computed yet + */ + tidstate->tss_TidList = NULL; + tidstate->tss_NumTids = 0; + tidstate->tss_TidPtr = -1; + + /* + * open the scan relation + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); + + tidstate->ss.ss_currentRelation = currentRelation; + tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * get the scan type from the relation descriptor. + */ + ExecInitScanTupleSlot(estate, &tidstate->ss, + RelationGetDescr(currentRelation), + table_slot_callbacks(currentRelation)); + + /* + * Initialize result type and projection. + */ + ExecInitResultTypeTL(&tidstate->ss.ps); + ExecAssignScanProjectionInfo(&tidstate->ss); + + /* + * initialize child expressions + */ + tidstate->ss.ps.qual = + ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate); + + TidExprListCreate(tidstate); + + /* + * all done. + */ + return tidstate; +} |