summary | refs | log | tree | commit | diff | stats
path: root/src/backend/executor/nodeTidscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor/nodeTidscan.c')
-rw-r--r-- src/backend/executor/nodeTidscan.c 558
1 files changed, 558 insertions, 0 deletions
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
new file mode 100644
index 0000000..4116d1f
--- /dev/null
+++ b/src/backend/executor/nodeTidscan.c
@@ -0,0 +1,558 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeTidscan.c
+ * Routines to support direct tid scans of relations
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeTidscan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+/*
+ * INTERFACE ROUTINES
+ *
+ * ExecTidScan scans a relation using tids
+ * ExecInitTidScan creates and initializes state info.
+ * ExecReScanTidScan rescans the tid relation.
+ * ExecEndTidScan releases all storage.
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "catalog/pg_type.h"
+#include "executor/execdebug.h"
+#include "executor/nodeTidscan.h"
+#include "lib/qunique.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "storage/bufmgr.h"
+#include "utils/array.h"
+#include "utils/rel.h"
+
+
+/*
+ * IsCTIDVar
+ *		True when "node" is a non-NULL Var of the current query level
+ *		(varlevelsup == 0) whose attribute number is the self-item-pointer
+ *		(CTID) system attribute.
+ */
+#define IsCTIDVar(node) \
+    ((node) != NULL && IsA((node), Var) && \
+     ((Var *) (node))->varlevelsup == 0 && \
+     ((Var *) (node))->varattno == SelfItemPointerAttributeNumber)
+
+/*
+ * One element in tss_tidexprs.
+ *
+ * Each entry describes one source of TIDs for the scan: either a compiled
+ * expression (exprstate is set, and isarray says whether it yields a single
+ * tid or a tid[] array), or a CURRENT OF reference (cexpr is set and
+ * exprstate stays NULL).  TidExprListCreate allocates entries with palloc0,
+ * so fields that are not explicitly assigned are NULL/false.
+ */
+typedef struct TidExpr
+{
+ ExprState *exprstate; /* ExprState for a TID-yielding subexpr; NULL
+ * in the CURRENT OF case */
+ bool isarray; /* if true, exprstate yields tid[] not just tid */
+ CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF; the
+ * node is stored as-is, not compiled */
+} TidExpr;
+
+/* Prototypes for file-local routines defined further down. */
+static int itemptr_comparator(const void *a, const void *b);
+static void TidExprListCreate(TidScanState *tidstate);
+static void TidListEval(TidScanState *tidstate);
+static TupleTableSlot *TidNext(TidScanState *node);
+
+
+/*
+ * Build tss_tidexprs from the plan node's tidquals.
+ *
+ * One TidExpr entry is appended per qual, in qual order:
+ *  - operator clause with the CTID Var on either side: the other operand
+ *    is compiled into an ExprState (scalar result);
+ *  - ScalarArrayOpExpr whose first argument is the CTID Var: the second
+ *    argument is compiled and the entry is flagged as array-valued;
+ *  - CurrentOfExpr: there is nothing useful to compile, so the node is
+ *    stored unchanged and tss_isCurrentOf is set.
+ *
+ * Anything else is reported with elog(ERROR).
+ */
+static void
+TidExprListCreate(TidScanState *tidstate)
+{
+    TidScan    *plan = (TidScan *) tidstate->ss.ps.plan;
+    PlanState  *parent = &tidstate->ss.ps;
+    ListCell   *lc;
+
+    tidstate->tss_isCurrentOf = false;
+    tidstate->tss_tidexprs = NIL;
+
+    foreach(lc, plan->tidquals)
+    {
+        Expr       *qual = (Expr *) lfirst(lc);
+        TidExpr    *entry = (TidExpr *) palloc0(sizeof(TidExpr));
+
+        if (is_opclause(qual))
+        {
+            Node       *lhs = get_leftop(qual);
+            Node       *rhs = get_rightop(qual);
+            Node       *valexpr = NULL;
+
+            /* whichever operand is not the CTID Var supplies the TID value */
+            if (IsCTIDVar(lhs))
+                valexpr = rhs;
+            else if (IsCTIDVar(rhs))
+                valexpr = lhs;
+            else
+                elog(ERROR, "could not identify CTID variable");
+
+            entry->exprstate = ExecInitExpr((Expr *) valexpr, parent);
+            entry->isarray = false;
+        }
+        else if (qual != NULL && IsA(qual, ScalarArrayOpExpr))
+        {
+            ScalarArrayOpExpr *arrayop = (ScalarArrayOpExpr *) qual;
+
+            Assert(IsCTIDVar(linitial(arrayop->args)));
+            entry->exprstate = ExecInitExpr(lsecond(arrayop->args), parent);
+            entry->isarray = true;
+        }
+        else if (qual != NULL && IsA(qual, CurrentOfExpr))
+        {
+            entry->cexpr = (CurrentOfExpr *) qual;
+            tidstate->tss_isCurrentOf = true;
+        }
+        else
+            elog(ERROR, "could not identify CTID expression");
+
+        tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, entry);
+    }
+
+    /* CURRENT OF is never OR'd with other quals: it must be the sole entry */
+    Assert(!tidstate->tss_isCurrentOf ||
+           list_length(tidstate->tss_tidexprs) == 1);
+}
+
+/*
+ * Compute the list of TIDs to be visited, by evaluating the expressions
+ * for them.
+ *
+ * (The result is actually an array, not a list.)
+ *
+ * Every entry of tss_tidexprs is processed in order.  Expression entries
+ * are evaluated in the node's ExprContext; NULL results (and NULL array
+ * elements) are skipped, as are TIDs rejected by table_tuple_tid_valid().
+ * A CURRENT OF entry contributes the cursor's TID when execCurrentOf()
+ * returns true.
+ *
+ * On return, tss_TidList points to the palloc'd array (sorted and free of
+ * duplicates when it holds more than one TID), tss_NumTids is the number
+ * of valid entries, and tss_TidPtr is reset to -1.  As a side effect, the
+ * table scan descriptor in ss_currentScanDesc is created if it did not
+ * exist yet.
+ */
+static void
+TidListEval(TidScanState *tidstate)
+{
+ ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
+ TableScanDesc scan;
+ ItemPointerData *tidList;
+ int numAllocTids;
+ int numTids;
+ ListCell *l;
+
+ /*
+ * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
+ * the size of the table), so it makes sense to delay that until needed -
+ * the node might never get executed.
+ *
+ * The descriptor is kept in the node, so later calls reuse it.
+ */
+ if (tidstate->ss.ss_currentScanDesc == NULL)
+ tidstate->ss.ss_currentScanDesc =
+ table_beginscan_tid(tidstate->ss.ss_currentRelation,
+ tidstate->ss.ps.state->es_snapshot);
+ scan = tidstate->ss.ss_currentScanDesc;
+
+ /*
+ * We initialize the array with enough slots for the case that all quals
+ * are simple OpExprs or CurrentOfExprs. If there are any
+ * ScalarArrayOpExprs, we may have to enlarge the array.
+ *
+ * The array branch below grows the allocation to exactly fit the
+ * elements it is about to add; the single-TID branches double the
+ * allocation when it is full.
+ */
+ numAllocTids = list_length(tidstate->tss_tidexprs);
+ tidList = (ItemPointerData *)
+ palloc(numAllocTids * sizeof(ItemPointerData));
+ numTids = 0;
+
+ foreach(l, tidstate->tss_tidexprs)
+ {
+ TidExpr *tidexpr = (TidExpr *) lfirst(l);
+ ItemPointer itemptr;
+ bool isNull;
+
+ if (tidexpr->exprstate && !tidexpr->isarray)
+ {
+ itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
+ econtext,
+ &isNull));
+ if (isNull)
+ continue;
+
+ /*
+ * We silently discard any TIDs that the AM considers invalid
+ * (E.g. for heap, they could be out of range at the time of scan
+ * start. Since we hold at least AccessShareLock on the table, it
+ * won't be possible for someone to truncate away the blocks we
+ * intend to visit.).
+ *
+ * The same validity check is applied to each array element in
+ * the ScalarArrayOpExpr branch below.
+ */
+ if (!table_tuple_tid_valid(scan, itemptr))
+ continue;
+
+ if (numTids >= numAllocTids)
+ {
+ numAllocTids *= 2;
+ tidList = (ItemPointerData *)
+ repalloc(tidList,
+ numAllocTids * sizeof(ItemPointerData));
+ }
+ tidList[numTids++] = *itemptr;
+ }
+ else if (tidexpr->exprstate && tidexpr->isarray)
+ {
+ Datum arraydatum;
+ ArrayType *itemarray;
+ Datum *ipdatums;
+ bool *ipnulls;
+ int ndatums;
+ int i;
+
+ arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
+ econtext,
+ &isNull);
+ if (isNull)
+ continue;
+ itemarray = DatumGetArrayTypeP(arraydatum);
+ deconstruct_array(itemarray,
+ TIDOID, sizeof(ItemPointerData), false, TYPALIGN_SHORT,
+ &ipdatums, &ipnulls, &ndatums);
+ if (numTids + ndatums > numAllocTids)
+ {
+ numAllocTids = numTids + ndatums;
+ tidList = (ItemPointerData *)
+ repalloc(tidList,
+ numAllocTids * sizeof(ItemPointerData));
+ }
+ for (i = 0; i < ndatums; i++)
+ {
+ if (ipnulls[i])
+ continue;
+
+ itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
+
+ if (!table_tuple_tid_valid(scan, itemptr))
+ continue;
+
+ tidList[numTids++] = *itemptr;
+ }
+ pfree(ipdatums);
+ pfree(ipnulls);
+ }
+ else
+ {
+ ItemPointerData cursor_tid;
+
+ Assert(tidexpr->cexpr);
+ if (execCurrentOf(tidexpr->cexpr, econtext,
+ RelationGetRelid(tidstate->ss.ss_currentRelation),
+ &cursor_tid))
+ {
+ if (numTids >= numAllocTids)
+ {
+ numAllocTids *= 2;
+ tidList = (ItemPointerData *)
+ repalloc(tidList,
+ numAllocTids * sizeof(ItemPointerData));
+ }
+ tidList[numTids++] = cursor_tid;
+ }
+ }
+ }
+
+ /*
+ * Sort the array of TIDs into order, and eliminate duplicates.
+ * Eliminating duplicates is necessary since we want OR semantics across
+ * the list. Sorting makes it easier to detect duplicates, and as a bonus
+ * ensures that we will visit the heap in the most efficient way.
+ *
+ * With zero or one TID there is nothing to sort or de-duplicate, so the
+ * step is skipped.  The value returned by qunique() becomes the new
+ * element count.
+ */
+ if (numTids > 1)
+ {
+ /*
+ * CurrentOfExpr could never appear OR'd with something else, so a
+ * CURRENT OF scan is not expected to produce more than one TID.
+ */
+ Assert(!tidstate->tss_isCurrentOf);
+
+ qsort((void *) tidList, numTids, sizeof(ItemPointerData),
+ itemptr_comparator);
+ numTids = qunique(tidList, numTids, sizeof(ItemPointerData),
+ itemptr_comparator);
+ }
+
+ tidstate->tss_TidList = tidList;
+ tidstate->tss_NumTids = numTids;
+ tidstate->tss_TidPtr = -1;
+}
+
+/*
+ * Ordering function for ItemPointerData, usable with qsort() and qunique().
+ *
+ * TIDs are ordered by block number first and by offset number within a
+ * block.  Returns -1, 0 or 1 as the first argument sorts before, equal to,
+ * or after the second.
+ */
+static int
+itemptr_comparator(const void *a, const void *b)
+{
+    const ItemPointerData *lhs = (const ItemPointerData *) a;
+    const ItemPointerData *rhs = (const ItemPointerData *) b;
+    BlockNumber lblock = ItemPointerGetBlockNumber(lhs);
+    BlockNumber rblock = ItemPointerGetBlockNumber(rhs);
+    OffsetNumber loffset = ItemPointerGetOffsetNumber(lhs);
+    OffsetNumber roffset = ItemPointerGetOffsetNumber(rhs);
+
+    /* the block number is the major sort key */
+    if (lblock != rblock)
+        return (lblock < rblock) ? -1 : 1;
+
+    /* same block: the offset number decides */
+    if (loffset != roffset)
+        return (loffset < roffset) ? -1 : 1;
+
+    return 0;
+}
+
+/* ----------------------------------------------------------------
+ * TidNext
+ *
+ * Fetch the next tuple for a TidScan node by walking the node's
+ * TID array in the executor's current scan direction.
+ *
+ * The TID array is computed on first use.  TIDs whose row version
+ * cannot be fetched under the snapshot are skipped.  Returns the
+ * scan slot holding the tuple, or the cleared slot once the
+ * array is exhausted.
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+TidNext(TidScanState *node)
+{
+    EState     *estate = node->ss.ps.state;
+    bool        backward = ScanDirectionIsBackward(estate->es_direction);
+    Snapshot    snapshot = estate->es_snapshot;
+    Relation    rel = node->ss.ss_currentRelation;
+    TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
+    int         step = backward ? -1 : 1;
+    TableScanDesc scan;
+    ItemPointerData *tids;
+    int         ntids;
+
+    /* Build the TID array lazily; it is NULL until the first fetch. */
+    if (node->tss_TidList == NULL)
+        TidListEval(node);
+
+    scan = node->ss.ss_currentScanDesc;
+    tids = node->tss_TidList;
+    ntids = node->tss_NumTids;
+
+    /*
+     * A negative position means "not started": begin at the end of the
+     * array that matches the direction.  Otherwise move one step on from
+     * the entry returned last time.
+     */
+    if (node->tss_TidPtr < 0)
+        node->tss_TidPtr = backward ? ntids - 1 : 0;
+    else
+        node->tss_TidPtr += step;
+
+    for (;;)
+    {
+        ItemPointerData tid;
+
+        if (node->tss_TidPtr < 0 || node->tss_TidPtr >= ntids)
+            break;              /* ran off either end of the array */
+
+        tid = tids[node->tss_TidPtr];
+
+        /*
+         * For WHERE CURRENT OF, the row the cursor returned may have been
+         * updated since then; chase to the version that is current for our
+         * snapshot before fetching.
+         */
+        if (node->tss_isCurrentOf)
+            table_tuple_get_latest_tid(scan, &tid);
+
+        if (table_tuple_fetch_row_version(rel, &tid, snapshot, slot))
+            return slot;
+
+        /* Bad TID or tuple not visible to the snapshot: try the next one */
+        node->tss_TidPtr += step;
+
+        CHECK_FOR_INTERRUPTS();
+    }
+
+    /* no (more) TIDs to visit: signal end of scan with an empty slot */
+    return ExecClearTuple(slot);
+}
+
+/*
+ * TidRecheck -- recheck callback handed to ExecScan (used for EvalPlanQual)
+ *
+ * Currently accepts every tuple without looking at it.
+ */
+static bool
+TidRecheck(TidScanState *node, TupleTableSlot *slot)
+{
+    /*
+     * XXX Arguably the tuple should be compared against the TID list here;
+     * with runtime keys a match is not guaranteed.  Then again, in the
+     * WHERE CURRENT OF case it might legitimately not match anyway ...
+     */
+    return true;
+}
+
+
+/* ----------------------------------------------------------------
+ * ExecTidScan(node)
+ *
+ * ExecProcNode entry point for TidScan nodes.  Delegates to the
+ * generic ExecScan() driver, supplying TidNext as the access
+ * method and TidRecheck as the recheck method, and returns
+ * whatever slot ExecScan() produces.
+ *
+ * Initial state expected by TidNext:
+ * -- tss_TidPtr is -1 (scan not yet positioned).
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+ExecTidScan(PlanState *pstate)
+{
+    TidScanState *tidstate = castNode(TidScanState, pstate);
+    ExecScanAccessMtd fetchNext = (ExecScanAccessMtd) TidNext;
+    ExecScanRecheckMtd recheck = (ExecScanRecheckMtd) TidRecheck;
+
+    return ExecScan(&tidstate->ss, fetchNext, recheck);
+}
+
+/* ----------------------------------------------------------------
+ * ExecReScanTidScan(node)
+ *
+ * Reset the node so the next fetch starts over: the cached TID
+ * array is freed (TidNext rebuilds it when it finds it NULL)
+ * and the scan position goes back to "not started".
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanTidScan(TidScanState *node)
+{
+    TableScanDesc scandesc = node->ss.ss_currentScanDesc;
+
+    /* discard the previously computed TID array, if any */
+    if (node->tss_TidList != NULL)
+    {
+        pfree(node->tss_TidList);
+        node->tss_TidList = NULL;
+    }
+    node->tss_TidPtr = -1;
+    node->tss_NumTids = 0;
+
+    /* not really necessary, but seems good form */
+    if (scandesc != NULL)
+        table_rescan(scandesc, NULL);
+
+    ExecScanReScan(&node->ss);
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndTidScan
+ *
+ * Shut the node down: end the table scan if one was started,
+ * free the expression context, and clear the result and scan
+ * tuple slots.  Returns nothing.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndTidScan(TidScanState *node)
+{
+    ScanState  *scanstate = &node->ss;
+
+    /* the scan descriptor only exists if the TID list was ever computed */
+    if (scanstate->ss_currentScanDesc != NULL)
+        table_endscan(scanstate->ss_currentScanDesc);
+
+    /* release the node's expression context */
+    ExecFreeExprContext(&scanstate->ps);
+
+    /* empty the tuple table slots; the result slot may not exist */
+    if (scanstate->ps.ps_ResultTupleSlot != NULL)
+        ExecClearTuple(scanstate->ps.ps_ResultTupleSlot);
+    ExecClearTuple(scanstate->ss_ScanTupleSlot);
+}
+
+/* ----------------------------------------------------------------
+ * ExecInitTidScan
+ *
+ * Builds and returns the TidScanState for a TidScan plan node:
+ * sets up the expression context, opens the scan relation,
+ * initializes the scan tuple slot, result type and projection,
+ * compiles the plan qual, and prepares the TID expressions.
+ *
+ * The TID array is not computed here (tss_TidList stays NULL)
+ * and no table scan descriptor is created; both happen later,
+ * in TidListEval.
+ *
+ * Parameters:
+ * node: TidScan node produced by the planner.
+ * estate: the execution state initialized in InitPlan.
+ * eflags: passed through to ExecOpenScanRelation.
+ * ----------------------------------------------------------------
+ */
+TidScanState *
+ExecInitTidScan(TidScan *node, EState *estate, int eflags)
+{
+ TidScanState *tidstate;
+ Relation currentRelation;
+
+ /*
+ * create state structure and install ExecTidScan as the node's
+ * ExecProcNode callback
+ */
+ tidstate = makeNode(TidScanState);
+ tidstate->ss.ps.plan = (Plan *) node;
+ tidstate->ss.ps.state = estate;
+ tidstate->ss.ps.ExecProcNode = ExecTidScan;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &tidstate->ss.ps);
+
+ /*
+ * mark tid list as not computed yet; TidNext treats a NULL tss_TidList
+ * as the cue to call TidListEval, and a tss_TidPtr of -1 as "scan not
+ * yet positioned"
+ */
+ tidstate->tss_TidList = NULL;
+ tidstate->tss_NumTids = 0;
+ tidstate->tss_TidPtr = -1;
+
+ /*
+ * open the scan relation
+ */
+ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
+
+ tidstate->ss.ss_currentRelation = currentRelation;
+ tidstate->ss.ss_currentScanDesc = NULL; /* no scan yet; TidListEval
+ * starts it on demand */
+
+ /*
+ * get the scan type from the relation descriptor, and set up the scan
+ * tuple slot with the slot callbacks reported for this relation.
+ */
+ ExecInitScanTupleSlot(estate, &tidstate->ss,
+ RelationGetDescr(currentRelation),
+ table_slot_callbacks(currentRelation));
+
+ /*
+ * Initialize result type and projection.
+ */
+ ExecInitResultTypeTL(&tidstate->ss.ps);
+ ExecAssignScanProjectionInfo(&tidstate->ss);
+
+ /*
+ * initialize child expressions: first the plan's qual, then the list of
+ * TID-yielding expressions taken from the plan's tidquals
+ */
+ tidstate->ss.ps.qual =
+ ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
+
+ TidExprListCreate(tidstate);
+
+ /*
+ * all done.
+ */
+ return tidstate;
+}