author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-05-14 19:16:24 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-05-14 19:16:24 +0000
commit     2a0f262beff32ba86bcb58f3273214e5d0517c09 (patch)
tree       24c0ad10dab36bbd5c22743d3c88c4e0ccd5bc65 /src/backend/executor
parent     Releasing progress-linux version 16.2-2~progress7.99u1. (diff)
Merging upstream version 16.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
 src/backend/executor/functions.c          | 35
 src/backend/executor/nodeAppend.c         | 31
 src/backend/executor/nodeBitmapHeapscan.c | 10
 src/backend/executor/nodeFunctionscan.c   | 81
 src/backend/executor/nodeIndexonlyscan.c  | 95
 src/backend/executor/nodeMemoize.c        | 15
 src/backend/executor/nodeModifyTable.c    | 46
 src/backend/executor/nodeWindowAgg.c      |  8
 8 files changed, 234 insertions, 87 deletions
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index f55424e..89fcd3e 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -743,11 +743,12 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
     * the rowtype column into multiple columns, since we have no way to
     * notify the caller that it should do that.)
     */
-    fcache->returnsTuple = check_sql_fn_retval(queryTree_list,
-                                               rettype,
-                                               rettupdesc,
-                                               false,
-                                               &resulttlist);
+    fcache->returnsTuple = check_sql_fn_retval_ext(queryTree_list,
+                                                   rettype,
+                                                   rettupdesc,
+                                                   procedureStruct->prokind,
+                                                   false,
+                                                   &resulttlist);
 
     /*
     * Construct a JunkFilter we can use to coerce the returned rowtype to the
@@ -1609,6 +1610,21 @@ check_sql_fn_retval(List *queryTreeLists,
                     bool insertDroppedCols,
                     List **resultTargetList)
 {
+    /* Wrapper function to preserve ABI compatibility in released branches */
+    return check_sql_fn_retval_ext(queryTreeLists,
+                                   rettype, rettupdesc,
+                                   PROKIND_FUNCTION,
+                                   insertDroppedCols,
+                                   resultTargetList);
+}
+
+bool
+check_sql_fn_retval_ext(List *queryTreeLists,
+                        Oid rettype, TupleDesc rettupdesc,
+                        char prokind,
+                        bool insertDroppedCols,
+                        List **resultTargetList)
+{
     bool        is_tuple_result = false;
     Query      *parse;
     ListCell   *parse_cell;
@@ -1625,7 +1641,7 @@ check_sql_fn_retval(List *queryTreeLists,
 
     /*
     * If it's declared to return VOID, we don't care what's in the function.
-    * (This takes care of the procedure case, as well.)
+    * (This takes care of procedures with no output parameters, as well.)
     */
     if (rettype == VOIDOID)
         return false;
@@ -1780,8 +1796,13 @@ check_sql_fn_retval(List *queryTreeLists,
     * or not the record type really matches.  For the moment we rely on
     * runtime type checking to catch any discrepancy, but it'd be nice to
     * do better at parse time.
+    *
+    * We must *not* do this for a procedure, however.  Procedures with
+    * output parameter(s) have rettype RECORD, and the CALL code expects
+    * to get results corresponding to the list of output parameters, even
+    * when there's just one parameter that's composite.
     */
-    if (tlistlen == 1)
+    if (tlistlen == 1 && prokind != PROKIND_PROCEDURE)
     {
         TargetEntry *tle = (TargetEntry *) linitial(tlist);
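The functions.c change above keeps the old check_sql_fn_retval() symbol as a thin wrapper around a new _ext variant that takes the extra prokind argument, so code compiled against the released 16.x ABI keeps working. A minimal standalone C sketch of that ABI-compatibility pattern (hypothetical names, not the PostgreSQL functions themselves):

/*
 * Sketch of the "keep the old symbol as a wrapper" pattern.  The extended
 * entry point gains a parameter; the legacy entry point supplies the
 * historical default so existing callers see unchanged behaviour.
 */
#include <stdbool.h>
#include <stdio.h>

#define KIND_FUNCTION  'f'
#define KIND_PROCEDURE 'p'

/* Extended version: callers can state which kind of routine they have. */
static bool
check_retval_ext(int ncolumns, char kind)
{
    /* a single output column is folded only for plain functions */
    if (ncolumns == 1 && kind != KIND_PROCEDURE)
        return false;           /* scalar-style result */
    return true;                /* composite-style result */
}

/* Legacy entry point: same signature and behaviour as before. */
static bool
check_retval(int ncolumns)
{
    return check_retval_ext(ncolumns, KIND_FUNCTION);
}

int
main(void)
{
    printf("function, 1 column  -> %d\n", check_retval(1));                    /* 0 */
    printf("procedure, 1 column -> %d\n", check_retval_ext(1, KIND_PROCEDURE)); /* 1 */
    return 0;
}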
diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c
index 99818d3..338484b 100644
--- a/src/backend/executor/nodeAppend.c
+++ b/src/backend/executor/nodeAppend.c
@@ -1043,26 +1043,25 @@ ExecAppendAsyncEventWait(AppendState *node)
         }
 
         /*
-        * No need for further processing if there are no configured events
-        * other than the postmaster death event.
+        * If there are no configured events other than the postmaster death
+        * event, we don't need to wait or poll.
         */
         if (GetNumRegisteredWaitEvents(node->as_eventset) == 1)
+            noccurred = 0;
+        else
         {
-            FreeWaitEventSet(node->as_eventset);
-            node->as_eventset = NULL;
-            return;
-        }
+            /* Return at most EVENT_BUFFER_SIZE events in one call. */
+            if (nevents > EVENT_BUFFER_SIZE)
+                nevents = EVENT_BUFFER_SIZE;
 
-        /* Return at most EVENT_BUFFER_SIZE events in one call. */
-        if (nevents > EVENT_BUFFER_SIZE)
-            nevents = EVENT_BUFFER_SIZE;
-
-        /*
-        * If the timeout is -1, wait until at least one event occurs. If the
-        * timeout is 0, poll for events, but do not wait at all.
-        */
-        noccurred = WaitEventSetWait(node->as_eventset, timeout, occurred_event,
-                                     nevents, WAIT_EVENT_APPEND_READY);
+            /*
+            * If the timeout is -1, wait until at least one event occurs. If
+            * the timeout is 0, poll for events, but do not wait at all.
+            */
+            noccurred = WaitEventSetWait(node->as_eventset, timeout,
+                                         occurred_event, nevents,
+                                         WAIT_EVENT_APPEND_READY);
+        }
     }
     PG_FINALLY();
     {
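The timeout convention described in the rewritten comment (-1 means block until at least one event, 0 means poll and return immediately) is the same convention used by poll(2). A small self-contained illustration of those two timeout modes, unrelated to PostgreSQL's wait-event machinery:

/* POSIX example of the -1 / 0 timeout convention. */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    struct pollfd pfd = {.fd = STDIN_FILENO, .events = POLLIN};

    /* timeout 0: check readiness without waiting */
    int n = poll(&pfd, 1, 0);

    printf("non-blocking poll saw %d ready descriptor(s)\n", n);

    /*
     * timeout -1 would instead block until stdin becomes readable:
     *     n = poll(&pfd, 1, -1);
     */
    return 0;
}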
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index f35df0b..1cf0bbd 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -207,6 +207,11 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
             BitmapAdjustPrefetchIterator(node, tbmres);
 
+            if (tbmres->ntuples >= 0)
+                node->exact_pages++;
+            else
+                node->lossy_pages++;
+
             /*
             * We can skip fetching the heap page if we don't need any fields
             * from the heap, and the bitmap entries don't need rechecking,
@@ -238,11 +243,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
                 continue;
             }
 
-            if (tbmres->ntuples >= 0)
-                node->exact_pages++;
-            else
-                node->lossy_pages++;
-
             /* Adjust the prefetch target */
             BitmapAdjustPrefetchTarget(node);
         }
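The nodeBitmapHeapscan.c hunk moves the exact/lossy page counters in front of the skip-fetch shortcut, so pages whose heap fetch is skipped still show up in the scan statistics. A generic sketch of why the ordering matters (hypothetical names, not executor code):

/* Counters must be bumped before any early "continue", or short-circuited
 * iterations are silently dropped from the statistics. */
#include <stdbool.h>
#include <stdio.h>

struct stats
{
    long exact_pages;
    long lossy_pages;
};

static void
scan_pages(const int *ntuples, const bool *skip_fetch, int npages, struct stats *st)
{
    for (int i = 0; i < npages; i++)
    {
        /* count first, as the patched code now does ... */
        if (ntuples[i] >= 0)
            st->exact_pages++;
        else
            st->lossy_pages++;

        /* ... so this shortcut no longer loses the page from the stats */
        if (skip_fetch[i])
            continue;

        /* (fetch and process the page here) */
    }
}

int
main(void)
{
    int  ntuples[]    = {10, -1, 3};
    bool skip_fetch[] = {true, false, false};
    struct stats st = {0, 0};

    scan_pages(ntuples, skip_fetch, 3, &st);
    printf("exact=%ld lossy=%ld\n", st.exact_pages, st.lossy_pages);    /* 2, 1 */
    return 0;
}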
diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c
index dd06ef8..bf9acdf 100644
--- a/src/backend/executor/nodeFunctionscan.c
+++ b/src/backend/executor/nodeFunctionscan.c
@@ -344,8 +344,6 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
         Node       *funcexpr = rtfunc->funcexpr;
         int         colcount = rtfunc->funccolcount;
         FunctionScanPerFuncState *fs = &scanstate->funcstates[i];
-        TypeFuncClass functypclass;
-        Oid         funcrettype;
         TupleDesc   tupdesc;
 
         fs->setexpr =
@@ -362,39 +360,18 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
         fs->rowcount = -1;
 
         /*
-        * Now determine if the function returns a simple or composite type,
-        * and build an appropriate tupdesc.  Note that in the composite case,
-        * the function may now return more columns than it did when the plan
-        * was made; we have to ignore any columns beyond "colcount".
+        * Now build a tupdesc showing the result type we expect from the
+        * function.  If we have a coldeflist then that takes priority (note
+        * the parser enforces that there is one if the function's nominal
+        * output type is RECORD).  Otherwise use get_expr_result_type.
+        *
+        * Note that if the function returns a named composite type, that may
+        * now contain more or different columns than it did when the plan was
+        * made.  For both that and the RECORD case, we need to check tuple
+        * compatibility.  ExecMakeTableFunctionResult handles some of this,
+        * and CheckVarSlotCompatibility provides a backstop.
         */
-        functypclass = get_expr_result_type(funcexpr,
-                                            &funcrettype,
-                                            &tupdesc);
-
-        if (functypclass == TYPEFUNC_COMPOSITE ||
-            functypclass == TYPEFUNC_COMPOSITE_DOMAIN)
-        {
-            /* Composite data type, e.g. a table's row type */
-            Assert(tupdesc);
-            Assert(tupdesc->natts >= colcount);
-            /* Must copy it out of typcache for safety */
-            tupdesc = CreateTupleDescCopy(tupdesc);
-        }
-        else if (functypclass == TYPEFUNC_SCALAR)
-        {
-            /* Base data type, i.e. scalar */
-            tupdesc = CreateTemplateTupleDesc(1);
-            TupleDescInitEntry(tupdesc,
-                               (AttrNumber) 1,
-                               NULL,    /* don't care about the name here */
-                               funcrettype,
-                               -1,
-                               0);
-            TupleDescInitEntryCollation(tupdesc,
-                                        (AttrNumber) 1,
-                                        exprCollation(funcexpr));
-        }
-        else if (functypclass == TYPEFUNC_RECORD)
+        if (rtfunc->funccolnames != NIL)
         {
             tupdesc = BuildDescFromLists(rtfunc->funccolnames,
                                          rtfunc->funccoltypes,
@@ -410,8 +387,40 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
         }
         else
         {
-            /* crummy error message, but parser should have caught this */
-            elog(ERROR, "function in FROM has unsupported return type");
+            TypeFuncClass functypclass;
+            Oid         funcrettype;
+
+            functypclass = get_expr_result_type(funcexpr,
+                                                &funcrettype,
+                                                &tupdesc);
+
+            if (functypclass == TYPEFUNC_COMPOSITE ||
+                functypclass == TYPEFUNC_COMPOSITE_DOMAIN)
+            {
+                /* Composite data type, e.g. a table's row type */
+                Assert(tupdesc);
+                /* Must copy it out of typcache for safety */
+                tupdesc = CreateTupleDescCopy(tupdesc);
+            }
+            else if (functypclass == TYPEFUNC_SCALAR)
+            {
+                /* Base data type, i.e. scalar */
+                tupdesc = CreateTemplateTupleDesc(1);
+                TupleDescInitEntry(tupdesc,
+                                   (AttrNumber) 1,
+                                   NULL,    /* don't care about the name here */
+                                   funcrettype,
+                                   -1,
+                                   0);
+                TupleDescInitEntryCollation(tupdesc,
+                                            (AttrNumber) 1,
+                                            exprCollation(funcexpr));
+            }
+            else
+            {
+                /* crummy error message, but parser should have caught this */
+                elog(ERROR, "function in FROM has unsupported return type");
+            }
         }
 
         fs->tupdesc = tupdesc;
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 0b43a9b..45d1a67 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -35,19 +35,21 @@
 #include "access/tableam.h"
 #include "access/tupdesc.h"
 #include "access/visibilitymap.h"
+#include "catalog/pg_type.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexonlyscan.h"
 #include "executor/nodeIndexscan.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/predicate.h"
+#include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
 
 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
-static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
-                            TupleDesc itupdesc);
+static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                            IndexTuple itup, TupleDesc itupdesc);
 
 
 /* ----------------------------------------------------------------
@@ -206,7 +208,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
             ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
         }
         else if (scandesc->xs_itup)
-            StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
+            StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
         else
             elog(ERROR, "no data returned for index-only scan");
 
@@ -264,7 +266,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
  * right now we don't need it elsewhere.
  */
 static void
-StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
+StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+                IndexTuple itup, TupleDesc itupdesc)
 {
     /*
     * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
@@ -277,6 +280,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
 
     ExecClearTuple(slot);
     index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
+
+    /*
+    * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
+    * sized allocation.  We mark this branch as unlikely as generally "name"
+    * is used only for the system catalogs and this would have to be a user
+    * query running on those or some other user table with an index on a name
+    * column.
+    */
+    if (unlikely(node->ioss_NameCStringAttNums != NULL))
+    {
+        int         attcount = node->ioss_NameCStringCount;
+
+        for (int idx = 0; idx < attcount; idx++)
+        {
+            int         attnum = node->ioss_NameCStringAttNums[idx];
+            Name        name;
+
+            /* skip null Datums */
+            if (slot->tts_isnull[attnum])
+                continue;
+
+            /* allocate the NAMEDATALEN and copy the datum into that memory */
+            name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
+                                             NAMEDATALEN);
+
+            /* use namestrcpy to zero-pad all trailing bytes */
+            namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
+            slot->tts_values[attnum] = NameGetDatum(name);
+        }
+    }
+
     ExecStoreVirtualTuple(slot);
 }
 
@@ -490,8 +524,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 {
     IndexOnlyScanState *indexstate;
     Relation    currentRelation;
+    Relation    indexRelation;
     LOCKMODE    lockmode;
     TupleDesc   tupDesc;
+    int         indnkeyatts;
+    int         namecount;
 
     /*
     * create state structure
@@ -564,7 +601,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 
     /* Open the index relation. */
     lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
-    indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode);
+    indexRelation = index_open(node->indexid, lockmode);
+    indexstate->ioss_RelationDesc = indexRelation;
 
     /*
     * Initialize index-specific scan state
@@ -577,7 +615,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
     * build the index scan keys from the index qualification
     */
    ExecIndexBuildScanKeys((PlanState *) indexstate,
-                           indexstate->ioss_RelationDesc,
+                           indexRelation,
                            node->indexqual,
                            false,
                            &indexstate->ioss_ScanKeys,
@@ -591,7 +629,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
     * any ORDER BY exprs have to be turned into scankeys in the same way
     */
    ExecIndexBuildScanKeys((PlanState *) indexstate,
-                           indexstate->ioss_RelationDesc,
+                           indexRelation,
                            node->indexorderby,
                            true,
                            &indexstate->ioss_OrderByKeys,
@@ -620,6 +658,49 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
         indexstate->ioss_RuntimeContext = NULL;
     }
 
+    indexstate->ioss_NameCStringAttNums = NULL;
+    indnkeyatts = indexRelation->rd_index->indnkeyatts;
+    namecount = 0;
+
+    /*
+    * The "name" type for btree uses text_ops which results in storing
+    * cstrings in the indexed keys rather than names.  Here we detect that in
+    * a generic way in case other index AMs want to do the same optimization.
+    * Check for opclasses with an opcintype of NAMEOID and an index tuple
+    * descriptor with CSTRINGOID.  If any of these are found, create an array
+    * marking the index attribute number of each of them.  StoreIndexTuple()
+    * handles copying the name Datums into a NAMEDATALEN-byte allocation.
+    */
+
+    /* First, count the number of such index keys */
+    for (int attnum = 0; attnum < indnkeyatts; attnum++)
+    {
+        if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+            indexRelation->rd_opcintype[attnum] == NAMEOID)
+            namecount++;
+    }
+
+    if (namecount > 0)
+    {
+        int         idx = 0;
+
+        /*
+        * Now create an array to mark the attribute numbers of the keys that
+        * need to be converted from cstring to name.
+        */
+        indexstate->ioss_NameCStringAttNums = (AttrNumber *)
+            palloc(sizeof(AttrNumber) * namecount);
+
+        for (int attnum = 0; attnum < indnkeyatts; attnum++)
+        {
+            if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+                indexRelation->rd_opcintype[attnum] == NAMEOID)
+                indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
+        }
+    }
+
+    indexstate->ioss_NameCStringCount = namecount;
+
     /*
     * all done.
     */
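The index-only-scan change re-materializes "name" columns that the index stored as plain C strings into fixed-size, zero-padded buffers before the tuple is returned. A standalone sketch of that fixed-width, zero-padded copy (plain C with a hypothetical helper; NAMEDATALEN defaults to 64 in PostgreSQL):

/* Copy a variable-length C string into a fixed NAMEDATALEN-byte buffer,
 * zero-padding every trailing byte, which is the layout of the name type. */
#include <stdio.h>
#include <string.h>

#define NAMEDATALEN 64          /* PostgreSQL's default */

typedef struct
{
    char data[NAMEDATALEN];
} NameData;

static void
name_from_cstring(NameData *dst, const char *src)
{
    memset(dst->data, 0, NAMEDATALEN);          /* pad with zeros */
    strncpy(dst->data, src, NAMEDATALEN - 1);   /* truncate, keep a terminator */
}

int
main(void)
{
    NameData n;

    name_from_cstring(&n, "pg_class");
    printf("'%s' stored in a %zu-byte buffer\n", n.data, sizeof(n));
    return 0;
}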
diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c
index 262f797..3c54ca5 100644
--- a/src/backend/executor/nodeMemoize.c
+++ b/src/backend/executor/nodeMemoize.c
@@ -13,7 +13,7 @@
  * Memoize nodes are intended to sit above parameterized nodes in the plan
  * tree in order to cache results from them.  The intention here is that a
  * repeat scan with a parameter value that has already been seen by the node
- * can fetch tuples from the cache rather than having to re-scan the outer
+ * can fetch tuples from the cache rather than having to re-scan the inner
  * node all over again.  The query planner may choose to make use of one of
  * these when it thinks rescans for previously seen values are likely enough
  * to warrant adding the additional node.
@@ -207,7 +207,6 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
         }
     }
 
-    ResetExprContext(econtext);
     MemoryContextSwitchTo(oldcontext);
     return murmurhash32(hashkey);
 }
@@ -265,7 +264,6 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
         }
     }
 
-    ResetExprContext(econtext);
     MemoryContextSwitchTo(oldcontext);
     return match;
 }
@@ -273,7 +271,7 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
     {
         econtext->ecxt_innertuple = tslot;
         econtext->ecxt_outertuple = pslot;
-        return ExecQualAndReset(mstate->cache_eq_expr, econtext);
+        return ExecQual(mstate->cache_eq_expr, econtext);
     }
 }
 
@@ -694,9 +692,18 @@ static TupleTableSlot *
 ExecMemoize(PlanState *pstate)
 {
     MemoizeState *node = castNode(MemoizeState, pstate);
+    ExprContext *econtext = node->ss.ps.ps_ExprContext;
     PlanState  *outerNode;
     TupleTableSlot *slot;
 
+    CHECK_FOR_INTERRUPTS();
+
+    /*
+    * Reset per-tuple memory context to free any expression evaluation
+    * storage allocated in the previous tuple cycle.
+    */
+    ResetExprContext(econtext);
+
     switch (node->mstatus)
     {
         case MEMO_CACHE_LOOKUP:
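The nodeMemoize.c change stops resetting the per-tuple expression context inside the hash and equality callbacks and instead resets it once at the top of ExecMemoize(), so a callback can no longer free memory that another part of the same cycle still needs. A sketch of that reset-once-per-cycle discipline using a toy scratch arena (hypothetical names, not the executor's memory-context API):

/* Helpers allocate from a shared scratch arena but never reset it; the
 * arena is reset exactly once per outer cycle.  No overflow handling in
 * this sketch. */
#include <stdio.h>

#define ARENA_SIZE 1024

struct arena
{
    char    buf[ARENA_SIZE];
    size_t  used;
};

static void *
arena_alloc(struct arena *a, size_t n)
{
    void *p = a->buf + a->used;

    a->used += n;
    return p;
}

static void
arena_reset(struct arena *a)
{
    a->used = 0;
}

/* Builds a key in scratch memory; deliberately does NOT reset the arena. */
static char *
make_key(struct arena *a, int v)
{
    char *s = arena_alloc(a, 32);

    snprintf(s, 32, "key-%d", v);
    return s;
}

int
main(void)
{
    struct arena scratch = {.used = 0};

    for (int tuple = 0; tuple < 3; tuple++)
    {
        arena_reset(&scratch);      /* once per cycle, as ExecMemoize now does */

        char *k1 = make_key(&scratch, tuple);
        char *k2 = make_key(&scratch, tuple + 100);

        /* both keys remain valid for the whole cycle */
        printf("%s %s\n", k1, k2);
    }
    return 0;
}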
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index c84caee..3a1f2ba 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -2966,8 +2966,29 @@ lmerge_matched:
             case TM_SelfModified:
 
                 /*
-                * The SQL standard disallows this for MERGE.
+                * The target tuple was already updated or deleted by the
+                * current command, or by a later command in the current
+                * transaction.  The former case is explicitly disallowed by
+                * the SQL standard for MERGE, which insists that the MERGE
+                * join condition should not join a target row to more than
+                * one source row.
+                *
+                * The latter case arises if the tuple is modified by a
+                * command in a BEFORE trigger, or perhaps by a command in a
+                * volatile function used in the query.  In such situations we
+                * should not ignore the MERGE action, but it is equally
+                * unsafe to proceed.  We don't want to discard the original
+                * MERGE action while keeping the triggered actions based on
+                * it; and it would be no better to allow the original MERGE
+                * action while discarding the updates that it triggered.  So
+                * throwing an error is the only safe course.
                 */
+                if (context->tmfd.cmax != estate->es_output_cid)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
+                             errmsg("tuple to be updated or deleted was already modified by an operation triggered by the current command"),
+                             errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
                 if (TransactionIdIsCurrentTransactionId(context->tmfd.xmax))
                     ereport(ERROR,
                             (errcode(ERRCODE_CARDINALITY_VIOLATION),
@@ -2975,6 +2996,7 @@ lmerge_matched:
                              errmsg("%s command cannot affect row a second time",
                                     "MERGE"),
                              errhint("Ensure that not more than one source row matches any one target row.")));
+                /* This shouldn't happen */
                 elog(ERROR, "attempted to update or delete invisible tuple");
                 break;
 
@@ -3083,19 +3105,27 @@ lmerge_matched:
                         /*
                         * This can be reached when following an update
                         * chain from a tuple updated by another session,
-                        * reaching a tuple that was already updated in
-                        * this transaction. If previously modified by
-                        * this command, ignore the redundant update,
-                        * otherwise error out.
-                        *
-                        * See also response to TM_SelfModified in
-                        * ExecUpdate().
+                        * reaching a tuple that was already updated or
+                        * deleted by the current command, or by a later
+                        * command in the current transaction.  As above,
+                        * this should always be treated as an error.
                         */
                         if (context->tmfd.cmax != estate->es_output_cid)
                             ereport(ERROR,
                                     (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
                                      errmsg("tuple to be updated or deleted was already modified by an operation triggered by the current command"),
                                      errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
+                        if (TransactionIdIsCurrentTransactionId(context->tmfd.xmax))
+                            ereport(ERROR,
+                                    (errcode(ERRCODE_CARDINALITY_VIOLATION),
+                            /* translator: %s is a SQL command name */
+                                     errmsg("%s command cannot affect row a second time",
+                                            "MERGE"),
+                                     errhint("Ensure that not more than one source row matches any one target row.")));
+
+                        /* This shouldn't happen */
+                        elog(ERROR, "attempted to update or delete invisible tuple");
 
                         return false;
 
                     default:
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 310ac23..7b57079 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -2399,6 +2399,9 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
     winstate->ss.ps.state = estate;
     winstate->ss.ps.ExecProcNode = ExecWindowAgg;
 
+    /* copy frame options to state node for easy access */
+    winstate->frameOptions = frameOptions;
+
     /*
     * Create expression contexts.  We need two, one for per-input-tuple
     * processing and one for per-output-tuple processing.  We cheat a little
@@ -2649,9 +2652,6 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
     /* Set the status to running */
     winstate->status = WINDOWAGG_RUN;
 
-    /* copy frame options to state node for easy access */
-    winstate->frameOptions = frameOptions;
-
     /* initialize frame bound offset expressions */
     winstate->startOffset = ExecInitExpr((Expr *) node->startOffset,
                                          (PlanState *) winstate);
@@ -2802,7 +2802,7 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
 
     /*
     * Figure out whether we want to use the moving-aggregate implementation,
-    * and collect the right set of fields from the pg_attribute entry.
+    * and collect the right set of fields from the pg_aggregate entry.
     *
     * It's possible that an aggregate would supply a safe moving-aggregate
     * implementation and an unsafe normal one, in which case our hand is
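The nodeModifyTable.c hunks above make both TM_SelfModified paths distinguish a tuple changed by a later command in the same transaction (for example by a BEFORE trigger) from one that the current MERGE command itself touched twice. A standalone sketch of just that decision, with hypothetical types standing in for the executor's:

/* Simplified classification mirroring the error selection in the hunks:
 * a mismatched command id means a triggered data change, a matching one
 * with our own xmax means the MERGE joined one target row to two source
 * rows, and anything else should be impossible. */
#include <stdbool.h>
#include <stdio.h>

enum merge_error
{
    TRIGGERED_DATA_CHANGE_VIOLATION,
    CARDINALITY_VIOLATION,
    INVISIBLE_TUPLE             /* "shouldn't happen" */
};

static enum merge_error
classify_self_modified(unsigned tuple_cmax, unsigned current_cid,
                       bool xmax_is_current_xact)
{
    if (tuple_cmax != current_cid)
        return TRIGGERED_DATA_CHANGE_VIOLATION;
    if (xmax_is_current_xact)
        return CARDINALITY_VIOLATION;
    return INVISIBLE_TUPLE;
}

int
main(void)
{
    /* modified by a later command (a trigger) in the same transaction */
    printf("%d\n", classify_self_modified(7, 5, true));    /* 0 */
    /* joined to two source rows: the same command touched it twice */
    printf("%d\n", classify_self_modified(5, 5, true));    /* 1 */
    return 0;
}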