Diffstat (limited to 'src/backend/utils')
-rw-r--r--  src/backend/utils/Makefile                     |   7
-rw-r--r--  src/backend/utils/adt/int8.c                   |  15
-rw-r--r--  src/backend/utils/adt/jsonpath_exec.c          |   3
-rw-r--r--  src/backend/utils/adt/ruleutils.c              |  89
-rw-r--r--  src/backend/utils/adt/timestamp.c              |  92
-rw-r--r--  src/backend/utils/adt/windowfuncs.c            |  29
-rw-r--r--  src/backend/utils/adt/xid8funcs.c              |  30
-rw-r--r--  src/backend/utils/cache/catcache.c             | 143
-rw-r--r--  src/backend/utils/fmgr/funcapi.c               |  10
-rw-r--r--  src/backend/utils/misc/postgresql.conf.sample  |   6
-rw-r--r--  src/backend/utils/misc/ps_status.c             |  35
-rw-r--r--  src/backend/utils/mmgr/dsa.c                   |  10
-rw-r--r--  src/backend/utils/sort/logtape.c               |   6
13 files changed, 351 insertions, 124 deletions
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index deb9016..4299735 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -38,9 +38,12 @@ all: distprep probes.h generated-header-symlinks
 
 distprep: fmgr-stamp errcodes.h
 
-.PHONY: generated-header-symlinks
+.PHONY: generated-header-symlinks submake-adt-headers
 
-generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp $(top_builddir)/src/include/utils/probes.h
+generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp $(top_builddir)/src/include/utils/probes.h submake-adt-headers
+
+submake-adt-headers:
+	$(MAKE) -C adt jsonpath_gram.h
 
 $(SUBDIRS:%=%-recursive): fmgr-stamp errcodes.h
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 123b4eb..41fbeec 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -833,6 +833,21 @@ int8inc_support(PG_FUNCTION_ARGS)
 		SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
 		MonotonicFunction monotonic = MONOTONICFUNC_NONE;
 		int			frameOptions = req->window_clause->frameOptions;
+		WindowFunc *wfunc = req->window_func;
+
+		if (list_length(wfunc->args) == 1)
+		{
+			Node	   *expr = eval_const_expressions(NULL, linitial(wfunc->args));
+
+			/*
+			 * Due to the Node representation of WindowClause runConditions in
+			 * versions prior to v17, we need to insist that the count arg is
+			 * Const to allow safe application of the runCondition
+			 * optimization.
+			 */
+			if (!IsA(expr, Const))
+				PG_RETURN_POINTER(NULL);
+		}
 
 		/* No ORDER BY clause then all rows are peers */
 		if (req->window_clause->orderClause == NIL)
diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c
index 2d0599b..5c25bc1 100644
--- a/src/backend/utils/adt/jsonpath_exec.c
+++ b/src/backend/utils/adt/jsonpath_exec.c
@@ -1232,6 +1232,9 @@ executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
 	JsonPathBool res;
 	JsonPathBool res2;
 
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
 	if (!canHaveNext && jspHasNext(jsp))
 		elog(ERROR, "boolean jsonpath item cannot have next item");
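The check_stack_depth() call added to executeBoolItem() is PostgreSQL's standard guard for recursive evaluators driven by untrusted input. A minimal standalone sketch of the same idea, counting recursion depth explicitly (the real function instead measures actual stack consumption against max_stack_depth; MAX_DEPTH and all names below are illustrative, not PostgreSQL's):

    #include <setjmp.h>
    #include <stdio.h>

    /* Hypothetical depth limit standing in for max_stack_depth. */
    #define MAX_DEPTH 10000

    static jmp_buf too_deep;
    static int depth;

    static void check_depth(void)
    {
        if (++depth > MAX_DEPTH)
            longjmp(too_deep, 1);   /* analogous to ereport(ERROR, ...) */
    }

    /* A recursive evaluator in the style of executeBoolItem(). */
    static int eval(int n)
    {
        check_depth();
        int r = (n <= 1) ? n : eval(n - 1);
        depth--;
        return r;
    }

    int main(void)
    {
        if (setjmp(too_deep) == 0)
            printf("result: %d\n", eval(100000));
        else
            puts("error: expression too deeply nested");
        return 0;
    }

Without the guard, a deeply nested jsonpath boolean expression would crash the process with a stack overflow instead of raising a clean error.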
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 834e170..f01cc25 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -352,8 +352,7 @@ static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
 									  bool attrsOnly, bool missing_ok);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
 										 int prettyFlags, bool missing_ok);
-static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname,
-								int prettyFlags);
+static text *pg_get_expr_worker(text *expr, Oid relid, int prettyFlags);
 static int	print_function_arguments(StringInfo buf, HeapTuple proctup,
 									 bool print_table_args, bool print_defaults);
 static void print_function_rettype(StringInfo buf, HeapTuple proctup);
@@ -2599,6 +2598,11 @@ decompile_column_index_array(Datum column_index_array, Oid relId,
  * partial indexes, column default expressions, etc.  We also support
  * Var-free expressions, for which the OID can be InvalidOid.
  *
+ * If the OID is nonzero but not actually valid, don't throw an error,
+ * just return NULL.  This is a bit questionable, but it's what we've
+ * done historically, and it can help avoid unwanted failures when
+ * examining catalog entries for just-deleted relations.
+ *
  * We expect this function to work, or throw a reasonably clean error,
  * for any node tree that can appear in a catalog pg_node_tree column.
  * Query trees, such as those appearing in pg_rewrite.ev_action, are
@@ -2611,29 +2615,16 @@ pg_get_expr(PG_FUNCTION_ARGS)
 {
 	text	   *expr = PG_GETARG_TEXT_PP(0);
 	Oid			relid = PG_GETARG_OID(1);
+	text	   *result;
 	int			prettyFlags;
-	char	   *relname;
 
 	prettyFlags = PRETTYFLAG_INDENT;
 
-	if (OidIsValid(relid))
-	{
-		/* Get the name for the relation */
-		relname = get_rel_name(relid);
-
-		/*
-		 * If the OID isn't actually valid, don't throw an error, just return
-		 * NULL.  This is a bit questionable, but it's what we've done
-		 * historically, and it can help avoid unwanted failures when
-		 * examining catalog entries for just-deleted relations.
-		 */
-		if (relname == NULL)
-			PG_RETURN_NULL();
-	}
+	result = pg_get_expr_worker(expr, relid, prettyFlags);
+	if (result)
+		PG_RETURN_TEXT_P(result);
 	else
-		relname = NULL;
-
-	PG_RETURN_TEXT_P(pg_get_expr_worker(expr, relid, relname, prettyFlags));
+		PG_RETURN_NULL();
 }
 
 Datum
@@ -2642,33 +2633,27 @@ pg_get_expr_ext(PG_FUNCTION_ARGS)
 	text	   *expr = PG_GETARG_TEXT_PP(0);
 	Oid			relid = PG_GETARG_OID(1);
 	bool		pretty = PG_GETARG_BOOL(2);
+	text	   *result;
 	int			prettyFlags;
-	char	   *relname;
 
 	prettyFlags = GET_PRETTY_FLAGS(pretty);
 
-	if (OidIsValid(relid))
-	{
-		/* Get the name for the relation */
-		relname = get_rel_name(relid);
-
-		/* See notes above */
-		if (relname == NULL)
-			PG_RETURN_NULL();
-	}
+	result = pg_get_expr_worker(expr, relid, prettyFlags);
+	if (result)
+		PG_RETURN_TEXT_P(result);
 	else
-		relname = NULL;
-
-	PG_RETURN_TEXT_P(pg_get_expr_worker(expr, relid, relname, prettyFlags));
+		PG_RETURN_NULL();
 }
 
 static text *
-pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
+pg_get_expr_worker(text *expr, Oid relid, int prettyFlags)
 {
 	Node	   *node;
 	Node	   *tst;
 	Relids		relids;
 	List	   *context;
 	char	   *exprstr;
+	Relation	rel = NULL;
 	char	   *str;
 
 	/* Convert input pg_node_tree (really TEXT) object to C string */
@@ -2713,9 +2698,19 @@ pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
 				 errmsg("expression contains variables")));
 	}
 
-	/* Prepare deparse context if needed */
+	/*
+	 * Prepare deparse context if needed.  If we are deparsing with a relid,
+	 * we need to transiently open and lock the rel, to make sure it won't go
+	 * away underneath us.  (set_relation_column_names would lock it anyway,
+	 * so this isn't really introducing any new behavior.)
+	 */
 	if (OidIsValid(relid))
-		context = deparse_context_for(relname, relid);
+	{
+		rel = try_relation_open(relid, AccessShareLock);
+		if (rel == NULL)
+			return NULL;
+		context = deparse_context_for(RelationGetRelationName(rel), relid);
+	}
 	else
 		context = NIL;
 
@@ -2723,6 +2718,9 @@ pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
 	str = deparse_expression_pretty(node, context, false, false,
 									prettyFlags, 0);
 
+	if (rel != NULL)
+		relation_close(rel, AccessShareLock);
+
 	return string_to_text(str);
 }
@@ -4975,8 +4973,11 @@ set_deparse_plan(deparse_namespace *dpns, Plan *plan)
 	 * For a WorkTableScan, locate the parent RecursiveUnion plan node and use
 	 * that as INNER referent.
 	 *
-	 * For MERGE, make the inner tlist point to the merge source tlist, which
-	 * is same as the targetlist that the ModifyTable's source plan provides.
+	 * For MERGE, pretend the ModifyTable's source plan (its outer plan) is
+	 * INNER referent.  This is the join from the target relation to the data
+	 * source, and all INNER_VAR Vars in other parts of the query refer to its
+	 * targetlist.
+	 *
 	 * For ON CONFLICT .. UPDATE we just need the inner tlist to point to the
 	 * excluded expression's tlist.  (Similar to the SubqueryScan we don't want
 	 * to reuse OUTER, it's used for RETURNING in some modify table cases,
@@ -4991,17 +4992,17 @@ set_deparse_plan(deparse_namespace *dpns, Plan *plan)
 		dpns->inner_plan = find_recursive_union(dpns,
 												(WorkTableScan *) plan);
 	else if (IsA(plan, ModifyTable))
-		dpns->inner_plan = plan;
-	else
-		dpns->inner_plan = innerPlan(plan);
-
-	if (IsA(plan, ModifyTable))
 	{
 		if (((ModifyTable *) plan)->operation == CMD_MERGE)
-			dpns->inner_tlist = dpns->outer_tlist;
+			dpns->inner_plan = outerPlan(plan);
 		else
-			dpns->inner_tlist = ((ModifyTable *) plan)->exclRelTlist;
+			dpns->inner_plan = plan;
 	}
+	else
+		dpns->inner_plan = innerPlan(plan);
+
+	if (IsA(plan, ModifyTable) && ((ModifyTable *) plan)->operation == CMD_INSERT)
+		dpns->inner_tlist = ((ModifyTable *) plan)->exclRelTlist;
 	else if (dpns->inner_plan)
 		dpns->inner_tlist = dpns->inner_plan->targetlist;
 	else
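The pg_get_expr() change moves the existence check out of the SQL-callable wrappers and into the worker, which now pins the relation with try_relation_open() for the whole deparse instead of merely looking up its name, closing a race against a concurrent DROP. A rough standalone sketch of the wrapper/worker contract with made-up names (a real "relation" and lock are replaced by a lookup table):

    #include <stdio.h>

    /* Hypothetical stand-ins: a "relation" is just a name table here. */
    static const char *rel_names[] = {"t1", "t2"};
    #define NREL 2

    /* Worker: returns NULL if the relation vanished, like pg_get_expr_worker. */
    static const char *deparse_worker(int relid)
    {
        if (relid < 0 || relid >= NREL)
            return NULL;        /* relation gone: caller maps this to SQL NULL */
        /* ... in PostgreSQL the rel would stay open and locked while deparsing ... */
        return rel_names[relid];
    }

    /* Wrapper: maps the worker's NULL result to "SQL NULL" instead of erroring. */
    static void pg_get_expr_like(int relid)
    {
        const char *r = deparse_worker(relid);
        if (r)
            printf("deparsed against %s\n", r);
        else
            puts("NULL");       /* just-dropped relation: no error */
    }

    int main(void)
    {
        pg_get_expr_like(0);    /* deparsed against t1 */
        pg_get_expr_like(42);   /* NULL */
        return 0;
    }

The design point is that only the code holding the lock can decide whether the relation still exists, so the check has to live in the worker, not ahead of it.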
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 238a40d..fd92287 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -2932,7 +2932,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
 					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 					 errmsg("timestamp out of range")));
 
-		tm->tm_mon += span->month;
+		if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+					 errmsg("timestamp out of range")));
 		if (tm->tm_mon > MONTHS_PER_YEAR)
 		{
 			tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -2984,7 +2987,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
 				 errmsg("timestamp out of range")));
 	}
 
-	timestamp += span->time;
+	if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+				 errmsg("timestamp out of range")));
 
 	if (!IS_VALID_TIMESTAMP(timestamp))
 		ereport(ERROR,
@@ -3052,7 +3058,10 @@ timestamptz_pl_interval_internal(TimestampTz timestamp,
 					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 					 errmsg("timestamp out of range")));
 
-		tm->tm_mon += span->month;
+		if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+					 errmsg("timestamp out of range")));
 		if (tm->tm_mon > MONTHS_PER_YEAR)
 		{
 			tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -3111,7 +3120,10 @@ timestamptz_pl_interval_internal(TimestampTz timestamp,
 				 errmsg("timestamp out of range")));
 	}
 
-	timestamp += span->time;
+	if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+				 errmsg("timestamp out of range")));
 
 	if (!IS_VALID_TIMESTAMP(timestamp))
 		ereport(ERROR,
@@ -3989,8 +4001,9 @@ timestamp_bin(PG_FUNCTION_ARGS)
 	Timestamp	timestamp = PG_GETARG_TIMESTAMP(1);
 	Timestamp	origin = PG_GETARG_TIMESTAMP(2);
 	Timestamp	result,
-				tm_diff,
 				stride_usecs,
+				tm_diff,
+				tm_modulo,
 				tm_delta;
 
 	if (TIMESTAMP_NOT_FINITE(timestamp))
@@ -4006,24 +4019,40 @@ timestamp_bin(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("timestamps cannot be binned into intervals containing months or years")));
 
-	stride_usecs = stride->day * USECS_PER_DAY + stride->time;
+	if (unlikely(pg_mul_s64_overflow(stride->day, USECS_PER_DAY, &stride_usecs)) ||
+		unlikely(pg_add_s64_overflow(stride_usecs, stride->time, &stride_usecs)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+				 errmsg("interval out of range")));
 
 	if (stride_usecs <= 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 				 errmsg("stride must be greater than zero")));
 
-	tm_diff = timestamp - origin;
-	tm_delta = tm_diff - tm_diff % stride_usecs;
+	if (unlikely(pg_sub_s64_overflow(timestamp, origin, &tm_diff)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+				 errmsg("interval out of range")));
+
+	/* These calculations cannot overflow */
+	tm_modulo = tm_diff % stride_usecs;
+	tm_delta = tm_diff - tm_modulo;
+	result = origin + tm_delta;
 
 	/*
-	 * Make sure the returned timestamp is at the start of the bin, even if
-	 * the origin is in the future.
+	 * We want to round towards -infinity, not 0, when tm_diff is negative and
+	 * not a multiple of stride_usecs.  This adjustment *can* cause overflow,
+	 * since the result might now be out of the range origin .. timestamp.
 	 */
-	if (origin > timestamp && stride_usecs > 1)
-		tm_delta -= stride_usecs;
-
-	result = origin + tm_delta;
+	if (tm_modulo < 0)
+	{
+		if (unlikely(pg_sub_s64_overflow(result, stride_usecs, &result)) ||
+			!IS_VALID_TIMESTAMP(result))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+					 errmsg("timestamp out of range")));
+	}
 
 	PG_RETURN_TIMESTAMP(result);
 }
@@ -4174,6 +4203,7 @@ timestamptz_bin(PG_FUNCTION_ARGS)
 	TimestampTz result,
 				stride_usecs,
 				tm_diff,
+				tm_modulo,
 				tm_delta;
 
 	if (TIMESTAMP_NOT_FINITE(timestamp))
@@ -4189,24 +4219,40 @@ timestamptz_bin(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("timestamps cannot be binned into intervals containing months or years")));
 
-	stride_usecs = stride->day * USECS_PER_DAY + stride->time;
+	if (unlikely(pg_mul_s64_overflow(stride->day, USECS_PER_DAY, &stride_usecs)) ||
+		unlikely(pg_add_s64_overflow(stride_usecs, stride->time, &stride_usecs)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+				 errmsg("interval out of range")));
 
 	if (stride_usecs <= 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 				 errmsg("stride must be greater than zero")));
 
-	tm_diff = timestamp - origin;
-	tm_delta = tm_diff - tm_diff % stride_usecs;
+	if (unlikely(pg_sub_s64_overflow(timestamp, origin, &tm_diff)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+				 errmsg("interval out of range")));
+
+	/* These calculations cannot overflow */
+	tm_modulo = tm_diff % stride_usecs;
+	tm_delta = tm_diff - tm_modulo;
+	result = origin + tm_delta;
 
 	/*
-	 * Make sure the returned timestamp is at the start of the bin, even if
-	 * the origin is in the future.
+	 * We want to round towards -infinity, not 0, when tm_diff is negative and
+	 * not a multiple of stride_usecs.  This adjustment *can* cause overflow,
+	 * since the result might now be out of the range origin .. timestamp.
 	 */
-	if (origin > timestamp && stride_usecs > 1)
-		tm_delta -= stride_usecs;
-
-	result = origin + tm_delta;
+	if (tm_modulo < 0)
+	{
+		if (unlikely(pg_sub_s64_overflow(result, stride_usecs, &result)) ||
+			!IS_VALID_TIMESTAMP(result))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+					 errmsg("timestamp out of range")));
+	}
 
 	PG_RETURN_TIMESTAMPTZ(result);
 }
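All of the timestamp.c changes follow one pattern: replace raw int64 arithmetic with checked pg_*_overflow() operations, and make the bin adjustment round toward minus infinity (C's % truncates toward zero). A standalone sketch using GCC/Clang's __builtin_sub_overflow as a stand-in for PostgreSQL's pg_sub_s64_overflow; bin_floor and its error handling are illustrative assumptions, not the function's real shape:

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Floor a timestamp into stride-sized bins anchored at origin. */
    static bool bin_floor(int64_t ts, int64_t origin, int64_t stride, int64_t *out)
    {
        int64_t diff, result;

        if (stride <= 0)
            return false;
        if (__builtin_sub_overflow(ts, origin, &diff))
            return false;           /* "interval out of range" */

        /* These calculations cannot overflow: the result lies between origin and ts. */
        int64_t modulo = diff % stride;
        result = origin + (diff - modulo);

        /* Round toward -infinity, not toward 0; this step *can* overflow. */
        if (modulo < 0 && __builtin_sub_overflow(result, stride, &result))
            return false;           /* "timestamp out of range" */

        *out = result;
        return true;
    }

    int main(void)
    {
        int64_t r;
        if (bin_floor(-7, 0, 5, &r))    /* C's % truncates toward 0 ... */
            printf("bin(-7, stride 5) = %" PRId64 "\n", r);  /* ... so we floor to -10 */
        return 0;
    }

This also shows why the old "origin > timestamp && stride_usecs > 1" test was wrong: what matters is whether the remainder is negative, not whether the origin lies in the future.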
diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c
index b87a624..0c7cc55 100644
--- a/src/backend/utils/adt/windowfuncs.c
+++ b/src/backend/utils/adt/windowfuncs.c
@@ -14,6 +14,7 @@
 #include "postgres.h"
 
 #include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
 #include "utils/builtins.h"
 #include "windowapi.h"
 
@@ -486,13 +487,29 @@ window_ntile_support(PG_FUNCTION_ARGS)
 	if (IsA(rawreq, SupportRequestWFuncMonotonic))
 	{
 		SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+		WindowFunc *wfunc = req->window_func;
 
-		/*
-		 * ntile() is monotonically increasing as the number of buckets cannot
-		 * change after the first call
-		 */
-		req->monotonic = MONOTONICFUNC_INCREASING;
-		PG_RETURN_POINTER(req);
+		if (list_length(wfunc->args) == 1)
+		{
+			Node	   *expr = eval_const_expressions(NULL, linitial(wfunc->args));
+
+			/*
+			 * Due to the Node representation of WindowClause runConditions in
+			 * versions prior to v17, we need to insist that the ntile arg is
+			 * Const to allow safe application of the runCondition optimization.
+			 */
+			if (IsA(expr, Const))
+			{
+				/*
+				 * ntile() is monotonically increasing as the number of
+				 * buckets cannot change after the first call
+				 */
+				req->monotonic = MONOTONICFUNC_INCREASING;
+				PG_RETURN_POINTER(req);
+			}
+		}
+
+		PG_RETURN_POINTER(NULL);
 	}
 
 	if (IsA(rawreq, SupportRequestOptimizeWindowClause))
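The point of reporting MONOTONICFUNC_INCREASING is that a qual like ntile(10) <= 3 can become a "run condition": once a monotonically increasing window function exceeds its bound, no later row in the partition can pass, so the executor may stop early. That is only safe if the bucket count cannot change mid-scan, hence the new Const check here and in int8inc_support() above. A toy standalone illustration of the early stop (not the executor's actual mechanism; the bucket formula approximates ntile without remainder distribution):

    #include <stdio.h>

    int main(void)
    {
        int nrows = 1000000, nbuckets = 10;   /* assumed Const bucket count */
        int emitted = 0;

        for (int row = 0; row < nrows; row++)
        {
            /* ntile-style bucket number: monotonically increasing in row. */
            int bucket = (int) (((long long) row * nbuckets) / nrows) + 1;

            if (bucket > 3)       /* run condition: ntile(10) <= 3 */
                break;            /* monotonic => no later row can qualify */
            emitted++;
        }
        printf("emitted %d of %d rows\n", emitted, nrows);
        return 0;
    }

If the argument were volatile or parameterized, the bucket count could differ between calls and the break would discard qualifying rows, which is exactly the bug the Const restriction prevents.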
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index 06ae940..6fbfb3a 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -98,11 +98,12 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP,
 static bool
 TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
 {
-	uint32		xid_epoch = EpochFromFullTransactionId(fxid);
 	TransactionId xid = XidFromFullTransactionId(fxid);
 	uint32		now_epoch;
 	TransactionId now_epoch_next_xid;
 	FullTransactionId now_fullxid;
+	TransactionId oldest_xid;
+	FullTransactionId oldest_fxid;
 
 	now_fullxid = ReadNextFullTransactionId();
 	now_epoch_next_xid = XidFromFullTransactionId(now_fullxid);
@@ -135,17 +136,24 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
 	Assert(LWLockHeldByMe(XactTruncationLock));
 
 	/*
-	 * If the transaction ID has wrapped around, it's definitely too old to
-	 * determine the commit status.  Otherwise, we can compare it to
-	 * ShmemVariableCache->oldestClogXid to determine whether the relevant
-	 * CLOG entry is guaranteed to still exist.
+	 * If fxid is not older than ShmemVariableCache->oldestClogXid, the
+	 * relevant CLOG entry is guaranteed to still exist.  Convert
+	 * ShmemVariableCache->oldestClogXid into a FullTransactionId to compare
+	 * it with fxid.  Determine the right epoch knowing that oldest_fxid
+	 * shouldn't be more than 2^31 older than now_fullxid.
 	 */
-	if (xid_epoch + 1 < now_epoch
-		|| (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid)
-		|| TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
-		return false;
-
-	return true;
+	oldest_xid = ShmemVariableCache->oldestClogXid;
+	Assert(TransactionIdPrecedesOrEquals(oldest_xid, now_epoch_next_xid));
+	if (oldest_xid <= now_epoch_next_xid)
+	{
+		oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch, oldest_xid);
+	}
+	else
+	{
+		Assert(now_epoch > 0);
+		oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch - 1, oldest_xid);
+	}
+	return !FullTransactionIdPrecedes(fxid, oldest_fxid);
 }
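The rewritten test converts oldestClogXid into a 64-bit FullTransactionId rather than doing fragile epoch arithmetic on 32-bit values. The key invariant is that the oldest CLOG xid is no more than 2^31 behind the next xid, so its epoch is either the current one or the one before, decided by a single 32-bit comparison. A standalone sketch of that conversion (full_xid_for is a made-up name; a FullTransactionId here is just a uint64_t with the epoch in the high 32 bits):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Rebuild a 64-bit xid for a 32-bit xid known to lie within 2^31 of "next". */
    static uint64_t full_xid_for(uint64_t next_fxid, uint32_t xid)
    {
        uint32_t next_epoch = (uint32_t) (next_fxid >> 32);
        uint32_t next_xid = (uint32_t) next_fxid;

        if (xid <= next_xid)
            return ((uint64_t) next_epoch << 32) | xid;
        /* xid is numerically larger: it must belong to the previous epoch. */
        return ((uint64_t) (next_epoch - 1) << 32) | xid;
    }

    int main(void)
    {
        uint64_t next = ((uint64_t) 5 << 32) | 100;   /* epoch 5, next xid 100 */
        printf("oldest = %" PRIx64 "\n", full_xid_for(next, 90));         /* epoch 5 */
        printf("oldest = %" PRIx64 "\n", full_xid_for(next, 0xFFFFFF00)); /* epoch 4 */
        return 0;
    }

Once both values are 64-bit, "has the CLOG been truncated past this xid?" is an ordinary comparison, with no wraparound special cases.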
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index e85e564..a2ebb50 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -89,6 +89,8 @@ static void CatCachePrintStats(int code, Datum arg);
 #endif
 static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct);
 static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
+static void RehashCatCache(CatCache *cp);
+static void RehashCatCacheLists(CatCache *cp);
 static void CatalogCacheInitializeCache(CatCache *cache);
 static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
 										SysScanDesc scandesc,
@@ -393,6 +395,7 @@ CatCachePrintStats(int code, Datum arg)
 	long		cc_neg_hits = 0;
 	long		cc_newloads = 0;
 	long		cc_invals = 0;
+	long		cc_nlists = 0;
 	long		cc_lsearches = 0;
 	long		cc_lhits = 0;
 
@@ -402,7 +405,7 @@ CatCachePrintStats(int code, Datum arg)
 
 		if (cache->cc_ntup == 0 && cache->cc_searches == 0)
 			continue;			/* don't print unused caches */
-		elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits",
+		elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %d lists, %ld lsrch, %ld lhits",
 			 cache->cc_relname,
 			 cache->cc_indexoid,
 			 cache->cc_ntup,
@@ -414,6 +417,7 @@ CatCachePrintStats(int code, Datum arg)
 			 cache->cc_searches - cache->cc_hits - cache->cc_neg_hits - cache->cc_newloads,
 			 cache->cc_searches - cache->cc_hits - cache->cc_neg_hits,
 			 cache->cc_invals,
+			 cache->cc_nlist,
 			 cache->cc_lsearches,
 			 cache->cc_lhits);
 		cc_searches += cache->cc_searches;
@@ -421,10 +425,11 @@ CatCachePrintStats(int code, Datum arg)
 		cc_neg_hits += cache->cc_neg_hits;
 		cc_newloads += cache->cc_newloads;
 		cc_invals += cache->cc_invals;
+		cc_nlists += cache->cc_nlist;
 		cc_lsearches += cache->cc_lsearches;
 		cc_lhits += cache->cc_lhits;
 	}
-	elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits",
+	elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lists, %ld lsrch, %ld lhits",
 		 CacheHdr->ch_ntup,
 		 cc_searches,
 		 cc_hits,
@@ -434,6 +439,7 @@ CatCachePrintStats(int code, Datum arg)
 		 cc_searches - cc_hits - cc_neg_hits - cc_newloads,
 		 cc_searches - cc_hits - cc_neg_hits,
 		 cc_invals,
+		 cc_nlists,
 		 cc_lsearches,
 		 cc_lhits);
 }
@@ -522,6 +528,8 @@ CatCacheRemoveCList(CatCache *cache, CatCList *cl)
 						 cache->cc_keyno, cl->keys);
 
 	pfree(cl);
+
+	--cache->cc_nlist;
 }
@@ -560,14 +568,19 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue)
 	 * Invalidate *all* CatCLists in this cache; it's too hard to tell which
 	 * searches might still be correct, so just zap 'em all.
 	 */
-	dlist_foreach_modify(iter, &cache->cc_lists)
+	for (int i = 0; i < cache->cc_nlbuckets; i++)
 	{
-		CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+		dlist_head *bucket = &cache->cc_lbucket[i];
 
-		if (cl->refcount > 0)
-			cl->dead = true;
-		else
-			CatCacheRemoveCList(cache, cl);
+		dlist_foreach_modify(iter, bucket)
+		{
+			CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+
+			if (cl->refcount > 0)
+				cl->dead = true;
+			else
+				CatCacheRemoveCList(cache, cl);
+		}
 	}
 
 	/*
@@ -640,14 +653,19 @@ ResetCatalogCache(CatCache *cache)
 	int			i;
 
 	/* Remove each list in this cache, or at least mark it dead */
-	dlist_foreach_modify(iter, &cache->cc_lists)
+	for (i = 0; i < cache->cc_nlbuckets; i++)
 	{
-		CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+		dlist_head *bucket = &cache->cc_lbucket[i];
 
-		if (cl->refcount > 0)
-			cl->dead = true;
-		else
-			CatCacheRemoveCList(cache, cl);
+		dlist_foreach_modify(iter, bucket)
+		{
+			CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+
+			if (cl->refcount > 0)
+				cl->dead = true;
+			else
+				CatCacheRemoveCList(cache, cl);
+		}
 	}
 
 	/* Remove each tuple in this cache, or at least mark it dead */
@@ -812,6 +830,12 @@ InitCatCache(int id,
 	cp->cc_bucket = palloc0(nbuckets * sizeof(dlist_head));
 
 	/*
+	 * Many catcaches never receive any list searches.  Therefore, we don't
+	 * allocate the cc_lbucket array till we get a list search.
+	 */
+	cp->cc_lbucket = NULL;
+
+	/*
 	 * initialize the cache's relation information for the relation
 	 * corresponding to this cache, and initialize some of the new cache's
 	 * other internal fields.  But don't open the relation yet.
@@ -823,7 +847,9 @@ InitCatCache(int id,
 	cp->cc_relisshared = false; /* temporary */
 	cp->cc_tupdesc = (TupleDesc) NULL;
 	cp->cc_ntup = 0;
+	cp->cc_nlist = 0;
 	cp->cc_nbuckets = nbuckets;
+	cp->cc_nlbuckets = 0;
 	cp->cc_nkeys = nkeys;
 	for (i = 0; i < nkeys; ++i)
 		cp->cc_keyno[i] = key[i];
@@ -886,6 +912,44 @@ RehashCatCache(CatCache *cp)
 }
 
 /*
+ * Enlarge a catcache's list storage, doubling the number of buckets.
+ */
+static void
+RehashCatCacheLists(CatCache *cp)
+{
+	dlist_head *newbucket;
+	int			newnbuckets;
+	int			i;
+
+	elog(DEBUG1, "rehashing catalog cache id %d for %s; %d lists, %d buckets",
+		 cp->id, cp->cc_relname, cp->cc_nlist, cp->cc_nlbuckets);
+
+	/* Allocate a new, larger, hash table. */
+	newnbuckets = cp->cc_nlbuckets * 2;
+	newbucket = (dlist_head *)
+		MemoryContextAllocZero(CacheMemoryContext,
+							   newnbuckets * sizeof(dlist_head));
+
+	/* Move all entries from old hash table to new. */
+	for (i = 0; i < cp->cc_nlbuckets; i++)
+	{
+		dlist_mutable_iter iter;
+
+		dlist_foreach_modify(iter, &cp->cc_lbucket[i])
+		{
+			CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+			int			hashIndex = HASH_INDEX(cl->hash_value, newnbuckets);
+
+			dlist_delete(iter.cur);
+			dlist_push_head(&newbucket[hashIndex], &cl->cache_elem);
+		}
+	}
+
+	/* Switch to the new array. */
+	pfree(cp->cc_lbucket);
+	cp->cc_nlbuckets = newnbuckets;
+	cp->cc_lbucket = newbucket;
+}
+
+/*
  * CatalogCacheInitializeCache
 *
 * This function does final initialization of a catcache: obtain the tuple
@@ -1527,7 +1591,9 @@ SearchCatCacheList(CatCache *cache,
 	Datum		v4 = 0;			/* dummy last-column value */
 	Datum		arguments[CATCACHE_MAXKEYS];
 	uint32		lHashValue;
+	Index		lHashIndex;
 	dlist_iter	iter;
+	dlist_head *lbucket;
 	CatCList   *cl;
 	CatCTup    *ct;
 	List	   *volatile ctlist;
@@ -1541,7 +1607,7 @@ SearchCatCacheList(CatCache *cache,
 	/*
 	 * one-time startup overhead for each cache
 	 */
-	if (cache->cc_tupdesc == NULL)
+	if (unlikely(cache->cc_tupdesc == NULL))
 		CatalogCacheInitializeCache(cache);
 
 	Assert(nkeys > 0 && nkeys < cache->cc_nkeys);
@@ -1557,11 +1623,36 @@ SearchCatCacheList(CatCache *cache,
 	arguments[3] = v4;
 
 	/*
-	 * compute a hash value of the given keys for faster search.  We don't
-	 * presently divide the CatCList items into buckets, but this still lets
-	 * us skip non-matching items quickly most of the time.
+	 * If we haven't previously done a list search in this cache, create the
+	 * bucket header array; otherwise, consider whether it's time to enlarge
+	 * it.
+	 */
+	if (cache->cc_lbucket == NULL)
+	{
+		/* Arbitrary initial size --- must be a power of 2 */
+		int			nbuckets = 16;
+
+		cache->cc_lbucket = (dlist_head *)
+			MemoryContextAllocZero(CacheMemoryContext,
+								   nbuckets * sizeof(dlist_head));
+		/* Don't set cc_nlbuckets if we get OOM allocating cc_lbucket */
+		cache->cc_nlbuckets = nbuckets;
+	}
+	else
+	{
+		/*
+		 * If the hash table has become too full, enlarge the buckets array.
+		 * Quite arbitrarily, we enlarge when fill factor > 2.
+		 */
+		if (cache->cc_nlist > cache->cc_nlbuckets * 2)
+			RehashCatCacheLists(cache);
+	}
+
+	/*
+	 * Find the hash bucket in which to look for the CatCList.
 	 */
 	lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4);
+	lHashIndex = HASH_INDEX(lHashValue, cache->cc_nlbuckets);
 
 	/*
 	 * scan the items until we find a match or exhaust our list
@@ -1569,7 +1660,8 @@ SearchCatCacheList(CatCache *cache,
 	 * Note: it's okay to use dlist_foreach here, even though we modify the
 	 * dlist within the loop, because we don't continue the loop afterwards.
 	 */
-	dlist_foreach(iter, &cache->cc_lists)
+	lbucket = &cache->cc_lbucket[lHashIndex];
+	dlist_foreach(iter, lbucket)
 	{
 		cl = dlist_container(CatCList, cache_elem, iter.cur);
 
@@ -1589,13 +1681,13 @@ SearchCatCacheList(CatCache *cache,
 			continue;
 
 		/*
-		 * We found a matching list.  Move the list to the front of the
-		 * cache's list-of-lists, to speed subsequent searches.  (We do not
+		 * We found a matching list.  Move the list to the front of the list
+		 * for its hashbucket, so as to speed subsequent searches.  (We do not
 		 * move the members to the fronts of their hashbucket lists, however,
 		 * since there's no point in that unless they are searched for
 		 * individually.)
 		 */
-		dlist_move_head(&cache->cc_lists, &cl->cache_elem);
+		dlist_move_head(lbucket, &cl->cache_elem);
 
 		/* Bump the list's refcount and return it */
 		ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner);
@@ -1806,7 +1898,12 @@ SearchCatCacheList(CatCache *cache,
 	}
 	Assert(i == nmembers);
 
-	dlist_push_head(&cache->cc_lists, &cl->cache_elem);
+	/*
+	 * Add the CatCList to the appropriate bucket, and count it.
+	 */
+	dlist_push_head(lbucket, &cl->cache_elem);
+
+	cache->cc_nlist++;
 
 	/* Finally, bump the list's refcount and return it */
 	cl->refcount++;
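Before this change, all CatCLists for a cache lived on one dlist, so every list search scanned every list. The patch gives lists the treatment tuples already had: a power-of-two bucket array, allocated lazily since many caches never see a list search, that doubles once fill factor exceeds 2. A standalone sketch of that grow-on-demand bucketing with an intrusive singly linked list (HASH_INDEX here is simply hash & (nbuckets - 1); error handling is omitted for brevity):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct entry { uint32_t hash; struct entry *next; };

    static struct entry **bucket;       /* like cc_lbucket: starts NULL */
    static int nbuckets;                /* like cc_nlbuckets            */
    static int nentries;                /* like cc_nlist                */

    static void rehash(void)
    {
        int newn = nbuckets * 2;
        struct entry **newb = calloc(newn, sizeof(*newb));

        /* Move every entry into its slot in the doubled array. */
        for (int i = 0; i < nbuckets; i++)
            for (struct entry *e = bucket[i], *nx; e; e = nx)
            {
                nx = e->next;
                e->next = newb[e->hash & (newn - 1)];
                newb[e->hash & (newn - 1)] = e;
            }
        free(bucket);
        bucket = newb;
        nbuckets = newn;
    }

    static void insert(uint32_t hash)
    {
        if (bucket == NULL)             /* lazy allocation on first use */
        {
            nbuckets = 16;              /* arbitrary initial size, power of 2 */
            bucket = calloc(nbuckets, sizeof(*bucket));
        }
        else if (nentries > nbuckets * 2)   /* enlarge when fill factor > 2 */
            rehash();

        struct entry *e = malloc(sizeof(*e));
        e->hash = hash;
        e->next = bucket[hash & (nbuckets - 1)];
        bucket[hash & (nbuckets - 1)] = e;
        nentries++;
    }

    int main(void)
    {
        for (uint32_t i = 0; i < 1000; i++)
            insert(i * 2654435761u);    /* multiplicative hashing as a toy hash */
        printf("%d entries in %d buckets\n", nentries, nbuckets);
        return 0;
    }

The move-to-front on a hit (dlist_move_head above) is the same LRU-ish trick the tuple buckets use: repeatedly searched lists stay near their bucket's head.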
diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c
index 24683bb..4e925d5 100644
--- a/src/backend/utils/fmgr/funcapi.c
+++ b/src/backend/utils/fmgr/funcapi.c
@@ -287,6 +287,13 @@ get_call_result_type(FunctionCallInfo fcinfo,
 /*
 * get_expr_result_type
 *		As above, but work from a calling expression node tree
+ *
+ * Beware of using this on the funcexpr of a RTE that has a coldeflist.
+ * The correct conclusion in such cases is always that the function returns
+ * RECORD with the columns defined by the coldeflist fields (funccolnames etc).
+ * If it does not, it's the executor's responsibility to catch the discrepancy
+ * at runtime; but code processing the query in advance of that point might
+ * come to inconsistent conclusions if it checks the actual expression.
 */
 TypeFuncClass
 get_expr_result_type(Node *expr,
@@ -537,7 +544,8 @@ internal_get_result_type(Oid funcid,
 * if noError is true, else throws error.
 *
 * This is a simpler version of get_expr_result_type() for use when the caller
- * is only interested in determinate rowtype results.
+ * is only interested in determinate rowtype results.  As with that function,
+ * beware of using this on the funcexpr of a RTE that has a coldeflist.
 */
 TupleDesc
 get_expr_result_tupdesc(Node *expr, bool noError)
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f7add12..904e918 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -193,9 +193,9 @@
 #effective_io_concurrency = 1		# 1-1000; 0 disables prefetching
 #maintenance_io_concurrency = 10	# 1-1000; 0 disables prefetching
 #max_worker_processes = 8		# (change requires restart)
-#max_parallel_workers_per_gather = 2	# taken from max_parallel_workers
-#max_parallel_maintenance_workers = 2	# taken from max_parallel_workers
-#max_parallel_workers = 8		# maximum number of max_worker_processes that
+#max_parallel_workers_per_gather = 2	# limited by max_parallel_workers
+#max_parallel_maintenance_workers = 2	# limited by max_parallel_workers
+#max_parallel_workers = 8		# number of max_worker_processes that
 #					# can be used in parallel operations
 #parallel_leader_participation = on
 #old_snapshot_threshold = -1		# 1min-60d; -1 disables; 0 is immediate
diff --git a/src/backend/utils/misc/ps_status.c b/src/backend/utils/misc/ps_status.c
index 104c01d..b364902 100644
--- a/src/backend/utils/misc/ps_status.c
+++ b/src/backend/utils/misc/ps_status.c
@@ -109,7 +109,8 @@ static char **save_argv;
 * (The original argv[] will not be overwritten by this routine, but may be
 * overwritten during init_ps_display.  Also, the physical location of the
 * environment strings may be moved, so this should be called before any code
- * that might try to hang onto a getenv() result.)
+ * that might try to hang onto a getenv() result.  But see hack for musl
+ * within.)
 *
 * Note that in case of failure this cannot call elog() as that is not
 * initialized yet.  We rely on write_stderr() instead.
@@ -124,7 +125,7 @@ save_ps_display_args(int argc, char **argv)
 
 	/*
 	 * If we're going to overwrite the argv area, count the available space.
-	 * Also move the environment to make additional room.
+	 * Also move the environment strings to make additional room.
 	 */
 	{
 		char	   *end_of_area = NULL;
@@ -153,7 +154,33 @@ save_ps_display_args(int argc, char **argv)
 		for (i = 0; environ[i] != NULL; i++)
 		{
 			if (end_of_area + 1 == environ[i])
-				end_of_area = environ[i] + strlen(environ[i]);
+			{
+				/*
+				 * The musl dynamic linker keeps a static pointer to the
+				 * initial value of LD_LIBRARY_PATH, if that is defined in the
+				 * process's environment.  Therefore, we must not overwrite the
+				 * value of that setting and thus cannot advance end_of_area
+				 * beyond it.  Musl does not define any identifying compiler
+				 * symbol, so we have to do this unless we see a symbol
+				 * identifying a Linux libc we know is safe.
+				 */
+#if defined(__linux__) && (!defined(__GLIBC__) && !defined(__UCLIBC__))
+				if (strncmp(environ[i], "LD_LIBRARY_PATH=", 16) == 0)
+				{
+					/*
+					 * We can overwrite the name, but stop at the equals sign.
+					 * Future loop iterations will not find any more
+					 * contiguous space, but we don't break early because we
+					 * need to count the total number of environ[] entries.
+					 */
+					end_of_area = environ[i] + 15;
+				}
+				else
+#endif
+				{
+					end_of_area = environ[i] + strlen(environ[i]);
+				}
+			}
 		}
 
 		ps_buffer = argv[0];
@@ -185,7 +212,7 @@ save_ps_display_args(int argc, char **argv)
 	 * If we're going to change the original argv[] then make a copy for
 	 * argument parsing purposes.
 	 *
-	 * (NB: do NOT think to remove the copying of argv[], even though
+	 * NB: do NOT think to remove the copying of argv[], even though
 	 * postmaster.c finishes looking at argv[] long before we ever consider
 	 * changing the ps display.  On some platforms, getopt() keeps pointers
 	 * into the argv array, and will get horribly confused when it is
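The ps_status.c hack depends on how save_ps_display_args() claims memory: it walks environ[] and extends end_of_area only across strings physically contiguous with the area it already owns, and the musl exception simply stops that advance at LD_LIBRARY_PATH's value. A simplified standalone sketch of the contiguity scan (it prints, rather than clobbers, the reusable region, and unlike the real code it starts from the last argv string without validating argv contiguity first):

    #include <stdio.h>
    #include <string.h>

    extern char **environ;

    int main(int argc, char **argv)
    {
        char *end_of_area = argv[argc - 1] + strlen(argv[argc - 1]);

        for (int i = 0; environ[i] != NULL; i++)
        {
            if (end_of_area + 1 == environ[i])
            {
    #if defined(__linux__) && !defined(__GLIBC__) && !defined(__UCLIBC__)
                /* Possibly musl: never overwrite LD_LIBRARY_PATH's value. */
                if (strncmp(environ[i], "LD_LIBRARY_PATH=", 16) == 0)
                    end_of_area = environ[i] + 15;  /* stop at the '=' */
                else
    #endif
                    end_of_area = environ[i] + strlen(environ[i]);
            }
        }
        printf("contiguous scratch area: %zu bytes\n",
               (size_t) (end_of_area - argv[0]));
        return 0;
    }

Once the advance stops, later environ strings are no longer adjacent, so nothing past LD_LIBRARY_PATH is ever reused for the ps display, which is exactly the guarantee musl's dynamic linker needs.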
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index 8d1aace..bc6c76b 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -1105,9 +1105,13 @@ dsa_dump(dsa_area *area)
 		{
 			dsa_segment_index segment_index;
 
-			fprintf(stderr,
-					"    segment bin %zu (at least %d contiguous pages free):\n",
-					i, 1 << (i - 1));
+			if (i == 0)
+				fprintf(stderr,
+						"    segment bin %zu (no contiguous free pages):\n", i);
+			else
+				fprintf(stderr,
+						"    segment bin %zu (at least %d contiguous pages free):\n",
+						i, 1 << (i - 1));
 			segment_index = area->control->segment_bins[i];
 			while (segment_index != DSA_SEGMENT_INDEX_NONE)
 			{
diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index f31878b..14375b0 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -1174,10 +1174,8 @@ LogicalTapeTell(LogicalTape *lt, long *blocknum, int *offset)
 }
 
 /*
- * Obtain total disk space currently used by a LogicalTapeSet, in blocks.
- *
- * This should not be called while there are open write buffers; otherwise it
- * may not account for buffered data.
+ * Obtain total disk space currently used by a LogicalTapeSet, in blocks.  Does
+ * not account for open write buffer, if any.
 */
 long
 LogicalTapeSetBlocks(LogicalTapeSet *lts)
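The dsa_dump() fix matters because bin 0 is special: dsa.c keeps fully-allocated segments (zero contiguous free pages) in bin 0, while bin i >= 1 holds segments with at least 1 << (i - 1) contiguous free pages, so the old message effectively printed a bogus "1 << -1" count for bin 0. A standalone sketch of that binning rule (pages_to_bin and NUM_BINS are illustrative names, not dsa.c's):

    #include <stdio.h>

    #define NUM_BINS 16

    /* Map a count of contiguous free pages to a segment bin, dsa.c-style:
     * bin 0 = no free pages; bin i >= 1 holds >= 1 << (i - 1) contiguous pages. */
    static int pages_to_bin(unsigned long n)
    {
        if (n == 0)
            return 0;
        int bin = 1;
        while (n >>= 1)
            bin++;              /* bin = floor(log2(original n)) + 1 */
        int max = NUM_BINS - 1;
        return bin < max ? bin : max;
    }

    int main(void)
    {
        unsigned long tests[] = {0, 1, 2, 3, 4, 1024};
        for (int i = 0; i < 6; i++)
        {
            int b = pages_to_bin(tests[i]);
            printf("%lu pages -> bin %d (holds at least %d)\n",
                   tests[i], b, b ? 1 << (b - 1) : 0);
        }
        return 0;
    }

The logtape.c change in the same push is pure comment cleanup: LogicalTapeSetBlocks() may legitimately be called with open write buffers; the result just excludes whatever is still buffered.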