summaryrefslogtreecommitdiffstats
path: root/src/backend/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/Makefile7
-rw-r--r--src/backend/utils/adt/int8.c15
-rw-r--r--src/backend/utils/adt/jsonpath_exec.c3
-rw-r--r--src/backend/utils/adt/ruleutils.c89
-rw-r--r--src/backend/utils/adt/timestamp.c92
-rw-r--r--src/backend/utils/adt/windowfuncs.c29
-rw-r--r--src/backend/utils/adt/xid8funcs.c30
-rw-r--r--src/backend/utils/cache/catcache.c143
-rw-r--r--src/backend/utils/fmgr/funcapi.c10
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample6
-rw-r--r--src/backend/utils/misc/ps_status.c35
-rw-r--r--src/backend/utils/mmgr/dsa.c10
-rw-r--r--src/backend/utils/sort/logtape.c6
13 files changed, 351 insertions, 124 deletions
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index deb9016..4299735 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -38,9 +38,12 @@ all: distprep probes.h generated-header-symlinks
distprep: fmgr-stamp errcodes.h
-.PHONY: generated-header-symlinks
+.PHONY: generated-header-symlinks submake-adt-headers
-generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp $(top_builddir)/src/include/utils/probes.h
+generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp $(top_builddir)/src/include/utils/probes.h submake-adt-headers
+
+submake-adt-headers:
+ $(MAKE) -C adt jsonpath_gram.h
$(SUBDIRS:%=%-recursive): fmgr-stamp errcodes.h
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 123b4eb..41fbeec 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -833,6 +833,21 @@ int8inc_support(PG_FUNCTION_ARGS)
SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
MonotonicFunction monotonic = MONOTONICFUNC_NONE;
int frameOptions = req->window_clause->frameOptions;
+ WindowFunc *wfunc = req->window_func;
+
+ if (list_length(wfunc->args) == 1)
+ {
+ Node *expr = eval_const_expressions(NULL, linitial(wfunc->args));
+
+ /*
+ * Due to the Node representation of WindowClause runConditions in
+ * version prior to v17, we need to insist that the count arg is
+ * Const to allow safe application of the runCondition
+ * optimization.
+ */
+ if (!IsA(expr, Const))
+ PG_RETURN_POINTER(NULL);
+ }
/* No ORDER BY clause then all rows are peers */
if (req->window_clause->orderClause == NIL)
diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c
index 2d0599b..5c25bc1 100644
--- a/src/backend/utils/adt/jsonpath_exec.c
+++ b/src/backend/utils/adt/jsonpath_exec.c
@@ -1232,6 +1232,9 @@ executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
JsonPathBool res;
JsonPathBool res2;
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
if (!canHaveNext && jspHasNext(jsp))
elog(ERROR, "boolean jsonpath item cannot have next item");
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 834e170..f01cc25 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -352,8 +352,7 @@ static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
bool attrsOnly, bool missing_ok);
static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
int prettyFlags, bool missing_ok);
-static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname,
- int prettyFlags);
+static text *pg_get_expr_worker(text *expr, Oid relid, int prettyFlags);
static int print_function_arguments(StringInfo buf, HeapTuple proctup,
bool print_table_args, bool print_defaults);
static void print_function_rettype(StringInfo buf, HeapTuple proctup);
@@ -2599,6 +2598,11 @@ decompile_column_index_array(Datum column_index_array, Oid relId,
* partial indexes, column default expressions, etc. We also support
* Var-free expressions, for which the OID can be InvalidOid.
*
+ * If the OID is nonzero but not actually valid, don't throw an error,
+ * just return NULL. This is a bit questionable, but it's what we've
+ * done historically, and it can help avoid unwanted failures when
+ * examining catalog entries for just-deleted relations.
+ *
* We expect this function to work, or throw a reasonably clean error,
* for any node tree that can appear in a catalog pg_node_tree column.
* Query trees, such as those appearing in pg_rewrite.ev_action, are
@@ -2611,29 +2615,16 @@ pg_get_expr(PG_FUNCTION_ARGS)
{
text *expr = PG_GETARG_TEXT_PP(0);
Oid relid = PG_GETARG_OID(1);
+ text *result;
int prettyFlags;
- char *relname;
prettyFlags = PRETTYFLAG_INDENT;
- if (OidIsValid(relid))
- {
- /* Get the name for the relation */
- relname = get_rel_name(relid);
-
- /*
- * If the OID isn't actually valid, don't throw an error, just return
- * NULL. This is a bit questionable, but it's what we've done
- * historically, and it can help avoid unwanted failures when
- * examining catalog entries for just-deleted relations.
- */
- if (relname == NULL)
- PG_RETURN_NULL();
- }
+ result = pg_get_expr_worker(expr, relid, prettyFlags);
+ if (result)
+ PG_RETURN_TEXT_P(result);
else
- relname = NULL;
-
- PG_RETURN_TEXT_P(pg_get_expr_worker(expr, relid, relname, prettyFlags));
+ PG_RETURN_NULL();
}
Datum
@@ -2642,33 +2633,27 @@ pg_get_expr_ext(PG_FUNCTION_ARGS)
text *expr = PG_GETARG_TEXT_PP(0);
Oid relid = PG_GETARG_OID(1);
bool pretty = PG_GETARG_BOOL(2);
+ text *result;
int prettyFlags;
- char *relname;
prettyFlags = GET_PRETTY_FLAGS(pretty);
- if (OidIsValid(relid))
- {
- /* Get the name for the relation */
- relname = get_rel_name(relid);
- /* See notes above */
- if (relname == NULL)
- PG_RETURN_NULL();
- }
+ result = pg_get_expr_worker(expr, relid, prettyFlags);
+ if (result)
+ PG_RETURN_TEXT_P(result);
else
- relname = NULL;
-
- PG_RETURN_TEXT_P(pg_get_expr_worker(expr, relid, relname, prettyFlags));
+ PG_RETURN_NULL();
}
static text *
-pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
+pg_get_expr_worker(text *expr, Oid relid, int prettyFlags)
{
Node *node;
Node *tst;
Relids relids;
List *context;
char *exprstr;
+ Relation rel = NULL;
char *str;
/* Convert input pg_node_tree (really TEXT) object to C string */
@@ -2713,9 +2698,19 @@ pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
errmsg("expression contains variables")));
}
- /* Prepare deparse context if needed */
+ /*
+ * Prepare deparse context if needed. If we are deparsing with a relid,
+ * we need to transiently open and lock the rel, to make sure it won't go
+ * away underneath us. (set_relation_column_names would lock it anyway,
+ * so this isn't really introducing any new behavior.)
+ */
if (OidIsValid(relid))
- context = deparse_context_for(relname, relid);
+ {
+ rel = try_relation_open(relid, AccessShareLock);
+ if (rel == NULL)
+ return NULL;
+ context = deparse_context_for(RelationGetRelationName(rel), relid);
+ }
else
context = NIL;
@@ -2723,6 +2718,9 @@ pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
str = deparse_expression_pretty(node, context, false, false,
prettyFlags, 0);
+ if (rel != NULL)
+ relation_close(rel, AccessShareLock);
+
return string_to_text(str);
}
@@ -4975,8 +4973,11 @@ set_deparse_plan(deparse_namespace *dpns, Plan *plan)
* For a WorkTableScan, locate the parent RecursiveUnion plan node and use
* that as INNER referent.
*
- * For MERGE, make the inner tlist point to the merge source tlist, which
- * is same as the targetlist that the ModifyTable's source plan provides.
+ * For MERGE, pretend the ModifyTable's source plan (its outer plan) is
+ * INNER referent. This is the join from the target relation to the data
+ * source, and all INNER_VAR Vars in other parts of the query refer to its
+ * targetlist.
+ *
* For ON CONFLICT .. UPDATE we just need the inner tlist to point to the
* excluded expression's tlist. (Similar to the SubqueryScan we don't want
* to reuse OUTER, it's used for RETURNING in some modify table cases,
@@ -4991,17 +4992,17 @@ set_deparse_plan(deparse_namespace *dpns, Plan *plan)
dpns->inner_plan = find_recursive_union(dpns,
(WorkTableScan *) plan);
else if (IsA(plan, ModifyTable))
- dpns->inner_plan = plan;
- else
- dpns->inner_plan = innerPlan(plan);
-
- if (IsA(plan, ModifyTable))
{
if (((ModifyTable *) plan)->operation == CMD_MERGE)
- dpns->inner_tlist = dpns->outer_tlist;
+ dpns->inner_plan = outerPlan(plan);
else
- dpns->inner_tlist = ((ModifyTable *) plan)->exclRelTlist;
+ dpns->inner_plan = plan;
}
+ else
+ dpns->inner_plan = innerPlan(plan);
+
+ if (IsA(plan, ModifyTable) && ((ModifyTable *) plan)->operation == CMD_INSERT)
+ dpns->inner_tlist = ((ModifyTable *) plan)->exclRelTlist;
else if (dpns->inner_plan)
dpns->inner_tlist = dpns->inner_plan->targetlist;
else
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 238a40d..fd92287 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -2932,7 +2932,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
- tm->tm_mon += span->month;
+ if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
if (tm->tm_mon > MONTHS_PER_YEAR)
{
tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -2984,7 +2987,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
errmsg("timestamp out of range")));
}
- timestamp += span->time;
+ if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
if (!IS_VALID_TIMESTAMP(timestamp))
ereport(ERROR,
@@ -3052,7 +3058,10 @@ timestamptz_pl_interval_internal(TimestampTz timestamp,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
- tm->tm_mon += span->month;
+ if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
if (tm->tm_mon > MONTHS_PER_YEAR)
{
tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -3111,7 +3120,10 @@ timestamptz_pl_interval_internal(TimestampTz timestamp,
errmsg("timestamp out of range")));
}
- timestamp += span->time;
+ if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
if (!IS_VALID_TIMESTAMP(timestamp))
ereport(ERROR,
@@ -3989,8 +4001,9 @@ timestamp_bin(PG_FUNCTION_ARGS)
Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
Timestamp origin = PG_GETARG_TIMESTAMP(2);
Timestamp result,
- tm_diff,
stride_usecs,
+ tm_diff,
+ tm_modulo,
tm_delta;
if (TIMESTAMP_NOT_FINITE(timestamp))
@@ -4006,24 +4019,40 @@ timestamp_bin(PG_FUNCTION_ARGS)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("timestamps cannot be binned into intervals containing months or years")));
- stride_usecs = stride->day * USECS_PER_DAY + stride->time;
+ if (unlikely(pg_mul_s64_overflow(stride->day, USECS_PER_DAY, &stride_usecs)) ||
+ unlikely(pg_add_s64_overflow(stride_usecs, stride->time, &stride_usecs)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
if (stride_usecs <= 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("stride must be greater than zero")));
- tm_diff = timestamp - origin;
- tm_delta = tm_diff - tm_diff % stride_usecs;
+ if (unlikely(pg_sub_s64_overflow(timestamp, origin, &tm_diff)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ /* These calculations cannot overflow */
+ tm_modulo = tm_diff % stride_usecs;
+ tm_delta = tm_diff - tm_modulo;
+ result = origin + tm_delta;
/*
- * Make sure the returned timestamp is at the start of the bin, even if
- * the origin is in the future.
+ * We want to round towards -infinity, not 0, when tm_diff is negative and
+ * not a multiple of stride_usecs. This adjustment *can* cause overflow,
+ * since the result might now be out of the range origin .. timestamp.
*/
- if (origin > timestamp && stride_usecs > 1)
- tm_delta -= stride_usecs;
-
- result = origin + tm_delta;
+ if (tm_modulo < 0)
+ {
+ if (unlikely(pg_sub_s64_overflow(result, stride_usecs, &result)) ||
+ !IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
PG_RETURN_TIMESTAMP(result);
}
@@ -4174,6 +4203,7 @@ timestamptz_bin(PG_FUNCTION_ARGS)
TimestampTz result,
stride_usecs,
tm_diff,
+ tm_modulo,
tm_delta;
if (TIMESTAMP_NOT_FINITE(timestamp))
@@ -4189,24 +4219,40 @@ timestamptz_bin(PG_FUNCTION_ARGS)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("timestamps cannot be binned into intervals containing months or years")));
- stride_usecs = stride->day * USECS_PER_DAY + stride->time;
+ if (unlikely(pg_mul_s64_overflow(stride->day, USECS_PER_DAY, &stride_usecs)) ||
+ unlikely(pg_add_s64_overflow(stride_usecs, stride->time, &stride_usecs)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
if (stride_usecs <= 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("stride must be greater than zero")));
- tm_diff = timestamp - origin;
- tm_delta = tm_diff - tm_diff % stride_usecs;
+ if (unlikely(pg_sub_s64_overflow(timestamp, origin, &tm_diff)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ /* These calculations cannot overflow */
+ tm_modulo = tm_diff % stride_usecs;
+ tm_delta = tm_diff - tm_modulo;
+ result = origin + tm_delta;
/*
- * Make sure the returned timestamp is at the start of the bin, even if
- * the origin is in the future.
+ * We want to round towards -infinity, not 0, when tm_diff is negative and
+ * not a multiple of stride_usecs. This adjustment *can* cause overflow,
+ * since the result might now be out of the range origin .. timestamp.
*/
- if (origin > timestamp && stride_usecs > 1)
- tm_delta -= stride_usecs;
-
- result = origin + tm_delta;
+ if (tm_modulo < 0)
+ {
+ if (unlikely(pg_sub_s64_overflow(result, stride_usecs, &result)) ||
+ !IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
PG_RETURN_TIMESTAMPTZ(result);
}
diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c
index b87a624..0c7cc55 100644
--- a/src/backend/utils/adt/windowfuncs.c
+++ b/src/backend/utils/adt/windowfuncs.c
@@ -14,6 +14,7 @@
#include "postgres.h"
#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
#include "utils/builtins.h"
#include "windowapi.h"
@@ -486,13 +487,29 @@ window_ntile_support(PG_FUNCTION_ARGS)
if (IsA(rawreq, SupportRequestWFuncMonotonic))
{
SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+ WindowFunc *wfunc = req->window_func;
- /*
- * ntile() is monotonically increasing as the number of buckets cannot
- * change after the first call
- */
- req->monotonic = MONOTONICFUNC_INCREASING;
- PG_RETURN_POINTER(req);
+ if (list_length(wfunc->args) == 1)
+ {
+ Node *expr = eval_const_expressions(NULL, linitial(wfunc->args));
+
+ /*
+ * Due to the Node representation of WindowClause runConditions in
+ * version prior to v17, we need to insist that ntile arg is Const
+ * to allow safe application of the runCondition optimization.
+ */
+ if (IsA(expr, Const))
+ {
+ /*
+ * ntile() is monotonically increasing as the number of
+ * buckets cannot change after the first call
+ */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+ }
+
+ PG_RETURN_POINTER(NULL);
}
if (IsA(rawreq, SupportRequestOptimizeWindowClause))
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index 06ae940..6fbfb3a 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -98,11 +98,12 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP,
static bool
TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
{
- uint32 xid_epoch = EpochFromFullTransactionId(fxid);
TransactionId xid = XidFromFullTransactionId(fxid);
uint32 now_epoch;
TransactionId now_epoch_next_xid;
FullTransactionId now_fullxid;
+ TransactionId oldest_xid;
+ FullTransactionId oldest_fxid;
now_fullxid = ReadNextFullTransactionId();
now_epoch_next_xid = XidFromFullTransactionId(now_fullxid);
@@ -135,17 +136,24 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
Assert(LWLockHeldByMe(XactTruncationLock));
/*
- * If the transaction ID has wrapped around, it's definitely too old to
- * determine the commit status. Otherwise, we can compare it to
- * ShmemVariableCache->oldestClogXid to determine whether the relevant
- * CLOG entry is guaranteed to still exist.
+ * If fxid is not older than ShmemVariableCache->oldestClogXid, the
+ * relevant CLOG entry is guaranteed to still exist. Convert
+ * ShmemVariableCache->oldestClogXid into a FullTransactionId to compare
+ * it with fxid. Determine the right epoch knowing that oldest_fxid
+ * shouldn't be more than 2^31 older than now_fullxid.
*/
- if (xid_epoch + 1 < now_epoch
- || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid)
- || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
- return false;
-
- return true;
+ oldest_xid = ShmemVariableCache->oldestClogXid;
+ Assert(TransactionIdPrecedesOrEquals(oldest_xid, now_epoch_next_xid));
+ if (oldest_xid <= now_epoch_next_xid)
+ {
+ oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch, oldest_xid);
+ }
+ else
+ {
+ Assert(now_epoch > 0);
+ oldest_fxid = FullTransactionIdFromEpochAndXid(now_epoch - 1, oldest_xid);
+ }
+ return !FullTransactionIdPrecedes(fxid, oldest_fxid);
}
/*
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index e85e564..a2ebb50 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -89,6 +89,8 @@ static void CatCachePrintStats(int code, Datum arg);
#endif
static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct);
static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
+static void RehashCatCache(CatCache *cp);
+static void RehashCatCacheLists(CatCache *cp);
static void CatalogCacheInitializeCache(CatCache *cache);
static CatCTup *CatalogCacheCreateEntry(CatCache *cache,
HeapTuple ntp, SysScanDesc scandesc,
@@ -393,6 +395,7 @@ CatCachePrintStats(int code, Datum arg)
long cc_neg_hits = 0;
long cc_newloads = 0;
long cc_invals = 0;
+ long cc_nlists = 0;
long cc_lsearches = 0;
long cc_lhits = 0;
@@ -402,7 +405,7 @@ CatCachePrintStats(int code, Datum arg)
if (cache->cc_ntup == 0 && cache->cc_searches == 0)
continue; /* don't print unused caches */
- elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits",
+ elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %d lists, %ld lsrch, %ld lhits",
cache->cc_relname,
cache->cc_indexoid,
cache->cc_ntup,
@@ -414,6 +417,7 @@ CatCachePrintStats(int code, Datum arg)
cache->cc_searches - cache->cc_hits - cache->cc_neg_hits - cache->cc_newloads,
cache->cc_searches - cache->cc_hits - cache->cc_neg_hits,
cache->cc_invals,
+ cache->cc_nlist,
cache->cc_lsearches,
cache->cc_lhits);
cc_searches += cache->cc_searches;
@@ -421,10 +425,11 @@ CatCachePrintStats(int code, Datum arg)
cc_neg_hits += cache->cc_neg_hits;
cc_newloads += cache->cc_newloads;
cc_invals += cache->cc_invals;
+ cc_nlists += cache->cc_nlist;
cc_lsearches += cache->cc_lsearches;
cc_lhits += cache->cc_lhits;
}
- elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits",
+ elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lists, %ld lsrch, %ld lhits",
CacheHdr->ch_ntup,
cc_searches,
cc_hits,
@@ -434,6 +439,7 @@ CatCachePrintStats(int code, Datum arg)
cc_searches - cc_hits - cc_neg_hits - cc_newloads,
cc_searches - cc_hits - cc_neg_hits,
cc_invals,
+ cc_nlists,
cc_lsearches,
cc_lhits);
}
@@ -522,6 +528,8 @@ CatCacheRemoveCList(CatCache *cache, CatCList *cl)
cache->cc_keyno, cl->keys);
pfree(cl);
+
+ --cache->cc_nlist;
}
@@ -560,14 +568,19 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue)
* Invalidate *all* CatCLists in this cache; it's too hard to tell which
* searches might still be correct, so just zap 'em all.
*/
- dlist_foreach_modify(iter, &cache->cc_lists)
+ for (int i = 0; i < cache->cc_nlbuckets; i++)
{
- CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur);
+ dlist_head *bucket = &cache->cc_lbucket[i];
- if (cl->refcount > 0)
- cl->dead = true;
- else
- CatCacheRemoveCList(cache, cl);
+ dlist_foreach_modify(iter, bucket)
+ {
+ CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur);
+
+ if (cl->refcount > 0)
+ cl->dead = true;
+ else
+ CatCacheRemoveCList(cache, cl);
+ }
}
/*
@@ -640,14 +653,19 @@ ResetCatalogCache(CatCache *cache)
int i;
/* Remove each list in this cache, or at least mark it dead */
- dlist_foreach_modify(iter, &cache->cc_lists)
+ for (i = 0; i < cache->cc_nlbuckets; i++)
{
- CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur);
+ dlist_head *bucket = &cache->cc_lbucket[i];
- if (cl->refcount > 0)
- cl->dead = true;
- else
- CatCacheRemoveCList(cache, cl);
+ dlist_foreach_modify(iter, bucket)
+ {
+ CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur);
+
+ if (cl->refcount > 0)
+ cl->dead = true;
+ else
+ CatCacheRemoveCList(cache, cl);
+ }
}
/* Remove each tuple in this cache, or at least mark it dead */
@@ -812,6 +830,12 @@ InitCatCache(int id,
cp->cc_bucket = palloc0(nbuckets * sizeof(dlist_head));
/*
+ * Many catcaches never receive any list searches. Therefore, we don't
+ * allocate the cc_lbuckets till we get a list search.
+ */
+ cp->cc_lbucket = NULL;
+
+ /*
* initialize the cache's relation information for the relation
* corresponding to this cache, and initialize some of the new cache's
* other internal fields. But don't open the relation yet.
@@ -823,7 +847,9 @@ InitCatCache(int id,
cp->cc_relisshared = false; /* temporary */
cp->cc_tupdesc = (TupleDesc) NULL;
cp->cc_ntup = 0;
+ cp->cc_nlist = 0;
cp->cc_nbuckets = nbuckets;
+ cp->cc_nlbuckets = 0;
cp->cc_nkeys = nkeys;
for (i = 0; i < nkeys; ++i)
cp->cc_keyno[i] = key[i];
@@ -886,6 +912,44 @@ RehashCatCache(CatCache *cp)
}
/*
+ * Enlarge a catcache's list storage, doubling the number of buckets.
+ */
+static void
+RehashCatCacheLists(CatCache *cp)
+{
+ dlist_head *newbucket;
+ int newnbuckets;
+ int i;
+
+ elog(DEBUG1, "rehashing catalog cache id %d for %s; %d lists, %d buckets",
+ cp->id, cp->cc_relname, cp->cc_nlist, cp->cc_nlbuckets);
+
+ /* Allocate a new, larger, hash table. */
+ newnbuckets = cp->cc_nlbuckets * 2;
+ newbucket = (dlist_head *) MemoryContextAllocZero(CacheMemoryContext, newnbuckets * sizeof(dlist_head));
+
+ /* Move all entries from old hash table to new. */
+ for (i = 0; i < cp->cc_nlbuckets; i++)
+ {
+ dlist_mutable_iter iter;
+
+ dlist_foreach_modify(iter, &cp->cc_lbucket[i])
+ {
+ CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur);
+ int hashIndex = HASH_INDEX(cl->hash_value, newnbuckets);
+
+ dlist_delete(iter.cur);
+ dlist_push_head(&newbucket[hashIndex], &cl->cache_elem);
+ }
+ }
+
+ /* Switch to the new array. */
+ pfree(cp->cc_lbucket);
+ cp->cc_nlbuckets = newnbuckets;
+ cp->cc_lbucket = newbucket;
+}
+
+/*
* CatalogCacheInitializeCache
*
* This function does final initialization of a catcache: obtain the tuple
@@ -1527,7 +1591,9 @@ SearchCatCacheList(CatCache *cache,
Datum v4 = 0; /* dummy last-column value */
Datum arguments[CATCACHE_MAXKEYS];
uint32 lHashValue;
+ Index lHashIndex;
dlist_iter iter;
+ dlist_head *lbucket;
CatCList *cl;
CatCTup *ct;
List *volatile ctlist;
@@ -1541,7 +1607,7 @@ SearchCatCacheList(CatCache *cache,
/*
* one-time startup overhead for each cache
*/
- if (cache->cc_tupdesc == NULL)
+ if (unlikely(cache->cc_tupdesc == NULL))
CatalogCacheInitializeCache(cache);
Assert(nkeys > 0 && nkeys < cache->cc_nkeys);
@@ -1557,11 +1623,36 @@ SearchCatCacheList(CatCache *cache,
arguments[3] = v4;
/*
- * compute a hash value of the given keys for faster search. We don't
- * presently divide the CatCList items into buckets, but this still lets
- * us skip non-matching items quickly most of the time.
+ * If we haven't previously done a list search in this cache, create the
+ * bucket header array; otherwise, consider whether it's time to enlarge
+ * it.
+ */
+ if (cache->cc_lbucket == NULL)
+ {
+ /* Arbitrary initial size --- must be a power of 2 */
+ int nbuckets = 16;
+
+ cache->cc_lbucket = (dlist_head *)
+ MemoryContextAllocZero(CacheMemoryContext,
+ nbuckets * sizeof(dlist_head));
+ /* Don't set cc_nlbuckets if we get OOM allocating cc_lbucket */
+ cache->cc_nlbuckets = nbuckets;
+ }
+ else
+ {
+ /*
+ * If the hash table has become too full, enlarge the buckets array.
+ * Quite arbitrarily, we enlarge when fill factor > 2.
+ */
+ if (cache->cc_nlist > cache->cc_nlbuckets * 2)
+ RehashCatCacheLists(cache);
+ }
+
+ /*
+ * Find the hash bucket in which to look for the CatCList.
*/
lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4);
+ lHashIndex = HASH_INDEX(lHashValue, cache->cc_nlbuckets);
/*
* scan the items until we find a match or exhaust our list
@@ -1569,7 +1660,8 @@ SearchCatCacheList(CatCache *cache,
* Note: it's okay to use dlist_foreach here, even though we modify the
* dlist within the loop, because we don't continue the loop afterwards.
*/
- dlist_foreach(iter, &cache->cc_lists)
+ lbucket = &cache->cc_lbucket[lHashIndex];
+ dlist_foreach(iter, lbucket)
{
cl = dlist_container(CatCList, cache_elem, iter.cur);
@@ -1589,13 +1681,13 @@ SearchCatCacheList(CatCache *cache,
continue;
/*
- * We found a matching list. Move the list to the front of the
- * cache's list-of-lists, to speed subsequent searches. (We do not
+ * We found a matching list. Move the list to the front of the list
+ * for its hashbucket, so as to speed subsequent searches. (We do not
* move the members to the fronts of their hashbucket lists, however,
* since there's no point in that unless they are searched for
* individually.)
*/
- dlist_move_head(&cache->cc_lists, &cl->cache_elem);
+ dlist_move_head(lbucket, &cl->cache_elem);
/* Bump the list's refcount and return it */
ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner);
@@ -1806,7 +1898,12 @@ SearchCatCacheList(CatCache *cache,
}
Assert(i == nmembers);
- dlist_push_head(&cache->cc_lists, &cl->cache_elem);
+ /*
+ * Add the CatCList to the appropriate bucket, and count it.
+ */
+ dlist_push_head(lbucket, &cl->cache_elem);
+
+ cache->cc_nlist++;
/* Finally, bump the list's refcount and return it */
cl->refcount++;
diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c
index 24683bb..4e925d5 100644
--- a/src/backend/utils/fmgr/funcapi.c
+++ b/src/backend/utils/fmgr/funcapi.c
@@ -287,6 +287,13 @@ get_call_result_type(FunctionCallInfo fcinfo,
/*
* get_expr_result_type
* As above, but work from a calling expression node tree
+ *
+ * Beware of using this on the funcexpr of a RTE that has a coldeflist.
+ * The correct conclusion in such cases is always that the function returns
+ * RECORD with the columns defined by the coldeflist fields (funccolnames etc).
+ * If it does not, it's the executor's responsibility to catch the discrepancy
+ * at runtime; but code processing the query in advance of that point might
+ * come to inconsistent conclusions if it checks the actual expression.
*/
TypeFuncClass
get_expr_result_type(Node *expr,
@@ -537,7 +544,8 @@ internal_get_result_type(Oid funcid,
* if noError is true, else throws error.
*
* This is a simpler version of get_expr_result_type() for use when the caller
- * is only interested in determinate rowtype results.
+ * is only interested in determinate rowtype results. As with that function,
+ * beware of using this on the funcexpr of a RTE that has a coldeflist.
*/
TupleDesc
get_expr_result_tupdesc(Node *expr, bool noError)
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f7add12..904e918 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -193,9 +193,9 @@
#effective_io_concurrency = 1 # 1-1000; 0 disables prefetching
#maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching
#max_worker_processes = 8 # (change requires restart)
-#max_parallel_workers_per_gather = 2 # taken from max_parallel_workers
-#max_parallel_maintenance_workers = 2 # taken from max_parallel_workers
-#max_parallel_workers = 8 # maximum number of max_worker_processes that
+#max_parallel_workers_per_gather = 2 # limited by max_parallel_workers
+#max_parallel_maintenance_workers = 2 # limited by max_parallel_workers
+#max_parallel_workers = 8 # number of max_worker_processes that
# can be used in parallel operations
#parallel_leader_participation = on
#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
diff --git a/src/backend/utils/misc/ps_status.c b/src/backend/utils/misc/ps_status.c
index 104c01d..b364902 100644
--- a/src/backend/utils/misc/ps_status.c
+++ b/src/backend/utils/misc/ps_status.c
@@ -109,7 +109,8 @@ static char **save_argv;
* (The original argv[] will not be overwritten by this routine, but may be
* overwritten during init_ps_display. Also, the physical location of the
* environment strings may be moved, so this should be called before any code
- * that might try to hang onto a getenv() result.)
+ * that might try to hang onto a getenv() result. But see hack for musl
+ * within.)
*
* Note that in case of failure this cannot call elog() as that is not
* initialized yet. We rely on write_stderr() instead.
@@ -124,7 +125,7 @@ save_ps_display_args(int argc, char **argv)
/*
* If we're going to overwrite the argv area, count the available space.
- * Also move the environment to make additional room.
+ * Also move the environment strings to make additional room.
*/
{
char *end_of_area = NULL;
@@ -153,7 +154,33 @@ save_ps_display_args(int argc, char **argv)
for (i = 0; environ[i] != NULL; i++)
{
if (end_of_area + 1 == environ[i])
- end_of_area = environ[i] + strlen(environ[i]);
+ {
+ /*
+ * The musl dynamic linker keeps a static pointer to the
+ * initial value of LD_LIBRARY_PATH, if that is defined in the
+ * process's environment. Therefore, we must not overwrite the
+ * value of that setting and thus cannot advance end_of_area
+ * beyond it. Musl does not define any identifying compiler
+ * symbol, so we have to do this unless we see a symbol
+ * identifying a Linux libc we know is safe.
+ */
+#if defined(__linux__) && (!defined(__GLIBC__) && !defined(__UCLIBC__))
+ if (strncmp(environ[i], "LD_LIBRARY_PATH=", 16) == 0)
+ {
+ /*
+ * We can overwrite the name, but stop at the equals sign.
+ * Future loop iterations will not find any more
+ * contiguous space, but we don't break early because we
+ * need to count the total number of environ[] entries.
+ */
+ end_of_area = environ[i] + 15;
+ }
+ else
+#endif
+ {
+ end_of_area = environ[i] + strlen(environ[i]);
+ }
+ }
}
ps_buffer = argv[0];
@@ -185,7 +212,7 @@ save_ps_display_args(int argc, char **argv)
* If we're going to change the original argv[] then make a copy for
* argument parsing purposes.
*
- * (NB: do NOT think to remove the copying of argv[], even though
+ * NB: do NOT think to remove the copying of argv[], even though
* postmaster.c finishes looking at argv[] long before we ever consider
* changing the ps display. On some platforms, getopt() keeps pointers
* into the argv array, and will get horribly confused when it is
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index 8d1aace..bc6c76b 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -1105,9 +1105,13 @@ dsa_dump(dsa_area *area)
{
dsa_segment_index segment_index;
- fprintf(stderr,
- " segment bin %zu (at least %d contiguous pages free):\n",
- i, 1 << (i - 1));
+ if (i == 0)
+ fprintf(stderr,
+ " segment bin %zu (no contiguous free pages):\n", i);
+ else
+ fprintf(stderr,
+ " segment bin %zu (at least %d contiguous pages free):\n",
+ i, 1 << (i - 1));
segment_index = area->control->segment_bins[i];
while (segment_index != DSA_SEGMENT_INDEX_NONE)
{
diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index f31878b..14375b0 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -1174,10 +1174,8 @@ LogicalTapeTell(LogicalTape *lt, long *blocknum, int *offset)
}
/*
- * Obtain total disk space currently used by a LogicalTapeSet, in blocks.
- *
- * This should not be called while there are open write buffers; otherwise it
- * may not account for buffered data.
+ * Obtain total disk space currently used by a LogicalTapeSet, in blocks. Does
+ * not account for open write buffer, if any.
*/
long
LogicalTapeSetBlocks(LogicalTapeSet *lts)