Adding upstream version 14.5.upstream/14.5 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit: 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree: 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/optimizer/plan/planagg.c
parent: Initial commit. (diff)
download: postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
1 files changed, 513 insertions, 0 deletions
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
new file mode 100644
index 0000000..c1634d1
--- /dev/null
+++ b/src/backend/optimizer/plan/planagg.c
@@ -0,0 +1,513 @@
+/*-------------------------------------------------------------------------
+ *
+ * planagg.c
+ *	  Special planning for aggregate queries.
+ *
+ * This module tries to replace MIN/MAX aggregate functions by subqueries
+ * of the form
+ *		(SELECT col FROM tab
+ *		 WHERE col IS NOT NULL AND existing-quals
+ *		 ORDER BY col ASC/DESC
+ *		 LIMIT 1)
+ * Given a suitable index on tab.col, this can be much faster than the
+ * generic scan-all-the-rows aggregation plan.  We can handle multiple
+ * MIN/MAX aggregates by generating multiple subqueries, and their
+ * orderings can be different.  However, if the query contains any
+ * non-optimizable aggregates, there's no point since we'll have to
+ * scan all the rows anyway.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/plan/planagg.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_type.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/subselect.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_clause.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+static bool can_minmax_aggs(PlannerInfo *root, List **context);
+static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
+							  Oid eqop, Oid sortop, bool nulls_first);
+static void minmax_qp_callback(PlannerInfo *root, void *extra);
+static Oid	fetch_agg_sort_op(Oid aggfnoid);
+
+
+/*
+ * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
+ *
+ * Check to see whether the query contains MIN/MAX aggregate functions that
+ * might be optimizable via indexscans.  If it does, and all the aggregates
+ * are potentially optimizable, then create a MinMaxAggPath and add it to
+ * the (UPPERREL_GROUP_AGG, NULL) upperrel.
+ *
+ * This should be called by grouping_planner() just before it's ready to call
+ * query_planner(), because we generate indexscan paths by cloning the
+ * planner's state and invoking query_planner() on a modified version of
+ * the query parsetree.  Thus, all preprocessing needed before query_planner()
+ * must already be done.  This relies on the list of aggregates in
+ * root->agginfos, so preprocess_aggrefs() must have been called already, too.
+ */
+void
+preprocess_minmax_aggregates(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	FromExpr   *jtnode;
+	RangeTblRef *rtr;
+	RangeTblEntry *rte;
+	List	   *aggs_list;
+	RelOptInfo *grouped_rel;
+	ListCell   *lc;
+
+	/* minmax_aggs list should be empty at this point */
+	Assert(root->minmax_aggs == NIL);
+
+	/* Nothing to do if query has no aggregates */
+	if (!parse->hasAggs)
+		return;
+
+	Assert(!parse->setOperations);	/* shouldn't get here if a setop */
+	Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
+
+	/*
+	 * Reject unoptimizable cases.
+	 *
+	 * We don't handle GROUP BY or windowing, because our current
+	 * implementations of grouping require looking at all the rows anyway, and
+	 * so there's not much point in optimizing MIN/MAX.
+	 */
+	if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
+		parse->hasWindowFuncs)
+		return;
+
+	/*
+	 * Reject if query contains any CTEs; there's no way to build an indexscan
+	 * on one so we couldn't succeed here.  (If the CTEs are unreferenced,
+	 * that's not true, but it doesn't seem worth expending cycles to check.)
+	 */
+	if (parse->cteList)
+		return;
+
+	/*
+	 * We also restrict the query to reference exactly one table, since join
+	 * conditions can't be handled reasonably.  (We could perhaps handle a
+	 * query containing cartesian-product joins, but it hardly seems worth the
+	 * trouble.)  However, the single table could be buried in several levels
+	 * of FromExpr due to subqueries.  Note the "single" table could be an
+	 * inheritance parent, too, including the case of a UNION ALL subquery
+	 * that's been flattened to an appendrel.
+	 */
+	jtnode = parse->jointree;
+	while (IsA(jtnode, FromExpr))
+	{
+		if (list_length(jtnode->fromlist) != 1)
+			return;
+		jtnode = linitial(jtnode->fromlist);
+	}
+	if (!IsA(jtnode, RangeTblRef))
+		return;
+	rtr = (RangeTblRef *) jtnode;
+	rte = planner_rt_fetch(rtr->rtindex, root);
+	if (rte->rtekind == RTE_RELATION)
+		 /* ordinary relation, ok */ ;
+	else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
+		 /* flattened UNION ALL subquery, ok */ ;
+	else
+		return;
+
+	/*
+	 * Scan the tlist and HAVING qual to find all the aggregates and verify
+	 * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
+	 */
+	aggs_list = NIL;
+	if (!can_minmax_aggs(root, &aggs_list))
+		return;
+
+	/*
+	 * OK, there is at least the possibility of performing the optimization.
+	 * Build an access path for each aggregate.  If any of the aggregates
+	 * prove to be non-indexable, give up; there is no point in optimizing
+	 * just some of them.
+	 */
+	foreach(lc, aggs_list)
+	{
+		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+		Oid			eqop;
+		bool		reverse;
+
+		/*
+		 * We'll need the equality operator that goes with the aggregate's
+		 * ordering operator.
+		 */
+		eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
+		if (!OidIsValid(eqop))	/* shouldn't happen */
+			elog(ERROR, "could not find equality operator for ordering operator %u",
+				 mminfo->aggsortop);
+
+		/*
+		 * We can use either an ordering that gives NULLS FIRST or one that
+		 * gives NULLS LAST; furthermore there's unlikely to be much
+		 * performance difference between them, so it doesn't seem worth
+		 * costing out both ways if we get a hit on the first one.  NULLS
+		 * FIRST is more likely to be available if the operator is a
+		 * reverse-sort operator, so try that first if reverse.
+		 */
+		if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+			continue;
+		if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+			continue;
+
+		/* No indexable path for this aggregate, so fail */
+		return;
+	}
+
+	/*
+	 * OK, we can do the query this way.  Prepare to create a MinMaxAggPath
+	 * node.
+	 *
+	 * First, create an output Param node for each agg.  (If we end up not
+	 * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
+	 * which is not worth worrying about.  We can't wait till create_plan time
+	 * to decide whether to make the Param, unfortunately.)
+	 */
+	foreach(lc, aggs_list)
+	{
+		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+
+		mminfo->param =
+			SS_make_initplan_output_param(root,
+										  exprType((Node *) mminfo->target),
+										  -1,
+										  exprCollation((Node *) mminfo->target));
+	}
+
+	/*
+	 * Create a MinMaxAggPath node with the appropriate estimated costs and
+	 * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
+	 * it will compete against the standard aggregate implementation.  (It
+	 * will likely always win, but we need not assume that here.)
+	 *
+	 * Note: grouping_planner won't have created this upperrel yet, but it's
+	 * fine for us to create it first.  We will not have inserted the correct
+	 * consider_parallel value in it, but MinMaxAggPath paths are currently
+	 * never parallel-safe anyway, so that doesn't matter.  Likewise, it
+	 * doesn't matter that we haven't filled FDW-related fields in the rel.
+	 * Also, because there are no rowmarks, we know that the processed_tlist
+	 * doesn't need to change anymore, so making the pathtarget now is safe.
+	 */
+	grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
+	add_path(grouped_rel, (Path *)
+			 create_minmaxagg_path(root, grouped_rel,
+								   create_pathtarget(root,
+													 root->processed_tlist),
+								   aggs_list,
+								   (List *) parse->havingQual));
+}
+
+/*
+ * can_minmax_aggs
+ *		Walk through all the aggregates in the query, and check
+ *		if they are all MIN/MAX aggregates.  If so, build a list of the
+ *		distinct aggregate calls in the tree.
+ *
+ * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
+ *
+ * This does not descend into subqueries, and so should be used only after
+ * reduction of sublinks to subplans.  There mustn't be outer-aggregate
+ * references either.
+ */
+static bool
+can_minmax_aggs(PlannerInfo *root, List **context)
+{
+	ListCell   *lc;
+
+	foreach(lc, root->agginfos)
+	{
+		AggInfo    *agginfo = (AggInfo *) lfirst(lc);
+		Aggref	   *aggref = agginfo->representative_aggref;
+		Oid			aggsortop;
+		TargetEntry *curTarget;
+		MinMaxAggInfo *mminfo;
+
+		Assert(aggref->agglevelsup == 0);
+		if (list_length(aggref->args) != 1)
+			return false;		/* it couldn't be MIN/MAX */
+
+		/*
+		 * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
+		 * outcome if the aggsortop's operator class recognizes non-identical
+		 * values as equal.  For example, 4.0 and 4.00 are equal according to
+		 * numeric_ops, yet distinguishable.  If MIN() receives more than one
+		 * value equal to 4.0 and no value less than 4.0, it is unspecified
+		 * which of those equal values MIN() returns.  An ORDER BY expression
+		 * that differs for each of those equal values of the argument
+		 * expression makes the result predictable once again.  This is a
+		 * niche requirement, and we do not implement it with subquery paths.
+		 * In any case, this test lets us reject ordered-set aggregates
+		 * quickly.
+		 */
+		if (aggref->aggorder != NIL)
+			return false;
+		/* note: we do not care if DISTINCT is mentioned ... */
+
+		/*
+		 * We might implement the optimization when a FILTER clause is present
+		 * by adding the filter to the quals of the generated subquery.  For
+		 * now, just punt.
+		 */
+		if (aggref->aggfilter != NULL)
+			return false;
+
+		aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
+		if (!OidIsValid(aggsortop))
+			return false;		/* not a MIN/MAX aggregate */
+
+		curTarget = (TargetEntry *) linitial(aggref->args);
+
+		if (contain_mutable_functions((Node *) curTarget->expr))
+			return false;		/* not potentially indexable */
+
+		if (type_is_rowtype(exprType((Node *) curTarget->expr)))
+			return false;		/* IS NOT NULL would have weird semantics */
+
+		mminfo = makeNode(MinMaxAggInfo);
+		mminfo->aggfnoid = aggref->aggfnoid;
+		mminfo->aggsortop = aggsortop;
+		mminfo->target = curTarget->expr;
+		mminfo->subroot = NULL; /* don't compute path yet */
+		mminfo->path = NULL;
+		mminfo->pathcost = 0;
+		mminfo->param = NULL;
+
+		*context = lappend(*context, mminfo);
+	}
+	return true;
+}
+
+/*
+ * build_minmax_path
+ *		Given a MIN/MAX aggregate, try to build an indexscan Path it can be
+ *		optimized with.
+ *
+ * If successful, stash the best path in *mminfo and return true.
+ * Otherwise, return false.
+ */
+static bool
+build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
+				  Oid eqop, Oid sortop, bool nulls_first)
+{
+	PlannerInfo *subroot;
+	Query	   *parse;
+	TargetEntry *tle;
+	List	   *tlist;
+	NullTest   *ntest;
+	SortGroupClause *sortcl;
+	RelOptInfo *final_rel;
+	Path	   *sorted_path;
+	Cost		path_cost;
+	double		path_fraction;
+
+	/*
+	 * We are going to construct what is effectively a sub-SELECT query, so
+	 * clone the current query level's state and adjust it to make it look
+	 * like a subquery.  Any outer references will now be one level higher
+	 * than before.  (This means that when we are done, there will be no Vars
+	 * of level 1, which is why the subquery can become an initplan.)
+	 */
+	subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
+	memcpy(subroot, root, sizeof(PlannerInfo));
+	subroot->query_level++;
+	subroot->parent_root = root;
+	/* reset subplan-related stuff */
+	subroot->plan_params = NIL;
+	subroot->outer_params = NULL;
+	subroot->init_plans = NIL;
+	subroot->agginfos = NIL;
+	subroot->aggtransinfos = NIL;
+
+	subroot->parse = parse = copyObject(root->parse);
+	IncrementVarSublevelsUp((Node *) parse, 1, 1);
+
+	/* append_rel_list might contain outer Vars? */
+	subroot->append_rel_list = copyObject(root->append_rel_list);
+	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
+	/* There shouldn't be any OJ info to translate, as yet */
+	Assert(subroot->join_info_list == NIL);
+	/* and we haven't made equivalence classes, either */
+	Assert(subroot->eq_classes == NIL);
+	/* and we haven't created PlaceHolderInfos, either */
+	Assert(subroot->placeholder_list == NIL);
+
+	/*----------
+	 * Generate modified query of the form
+	 *		(SELECT col FROM tab
+	 *		 WHERE col IS NOT NULL AND existing-quals
+	 *		 ORDER BY col ASC/DESC
+	 *		 LIMIT 1)
+	 *----------
+	 */
+	/* single tlist entry that is the aggregate target */
+	tle = makeTargetEntry(copyObject(mminfo->target),
+						  (AttrNumber) 1,
+						  pstrdup("agg_target"),
+						  false);
+	tlist = list_make1(tle);
+	subroot->processed_tlist = parse->targetList = tlist;
+
+	/* No HAVING, no DISTINCT, no aggregates anymore */
+	parse->havingQual = NULL;
+	subroot->hasHavingQual = false;
+	parse->distinctClause = NIL;
+	parse->hasDistinctOn = false;
+	parse->hasAggs = false;
+
+	/* Build "target IS NOT NULL" expression */
+	ntest = makeNode(NullTest);
+	ntest->nulltesttype = IS_NOT_NULL;
+	ntest->arg = copyObject(mminfo->target);
+	/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
+	ntest->argisrow = false;
+	ntest->location = -1;
+
+	/* User might have had that in WHERE already */
+	if (!list_member((List *) parse->jointree->quals, ntest))
+		parse->jointree->quals = (Node *)
+			lcons(ntest, (List *) parse->jointree->quals);
+
+	/* Build suitable ORDER BY clause */
+	sortcl = makeNode(SortGroupClause);
+	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
+	sortcl->eqop = eqop;
+	sortcl->sortop = sortop;
+	sortcl->nulls_first = nulls_first;
+	sortcl->hashable = false;	/* no need to make this accurate */
+	parse->sortClause = list_make1(sortcl);
+
+	/* set up expressions for LIMIT 1 */
+	parse->limitOffset = NULL;
+	parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+										   sizeof(int64),
+										   Int64GetDatum(1), false,
+										   FLOAT8PASSBYVAL);
+
+	/*
+	 * Generate the best paths for this query, telling query_planner that we
+	 * have LIMIT 1.
+	 */
+	subroot->tuple_fraction = 1.0;
+	subroot->limit_tuples = 1.0;
+
+	final_rel = query_planner(subroot, minmax_qp_callback, NULL);
+
+	/*
+	 * Since we didn't go through subquery_planner() to handle the subquery,
+	 * we have to do some of the same cleanup it would do, in particular cope
+	 * with params and initplans used within this subquery.  (This won't
+	 * matter if we end up not using the subplan.)
+	 */
+	SS_identify_outer_params(subroot);
+	SS_charge_for_initplans(subroot, final_rel);
+
+	/*
+	 * Get the best presorted path, that being the one that's cheapest for
+	 * fetching just one row.  If there's no such path, fail.
+	 */
+	if (final_rel->rows > 1.0)
+		path_fraction = 1.0 / final_rel->rows;
+	else
+		path_fraction = 1.0;
+
+	sorted_path =
+		get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+												  subroot->query_pathkeys,
+												  NULL,
+												  path_fraction);
+	if (!sorted_path)
+		return false;
+
+	/*
+	 * The path might not return exactly what we want, so fix that.  (We
+	 * assume that this won't change any conclusions about which was the
+	 * cheapest path.)
+	 */
+	sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
+										   create_pathtarget(subroot,
+															 subroot->processed_tlist));
+
+	/*
+	 * Determine cost to get just the first row of the presorted path.
+	 *
+	 * Note: cost calculation here should match
+	 * compare_fractional_path_costs().
+	 */
+	path_cost = sorted_path->startup_cost +
+		path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
+
+	/* Save state for further processing */
+	mminfo->subroot = subroot;
+	mminfo->path = sorted_path;
+	mminfo->pathcost = path_cost;
+
+	return true;
+}
+
+/*
+ * Compute query_pathkeys and other pathkeys during query_planner()
+ */
+static void
+minmax_qp_callback(PlannerInfo *root, void *extra)
+{
+	root->group_pathkeys = NIL;
+	root->window_pathkeys = NIL;
+	root->distinct_pathkeys = NIL;
+
+	root->sort_pathkeys =
+		make_pathkeys_for_sortclauses(root,
+									  root->parse->sortClause,
+									  root->parse->targetList);
+
+	root->query_pathkeys = root->sort_pathkeys;
+}
+
+/*
+ * Get the OID of the sort operator, if any, associated with an aggregate.
+ * Returns InvalidOid if there is no such operator.
+ */
+static Oid
+fetch_agg_sort_op(Oid aggfnoid)
+{
+	HeapTuple	aggTuple;
+	Form_pg_aggregate aggform;
+	Oid			aggsortop;
+
+	/* fetch aggregate entry from pg_aggregate */
+	aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
+	if (!HeapTupleIsValid(aggTuple))
+		return InvalidOid;
+	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+	aggsortop = aggform->aggsortop;
+	ReleaseSysCache(aggTuple);
+
+	return aggsortop;
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:15:05 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:15:05 +0000
commit	46651ce6fe013220ed397add242004d764fc0153 (patch)
tree	6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/optimizer/plan/planagg.c
parent	Initial commit. (diff)
download	postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip