summaryrefslogtreecommitdiffstats
path: root/src/backend/optimizer/plan/planagg.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/optimizer/plan/planagg.c
parentInitial commit. (diff)
downloadpostgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/optimizer/plan/planagg.c')
-rw-r--r--src/backend/optimizer/plan/planagg.c513
1 files changed, 513 insertions, 0 deletions
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
new file mode 100644
index 0000000..c1634d1
--- /dev/null
+++ b/src/backend/optimizer/plan/planagg.c
@@ -0,0 +1,513 @@
+/*-------------------------------------------------------------------------
+ *
+ * planagg.c
+ * Special planning for aggregate queries.
+ *
+ * This module tries to replace MIN/MAX aggregate functions by subqueries
+ * of the form
+ * (SELECT col FROM tab
+ * WHERE col IS NOT NULL AND existing-quals
+ * ORDER BY col ASC/DESC
+ * LIMIT 1)
+ * Given a suitable index on tab.col, this can be much faster than the
+ * generic scan-all-the-rows aggregation plan. We can handle multiple
+ * MIN/MAX aggregates by generating multiple subqueries, and their
+ * orderings can be different. However, if the query contains any
+ * non-optimizable aggregates, there's no point since we'll have to
+ * scan all the rows anyway.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/plan/planagg.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_type.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/subselect.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_clause.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+static bool can_minmax_aggs(PlannerInfo *root, List **context);
+static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
+ Oid eqop, Oid sortop, bool nulls_first);
+static void minmax_qp_callback(PlannerInfo *root, void *extra);
+static Oid fetch_agg_sort_op(Oid aggfnoid);
+
+
+/*
+ * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
+ *
+ * Check to see whether the query contains MIN/MAX aggregate functions that
+ * might be optimizable via indexscans. If it does, and all the aggregates
+ * are potentially optimizable, then create a MinMaxAggPath and add it to
+ * the (UPPERREL_GROUP_AGG, NULL) upperrel.
+ *
+ * This should be called by grouping_planner() just before it's ready to call
+ * query_planner(), because we generate indexscan paths by cloning the
+ * planner's state and invoking query_planner() on a modified version of
+ * the query parsetree. Thus, all preprocessing needed before query_planner()
+ * must already be done. This relies on the list of aggregates in
+ * root->agginfos, so preprocess_aggrefs() must have been called already, too.
+ */
+void
+preprocess_minmax_aggregates(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ FromExpr *jtnode;
+ RangeTblRef *rtr;
+ RangeTblEntry *rte;
+ List *aggs_list;
+ RelOptInfo *grouped_rel;
+ ListCell *lc;
+
+ /* minmax_aggs list should be empty at this point */
+ Assert(root->minmax_aggs == NIL);
+
+ /* Nothing to do if query has no aggregates */
+ if (!parse->hasAggs)
+ return;
+
+ Assert(!parse->setOperations); /* shouldn't get here if a setop */
+ Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
+
+ /*
+ * Reject unoptimizable cases.
+ *
+ * We don't handle GROUP BY or windowing, because our current
+ * implementations of grouping require looking at all the rows anyway, and
+ * so there's not much point in optimizing MIN/MAX.
+ */
+ if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
+ parse->hasWindowFuncs)
+ return;
+
+ /*
+ * Reject if query contains any CTEs; there's no way to build an indexscan
+ * on one so we couldn't succeed here. (If the CTEs are unreferenced,
+ * that's not true, but it doesn't seem worth expending cycles to check.)
+ */
+ if (parse->cteList)
+ return;
+
+ /*
+ * We also restrict the query to reference exactly one table, since join
+ * conditions can't be handled reasonably. (We could perhaps handle a
+ * query containing cartesian-product joins, but it hardly seems worth the
+ * trouble.) However, the single table could be buried in several levels
+ * of FromExpr due to subqueries. Note the "single" table could be an
+ * inheritance parent, too, including the case of a UNION ALL subquery
+ * that's been flattened to an appendrel.
+ */
+ jtnode = parse->jointree;
+ while (IsA(jtnode, FromExpr))
+ {
+ if (list_length(jtnode->fromlist) != 1)
+ return;
+ jtnode = linitial(jtnode->fromlist);
+ }
+ if (!IsA(jtnode, RangeTblRef))
+ return;
+ rtr = (RangeTblRef *) jtnode;
+ rte = planner_rt_fetch(rtr->rtindex, root);
+ if (rte->rtekind == RTE_RELATION)
+ /* ordinary relation, ok */ ;
+ else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
+ /* flattened UNION ALL subquery, ok */ ;
+ else
+ return;
+
+ /*
+ * Scan the tlist and HAVING qual to find all the aggregates and verify
+ * all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
+ */
+ aggs_list = NIL;
+ if (!can_minmax_aggs(root, &aggs_list))
+ return;
+
+ /*
+ * OK, there is at least the possibility of performing the optimization.
+ * Build an access path for each aggregate. If any of the aggregates
+ * prove to be non-indexable, give up; there is no point in optimizing
+ * just some of them.
+ */
+ foreach(lc, aggs_list)
+ {
+ MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+ Oid eqop;
+ bool reverse;
+
+ /*
+ * We'll need the equality operator that goes with the aggregate's
+ * ordering operator.
+ */
+ eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
+ if (!OidIsValid(eqop)) /* shouldn't happen */
+ elog(ERROR, "could not find equality operator for ordering operator %u",
+ mminfo->aggsortop);
+
+ /*
+ * We can use either an ordering that gives NULLS FIRST or one that
+ * gives NULLS LAST; furthermore there's unlikely to be much
+ * performance difference between them, so it doesn't seem worth
+ * costing out both ways if we get a hit on the first one. NULLS
+ * FIRST is more likely to be available if the operator is a
+ * reverse-sort operator, so try that first if reverse.
+ */
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+ continue;
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+ continue;
+
+ /* No indexable path for this aggregate, so fail */
+ return;
+ }
+
+ /*
+ * OK, we can do the query this way. Prepare to create a MinMaxAggPath
+ * node.
+ *
+ * First, create an output Param node for each agg. (If we end up not
+ * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
+ * which is not worth worrying about. We can't wait till create_plan time
+ * to decide whether to make the Param, unfortunately.)
+ */
+ foreach(lc, aggs_list)
+ {
+ MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+
+ mminfo->param =
+ SS_make_initplan_output_param(root,
+ exprType((Node *) mminfo->target),
+ -1,
+ exprCollation((Node *) mminfo->target));
+ }
+
+ /*
+ * Create a MinMaxAggPath node with the appropriate estimated costs and
+ * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
+ * it will compete against the standard aggregate implementation. (It
+ * will likely always win, but we need not assume that here.)
+ *
+ * Note: grouping_planner won't have created this upperrel yet, but it's
+ * fine for us to create it first. We will not have inserted the correct
+ * consider_parallel value in it, but MinMaxAggPath paths are currently
+ * never parallel-safe anyway, so that doesn't matter. Likewise, it
+ * doesn't matter that we haven't filled FDW-related fields in the rel.
+ * Also, because there are no rowmarks, we know that the processed_tlist
+ * doesn't need to change anymore, so making the pathtarget now is safe.
+ */
+ grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
+ add_path(grouped_rel, (Path *)
+ create_minmaxagg_path(root, grouped_rel,
+ create_pathtarget(root,
+ root->processed_tlist),
+ aggs_list,
+ (List *) parse->havingQual));
+}
+
+/*
+ * can_minmax_aggs
+ * Walk through all the aggregates in the query, and check
+ * if they are all MIN/MAX aggregates. If so, build a list of the
+ * distinct aggregate calls in the tree.
+ *
+ * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
+ *
+ * This does not descend into subqueries, and so should be used only after
+ * reduction of sublinks to subplans. There mustn't be outer-aggregate
+ * references either.
+ */
+static bool
+can_minmax_aggs(PlannerInfo *root, List **context)
+{
+ ListCell *lc;
+
+ foreach(lc, root->agginfos)
+ {
+ AggInfo *agginfo = (AggInfo *) lfirst(lc);
+ Aggref *aggref = agginfo->representative_aggref;
+ Oid aggsortop;
+ TargetEntry *curTarget;
+ MinMaxAggInfo *mminfo;
+
+ Assert(aggref->agglevelsup == 0);
+ if (list_length(aggref->args) != 1)
+ return false; /* it couldn't be MIN/MAX */
+
+ /*
+ * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
+ * outcome if the aggsortop's operator class recognizes non-identical
+ * values as equal. For example, 4.0 and 4.00 are equal according to
+ * numeric_ops, yet distinguishable. If MIN() receives more than one
+ * value equal to 4.0 and no value less than 4.0, it is unspecified
+ * which of those equal values MIN() returns. An ORDER BY expression
+ * that differs for each of those equal values of the argument
+ * expression makes the result predictable once again. This is a
+ * niche requirement, and we do not implement it with subquery paths.
+ * In any case, this test lets us reject ordered-set aggregates
+ * quickly.
+ */
+ if (aggref->aggorder != NIL)
+ return false;
+ /* note: we do not care if DISTINCT is mentioned ... */
+
+ /*
+ * We might implement the optimization when a FILTER clause is present
+ * by adding the filter to the quals of the generated subquery. For
+ * now, just punt.
+ */
+ if (aggref->aggfilter != NULL)
+ return false;
+
+ aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
+ if (!OidIsValid(aggsortop))
+ return false; /* not a MIN/MAX aggregate */
+
+ curTarget = (TargetEntry *) linitial(aggref->args);
+
+ if (contain_mutable_functions((Node *) curTarget->expr))
+ return false; /* not potentially indexable */
+
+ if (type_is_rowtype(exprType((Node *) curTarget->expr)))
+ return false; /* IS NOT NULL would have weird semantics */
+
+ mminfo = makeNode(MinMaxAggInfo);
+ mminfo->aggfnoid = aggref->aggfnoid;
+ mminfo->aggsortop = aggsortop;
+ mminfo->target = curTarget->expr;
+ mminfo->subroot = NULL; /* don't compute path yet */
+ mminfo->path = NULL;
+ mminfo->pathcost = 0;
+ mminfo->param = NULL;
+
+ *context = lappend(*context, mminfo);
+ }
+ return true;
+}
+
+/*
+ * build_minmax_path
+ * Given a MIN/MAX aggregate, try to build an indexscan Path it can be
+ * optimized with.
+ *
+ * If successful, stash the best path in *mminfo and return true.
+ * Otherwise, return false.
+ */
+static bool
+build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
+ Oid eqop, Oid sortop, bool nulls_first)
+{
+ PlannerInfo *subroot;
+ Query *parse;
+ TargetEntry *tle;
+ List *tlist;
+ NullTest *ntest;
+ SortGroupClause *sortcl;
+ RelOptInfo *final_rel;
+ Path *sorted_path;
+ Cost path_cost;
+ double path_fraction;
+
+ /*
+ * We are going to construct what is effectively a sub-SELECT query, so
+ * clone the current query level's state and adjust it to make it look
+ * like a subquery. Any outer references will now be one level higher
+ * than before. (This means that when we are done, there will be no Vars
+ * of level 1, which is why the subquery can become an initplan.)
+ */
+ subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
+ memcpy(subroot, root, sizeof(PlannerInfo));
+ subroot->query_level++;
+ subroot->parent_root = root;
+ /* reset subplan-related stuff */
+ subroot->plan_params = NIL;
+ subroot->outer_params = NULL;
+ subroot->init_plans = NIL;
+ subroot->agginfos = NIL;
+ subroot->aggtransinfos = NIL;
+
+ subroot->parse = parse = copyObject(root->parse);
+ IncrementVarSublevelsUp((Node *) parse, 1, 1);
+
+ /* append_rel_list might contain outer Vars? */
+ subroot->append_rel_list = copyObject(root->append_rel_list);
+ IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
+ /* There shouldn't be any OJ info to translate, as yet */
+ Assert(subroot->join_info_list == NIL);
+ /* and we haven't made equivalence classes, either */
+ Assert(subroot->eq_classes == NIL);
+ /* and we haven't created PlaceHolderInfos, either */
+ Assert(subroot->placeholder_list == NIL);
+
+ /*----------
+ * Generate modified query of the form
+ * (SELECT col FROM tab
+ * WHERE col IS NOT NULL AND existing-quals
+ * ORDER BY col ASC/DESC
+ * LIMIT 1)
+ *----------
+ */
+ /* single tlist entry that is the aggregate target */
+ tle = makeTargetEntry(copyObject(mminfo->target),
+ (AttrNumber) 1,
+ pstrdup("agg_target"),
+ false);
+ tlist = list_make1(tle);
+ subroot->processed_tlist = parse->targetList = tlist;
+
+ /* No HAVING, no DISTINCT, no aggregates anymore */
+ parse->havingQual = NULL;
+ subroot->hasHavingQual = false;
+ parse->distinctClause = NIL;
+ parse->hasDistinctOn = false;
+ parse->hasAggs = false;
+
+ /* Build "target IS NOT NULL" expression */
+ ntest = makeNode(NullTest);
+ ntest->nulltesttype = IS_NOT_NULL;
+ ntest->arg = copyObject(mminfo->target);
+ /* we checked it wasn't a rowtype in find_minmax_aggs_walker */
+ ntest->argisrow = false;
+ ntest->location = -1;
+
+ /* User might have had that in WHERE already */
+ if (!list_member((List *) parse->jointree->quals, ntest))
+ parse->jointree->quals = (Node *)
+ lcons(ntest, (List *) parse->jointree->quals);
+
+ /* Build suitable ORDER BY clause */
+ sortcl = makeNode(SortGroupClause);
+ sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
+ sortcl->eqop = eqop;
+ sortcl->sortop = sortop;
+ sortcl->nulls_first = nulls_first;
+ sortcl->hashable = false; /* no need to make this accurate */
+ parse->sortClause = list_make1(sortcl);
+
+ /* set up expressions for LIMIT 1 */
+ parse->limitOffset = NULL;
+ parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+ sizeof(int64),
+ Int64GetDatum(1), false,
+ FLOAT8PASSBYVAL);
+
+ /*
+ * Generate the best paths for this query, telling query_planner that we
+ * have LIMIT 1.
+ */
+ subroot->tuple_fraction = 1.0;
+ subroot->limit_tuples = 1.0;
+
+ final_rel = query_planner(subroot, minmax_qp_callback, NULL);
+
+ /*
+ * Since we didn't go through subquery_planner() to handle the subquery,
+ * we have to do some of the same cleanup it would do, in particular cope
+ * with params and initplans used within this subquery. (This won't
+ * matter if we end up not using the subplan.)
+ */
+ SS_identify_outer_params(subroot);
+ SS_charge_for_initplans(subroot, final_rel);
+
+ /*
+ * Get the best presorted path, that being the one that's cheapest for
+ * fetching just one row. If there's no such path, fail.
+ */
+ if (final_rel->rows > 1.0)
+ path_fraction = 1.0 / final_rel->rows;
+ else
+ path_fraction = 1.0;
+
+ sorted_path =
+ get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+ subroot->query_pathkeys,
+ NULL,
+ path_fraction);
+ if (!sorted_path)
+ return false;
+
+ /*
+ * The path might not return exactly what we want, so fix that. (We
+ * assume that this won't change any conclusions about which was the
+ * cheapest path.)
+ */
+ sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
+ create_pathtarget(subroot,
+ subroot->processed_tlist));
+
+ /*
+ * Determine cost to get just the first row of the presorted path.
+ *
+ * Note: cost calculation here should match
+ * compare_fractional_path_costs().
+ */
+ path_cost = sorted_path->startup_cost +
+ path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
+
+ /* Save state for further processing */
+ mminfo->subroot = subroot;
+ mminfo->path = sorted_path;
+ mminfo->pathcost = path_cost;
+
+ return true;
+}
+
+/*
+ * Compute query_pathkeys and other pathkeys during query_planner()
+ */
+static void
+minmax_qp_callback(PlannerInfo *root, void *extra)
+{
+ root->group_pathkeys = NIL;
+ root->window_pathkeys = NIL;
+ root->distinct_pathkeys = NIL;
+
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ root->parse->sortClause,
+ root->parse->targetList);
+
+ root->query_pathkeys = root->sort_pathkeys;
+}
+
+/*
+ * Get the OID of the sort operator, if any, associated with an aggregate.
+ * Returns InvalidOid if there is no such operator.
+ */
+static Oid
+fetch_agg_sort_op(Oid aggfnoid)
+{
+ HeapTuple aggTuple;
+ Form_pg_aggregate aggform;
+ Oid aggsortop;
+
+ /* fetch aggregate entry from pg_aggregate */
+ aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
+ if (!HeapTupleIsValid(aggTuple))
+ return InvalidOid;
+ aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+ aggsortop = aggform->aggsortop;
+ ReleaseSysCache(aggTuple);
+
+ return aggsortop;
+}