/*------------------------------------------------------------------------- * * planagg.c * Special planning for aggregate queries. * * This module tries to replace MIN/MAX aggregate functions by subqueries * of the form * (SELECT col FROM tab * WHERE col IS NOT NULL AND existing-quals * ORDER BY col ASC/DESC * LIMIT 1) * Given a suitable index on tab.col, this can be much faster than the * generic scan-all-the-rows aggregation plan. We can handle multiple * MIN/MAX aggregates by generating multiple subqueries, and their * orderings can be different. However, if the query contains any * non-optimizable aggregates, there's no point since we'll have to * scan all the rows anyway. * * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/optimizer/plan/planagg.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/htup_details.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/subselect.h" #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" #include "utils/syscache.h" static bool can_minmax_aggs(PlannerInfo *root, List **context); static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, Oid eqop, Oid sortop, bool nulls_first); static void minmax_qp_callback(PlannerInfo *root, void *extra); static Oid fetch_agg_sort_op(Oid aggfnoid); /* * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates * * Check to see whether the query contains MIN/MAX aggregate functions that * might be optimizable via indexscans. If it does, and all the aggregates * are potentially optimizable, then create a MinMaxAggPath and add it to * the (UPPERREL_GROUP_AGG, NULL) upperrel. * * This should be called by grouping_planner() just before it's ready to call * query_planner(), because we generate indexscan paths by cloning the * planner's state and invoking query_planner() on a modified version of * the query parsetree. Thus, all preprocessing needed before query_planner() * must already be done. This relies on the list of aggregates in * root->agginfos, so preprocess_aggrefs() must have been called already, too. */ void preprocess_minmax_aggregates(PlannerInfo *root) { Query *parse = root->parse; FromExpr *jtnode; RangeTblRef *rtr; RangeTblEntry *rte; List *aggs_list; RelOptInfo *grouped_rel; ListCell *lc; /* minmax_aggs list should be empty at this point */ Assert(root->minmax_aggs == NIL); /* Nothing to do if query has no aggregates */ if (!parse->hasAggs) return; Assert(!parse->setOperations); /* shouldn't get here if a setop */ Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ /* * Reject unoptimizable cases. * * We don't handle GROUP BY or windowing, because our current * implementations of grouping require looking at all the rows anyway, and * so there's not much point in optimizing MIN/MAX. */ if (parse->groupClause || list_length(parse->groupingSets) > 1 || parse->hasWindowFuncs) return; /* * Reject if query contains any CTEs; there's no way to build an indexscan * on one so we couldn't succeed here. (If the CTEs are unreferenced, * that's not true, but it doesn't seem worth expending cycles to check.) */ if (parse->cteList) return; /* * We also restrict the query to reference exactly one table, since join * conditions can't be handled reasonably. (We could perhaps handle a * query containing cartesian-product joins, but it hardly seems worth the * trouble.) However, the single table could be buried in several levels * of FromExpr due to subqueries. Note the "single" table could be an * inheritance parent, too, including the case of a UNION ALL subquery * that's been flattened to an appendrel. */ jtnode = parse->jointree; while (IsA(jtnode, FromExpr)) { if (list_length(jtnode->fromlist) != 1) return; jtnode = linitial(jtnode->fromlist); } if (!IsA(jtnode, RangeTblRef)) return; rtr = (RangeTblRef *) jtnode; rte = planner_rt_fetch(rtr->rtindex, root); if (rte->rtekind == RTE_RELATION) /* ordinary relation, ok */ ; else if (rte->rtekind == RTE_SUBQUERY && rte->inh) /* flattened UNION ALL subquery, ok */ ; else return; /* * Scan the tlist and HAVING qual to find all the aggregates and verify * all are MIN/MAX aggregates. Stop as soon as we find one that isn't. */ aggs_list = NIL; if (!can_minmax_aggs(root, &aggs_list)) return; /* * OK, there is at least the possibility of performing the optimization. * Build an access path for each aggregate. If any of the aggregates * prove to be non-indexable, give up; there is no point in optimizing * just some of them. */ foreach(lc, aggs_list) { MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); Oid eqop; bool reverse; /* * We'll need the equality operator that goes with the aggregate's * ordering operator. */ eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse); if (!OidIsValid(eqop)) /* shouldn't happen */ elog(ERROR, "could not find equality operator for ordering operator %u", mminfo->aggsortop); /* * We can use either an ordering that gives NULLS FIRST or one that * gives NULLS LAST; furthermore there's unlikely to be much * performance difference between them, so it doesn't seem worth * costing out both ways if we get a hit on the first one. NULLS * FIRST is more likely to be available if the operator is a * reverse-sort operator, so try that first if reverse. */ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse)) continue; if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse)) continue; /* No indexable path for this aggregate, so fail */ return; } /* * OK, we can do the query this way. Prepare to create a MinMaxAggPath * node. * * First, create an output Param node for each agg. (If we end up not * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg, * which is not worth worrying about. We can't wait till create_plan time * to decide whether to make the Param, unfortunately.) */ foreach(lc, aggs_list) { MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); mminfo->param = SS_make_initplan_output_param(root, exprType((Node *) mminfo->target), -1, exprCollation((Node *) mminfo->target)); } /* * Create a MinMaxAggPath node with the appropriate estimated costs and * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where * it will compete against the standard aggregate implementation. (It * will likely always win, but we need not assume that here.) * * Note: grouping_planner won't have created this upperrel yet, but it's * fine for us to create it first. We will not have inserted the correct * consider_parallel value in it, but MinMaxAggPath paths are currently * never parallel-safe anyway, so that doesn't matter. Likewise, it * doesn't matter that we haven't filled FDW-related fields in the rel. * Also, because there are no rowmarks, we know that the processed_tlist * doesn't need to change anymore, so making the pathtarget now is safe. */ grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); add_path(grouped_rel, (Path *) create_minmaxagg_path(root, grouped_rel, create_pathtarget(root, root->processed_tlist), aggs_list, (List *) parse->havingQual)); } /* * can_minmax_aggs * Walk through all the aggregates in the query, and check * if they are all MIN/MAX aggregates. If so, build a list of the * distinct aggregate calls in the tree. * * Returns false if a non-MIN/MAX aggregate is found, true otherwise. * * This does not descend into subqueries, and so should be used only after * reduction of sublinks to subplans. There mustn't be outer-aggregate * references either. */ static bool can_minmax_aggs(PlannerInfo *root, List **context) { ListCell *lc; foreach(lc, root->agginfos) { AggInfo *agginfo = (AggInfo *) lfirst(lc); Aggref *aggref = agginfo->representative_aggref; Oid aggsortop; TargetEntry *curTarget; MinMaxAggInfo *mminfo; Assert(aggref->agglevelsup == 0); if (list_length(aggref->args) != 1) return false; /* it couldn't be MIN/MAX */ /* * ORDER BY is usually irrelevant for MIN/MAX, but it can change the * outcome if the aggsortop's operator class recognizes non-identical * values as equal. For example, 4.0 and 4.00 are equal according to * numeric_ops, yet distinguishable. If MIN() receives more than one * value equal to 4.0 and no value less than 4.0, it is unspecified * which of those equal values MIN() returns. An ORDER BY expression * that differs for each of those equal values of the argument * expression makes the result predictable once again. This is a * niche requirement, and we do not implement it with subquery paths. * In any case, this test lets us reject ordered-set aggregates * quickly. */ if (aggref->aggorder != NIL) return false; /* note: we do not care if DISTINCT is mentioned ... */ /* * We might implement the optimization when a FILTER clause is present * by adding the filter to the quals of the generated subquery. For * now, just punt. */ if (aggref->aggfilter != NULL) return false; aggsortop = fetch_agg_sort_op(aggref->aggfnoid); if (!OidIsValid(aggsortop)) return false; /* not a MIN/MAX aggregate */ curTarget = (TargetEntry *) linitial(aggref->args); if (contain_mutable_functions((Node *) curTarget->expr)) return false; /* not potentially indexable */ if (type_is_rowtype(exprType((Node *) curTarget->expr))) return false; /* IS NOT NULL would have weird semantics */ mminfo = makeNode(MinMaxAggInfo); mminfo->aggfnoid = aggref->aggfnoid; mminfo->aggsortop = aggsortop; mminfo->target = curTarget->expr; mminfo->subroot = NULL; /* don't compute path yet */ mminfo->path = NULL; mminfo->pathcost = 0; mminfo->param = NULL; *context = lappend(*context, mminfo); } return true; } /* * build_minmax_path * Given a MIN/MAX aggregate, try to build an indexscan Path it can be * optimized with. * * If successful, stash the best path in *mminfo and return true. * Otherwise, return false. */ static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, Oid eqop, Oid sortop, bool nulls_first) { PlannerInfo *subroot; Query *parse; TargetEntry *tle; List *tlist; NullTest *ntest; SortGroupClause *sortcl; RelOptInfo *final_rel; Path *sorted_path; Cost path_cost; double path_fraction; /* * We are going to construct what is effectively a sub-SELECT query, so * clone the current query level's state and adjust it to make it look * like a subquery. Any outer references will now be one level higher * than before. (This means that when we are done, there will be no Vars * of level 1, which is why the subquery can become an initplan.) */ subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); memcpy(subroot, root, sizeof(PlannerInfo)); subroot->query_level++; subroot->parent_root = root; /* reset subplan-related stuff */ subroot->plan_params = NIL; subroot->outer_params = NULL; subroot->init_plans = NIL; subroot->agginfos = NIL; subroot->aggtransinfos = NIL; subroot->parse = parse = copyObject(root->parse); IncrementVarSublevelsUp((Node *) parse, 1, 1); /* append_rel_list might contain outer Vars? */ subroot->append_rel_list = copyObject(root->append_rel_list); IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1); /* There shouldn't be any OJ info to translate, as yet */ Assert(subroot->join_info_list == NIL); /* and we haven't made equivalence classes, either */ Assert(subroot->eq_classes == NIL); /* and we haven't created PlaceHolderInfos, either */ Assert(subroot->placeholder_list == NIL); /*---------- * Generate modified query of the form * (SELECT col FROM tab * WHERE col IS NOT NULL AND existing-quals * ORDER BY col ASC/DESC * LIMIT 1) *---------- */ /* single tlist entry that is the aggregate target */ tle = makeTargetEntry(copyObject(mminfo->target), (AttrNumber) 1, pstrdup("agg_target"), false); tlist = list_make1(tle); subroot->processed_tlist = parse->targetList = tlist; /* No HAVING, no DISTINCT, no aggregates anymore */ parse->havingQual = NULL; subroot->hasHavingQual = false; parse->distinctClause = NIL; parse->hasDistinctOn = false; parse->hasAggs = false; /* Build "target IS NOT NULL" expression */ ntest = makeNode(NullTest); ntest->nulltesttype = IS_NOT_NULL; ntest->arg = copyObject(mminfo->target); /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ ntest->argisrow = false; ntest->location = -1; /* User might have had that in WHERE already */ if (!list_member((List *) parse->jointree->quals, ntest)) parse->jointree->quals = (Node *) lcons(ntest, (List *) parse->jointree->quals); /* Build suitable ORDER BY clause */ sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist); sortcl->eqop = eqop; sortcl->sortop = sortop; sortcl->nulls_first = nulls_first; sortcl->hashable = false; /* no need to make this accurate */ parse->sortClause = list_make1(sortcl); /* set up expressions for LIMIT 1 */ parse->limitOffset = NULL; parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, sizeof(int64), Int64GetDatum(1), false, FLOAT8PASSBYVAL); /* * Generate the best paths for this query, telling query_planner that we * have LIMIT 1. */ subroot->tuple_fraction = 1.0; subroot->limit_tuples = 1.0; final_rel = query_planner(subroot, minmax_qp_callback, NULL); /* * Since we didn't go through subquery_planner() to handle the subquery, * we have to do some of the same cleanup it would do, in particular cope * with params and initplans used within this subquery. (This won't * matter if we end up not using the subplan.) */ SS_identify_outer_params(subroot); SS_charge_for_initplans(subroot, final_rel); /* * Get the best presorted path, that being the one that's cheapest for * fetching just one row. If there's no such path, fail. */ if (final_rel->rows > 1.0) path_fraction = 1.0 / final_rel->rows; else path_fraction = 1.0; sorted_path = get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, subroot->query_pathkeys, NULL, path_fraction); if (!sorted_path) return false; /* * The path might not return exactly what we want, so fix that. (We * assume that this won't change any conclusions about which was the * cheapest path.) */ sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, create_pathtarget(subroot, subroot->processed_tlist)); /* * Determine cost to get just the first row of the presorted path. * * Note: cost calculation here should match * compare_fractional_path_costs(). */ path_cost = sorted_path->startup_cost + path_fraction * (sorted_path->total_cost - sorted_path->startup_cost); /* Save state for further processing */ mminfo->subroot = subroot; mminfo->path = sorted_path; mminfo->pathcost = path_cost; return true; } /* * Compute query_pathkeys and other pathkeys during query_planner() */ static void minmax_qp_callback(PlannerInfo *root, void *extra) { root->group_pathkeys = NIL; root->window_pathkeys = NIL; root->distinct_pathkeys = NIL; root->sort_pathkeys = make_pathkeys_for_sortclauses(root, root->parse->sortClause, root->parse->targetList); root->query_pathkeys = root->sort_pathkeys; } /* * Get the OID of the sort operator, if any, associated with an aggregate. * Returns InvalidOid if there is no such operator. */ static Oid fetch_agg_sort_op(Oid aggfnoid) { HeapTuple aggTuple; Form_pg_aggregate aggform; Oid aggsortop; /* fetch aggregate entry from pg_aggregate */ aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid)); if (!HeapTupleIsValid(aggTuple)) return InvalidOid; aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); aggsortop = aggform->aggsortop; ReleaseSysCache(aggTuple); return aggsortop; }