diff options
Diffstat (limited to 'src/backend/optimizer/plan')
-rw-r--r-- | src/backend/optimizer/plan/Makefile | 25 | ||||
-rw-r--r-- | src/backend/optimizer/plan/README | 158 | ||||
-rw-r--r-- | src/backend/optimizer/plan/analyzejoins.c | 1127 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 7244 | ||||
-rw-r--r-- | src/backend/optimizer/plan/initsplan.c | 2752 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planagg.c | 513 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planmain.c | 284 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 7492 | ||||
-rw-r--r-- | src/backend/optimizer/plan/setrefs.c | 3398 | ||||
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 2999 |
10 files changed, 25992 insertions, 0 deletions
diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile new file mode 100644 index 0000000..80ef162 --- /dev/null +++ b/src/backend/optimizer/plan/Makefile @@ -0,0 +1,25 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for optimizer/plan +# +# IDENTIFICATION +# src/backend/optimizer/plan/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/optimizer/plan +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + analyzejoins.o \ + createplan.o \ + initsplan.o \ + planagg.o \ + planmain.o \ + planner.o \ + setrefs.o \ + subselect.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/plan/README b/src/backend/optimizer/plan/README new file mode 100644 index 0000000..013c0f9 --- /dev/null +++ b/src/backend/optimizer/plan/README @@ -0,0 +1,158 @@ +src/backend/optimizer/plan/README + +Subselects +========== + +Vadim B. Mikheev + + +From owner-pgsql-hackers@hub.org Fri Feb 13 09:01:19 1998 +Received: from renoir.op.net (root@renoir.op.net [209.152.193.4]) + by candle.pha.pa.us (8.8.5/8.8.5) with ESMTP id JAA11576 + for <maillist@candle.pha.pa.us>; Fri, 13 Feb 1998 09:01:17 -0500 (EST) +Received: from hub.org (hub.org [209.47.148.200]) by renoir.op.net (o1/$Revision: 1.14 $) with ESMTP id IAA09761 for <maillist@candle.pha.pa.us>; Fri, 13 Feb 1998 08:41:22 -0500 (EST) +Received: from localhost (majordom@localhost) by hub.org (8.8.8/8.7.5) with SMTP id IAA08135; Fri, 13 Feb 1998 08:40:17 -0500 (EST) +Received: by hub.org (TLB v0.10a (1.23 tibbs 1997/01/09 00:29:32)); Fri, 13 Feb 1998 08:38:42 -0500 (EST) +Received: (from majordom@localhost) by hub.org (8.8.8/8.7.5) id IAA06646 for pgsql-hackers-outgoing; Fri, 13 Feb 1998 08:38:35 -0500 (EST) +Received: from dune.krasnet.ru (dune.krasnet.ru [193.125.44.86]) by hub.org (8.8.8/8.7.5) with ESMTP id IAA04568 for <hackers@postgreSQL.org>; Fri, 13 Feb 1998 08:37:16 -0500 (EST) +Received: from sable.krasnoyarsk.su (dune.krasnet.ru [193.125.44.86]) + by dune.krasnet.ru (8.8.7/8.8.7) with ESMTP id UAA13717 + for <hackers@postgreSQL.org>; Fri, 13 Feb 1998 20:51:03 +0700 (KRS) + (envelope-from vadim@sable.krasnoyarsk.su) +Message-ID: <34E44FBA.D64E7997@sable.krasnoyarsk.su> +Date: Fri, 13 Feb 1998 20:50:50 +0700 +From: "Vadim B. Mikheev" <vadim@sable.krasnoyarsk.su> +Organization: ITTS (Krasnoyarsk) +X-Mailer: Mozilla 4.04 [en] (X11; I; FreeBSD 2.2.5-RELEASE i386) +MIME-Version: 1.0 +To: PostgreSQL Developers List <hackers@postgreSQL.org> +Subject: [HACKERS] Subselects are in CVS... +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +Sender: owner-pgsql-hackers@hub.org +Precedence: bulk +Status: OR + +This is some implementation notes and opened issues... + +First, implementation uses new type of parameters - PARAM_EXEC - to deal +with correlation Vars. When query_planner() is called, it first tries to +replace all upper queries Var referenced in current query with Param of +this type. Some global variables are used to keep mapping of Vars to +Params and Params to Vars. + +After this, all current query' SubLinks are processed: for each SubLink +found in query' qual union_planner() (old planner() function) will be +called to plan corresponding subselect (union_planner() calls +query_planner() for "simple" query and supports UNIONs). After subselect +are planned, optimizer knows about is this correlated, un-correlated or +_undirect_ correlated (references some grand-parent Vars but no parent +ones: uncorrelated from the parent' point of view) query. + +For uncorrelated and undirect correlated subqueries of EXPRession or +EXISTS type SubLinks will be replaced with "normal" clauses from +SubLink->Oper list (I changed this list to be list of EXPR nodes, +not just Oper ones). Right sides of these nodes are replaced with +PARAM_EXEC parameters. This is second use of new parameter type. +At run-time these parameters get value from result of subquery +evaluation (i.e. - from target list of subquery). Execution plan of +subquery itself becomes init plan of parent query. InitPlan knows +what parameters are to get values from subquery' results and will be +executed "on-demand" (for query select * from table where x > 0 and +y > (select max(a) from table_a) subquery will not be executed at all +if there are no tuples with x > 0 _and_ y is not used in index scan). + +SubLinks for subqueries of all other types are transformed into +new type of Expr node - SUBPLAN_EXPR. Expr->args are just correlation +variables from _parent_ query. Expr->oper is new SubPlan node. + +This node is used for InitPlan too. It keeps subquery range table, +indices of Params which are to get value from _parent_ query Vars +(i.e. - from Expr->args), indices of Params into which subquery' +results are to be substituted (this is for InitPlans), SubLink +and subquery' execution plan. + +Plan node was changed to know about dependencies on Params from +parent queries and InitPlans, to keep list of changed Params +(from the above) and so be re-scanned if this list is not NULL. +Also, added list of InitPlans (actually, all of them for current +query are in topmost plan node now) and other SubPlans (from +plan->qual) - to initialize them and let them know about changed +Params (from the list of their "interests"). + +After all SubLinks are processed, query_planner() calls qual' +canonificator and does "normal" work. By using Params optimizer +is mostly unchanged. + +Well, Executor. To get subplans re-evaluated without ExecutorStart() +and ExecutorEnd() (without opening and closing relations and indices +and without many palloc() and pfree() - this is what SQL-funcs does +on each call) ExecReScan() now supports most of Plan types... + +Explanation of EXPLAIN. + +vac=> explain select * from tmp where x >= (select max(x2) from test2 +where y2 = y and exists (select * from tempx where tx = x)); +NOTICE: QUERY PLAN: + +Seq Scan on tmp (cost=40.03 size=101 width=8) + SubPlan + ^^^^^^^ subquery is in Seq Scan' qual, its plan is below + -> Aggregate (cost=2.05 size=0 width=0) + InitPlan + ^^^^^^^^ EXISTS subsubquery is InitPlan of subquery + -> Seq Scan on tempx (cost=4.33 size=1 width=4) + -> Result (cost=2.05 size=0 width=0) + ^^^^^^ EXISTS subsubquery was transformed into Param + and so we have Result node here + -> Index Scan on test2 (cost=2.05 size=1 width=4) + + +Opened issues. + +1. No read permissions checking (easy, just not done yet). +2. readfuncs.c can't read subplan-s (easy, not critical, because of + we currently nowhere use ascii representation of execution plans). +3. ExecReScan() doesn't support all plan types. At least support for + MergeJoin has to be implemented. +4. Memory leaks in ExecReScan(). +5. I need in advice: if subquery introduced with NOT IN doesn't return + any tuples then qualification is failed, yes ? +6. Regression tests !!!!!!!!!!!!!!!!!!!! + (Could we use data/queries from MySQL' crash.me ? + Copyright-ed ? Could they give us rights ?) +7. Performance. + - Should be good when subquery is transformed into InitPlan. + - Something should be done for uncorrelated subqueries introduced + with ANY/ALL - keep thinking. Currently, subplan will be re-scanned + for each parent tuple - very slow... + +Results of some test. TMP is table with x,y (int4-s), x in 0-9, +y = 100 - x, 1000 tuples (10 duplicates of each tuple). TEST2 is table +with x2, y2 (int4-s), x2 in 1-99, y2 = 100 -x2, 10000 tuples (100 dups). + + Trying + +select * from tmp where x >= (select max(x2) from test2 where y2 = y); + + and + +begin; +select y as ty, max(x2) as mx into table tsub from test2, tmp +where y2 = y group by ty; +vacuum tsub; +select x, y from tmp, tsub where x >= mx and y = ty; +drop table tsub; +end; + + Without index on test2(y2): + +SubSelect -> 320 sec +Using temp table -> 32 sec + + Having index + +SubSelect -> 17 sec (2M of memory) +Using temp table -> 32 sec (12M of memory: -S 8192) + +Vadim diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c new file mode 100644 index 0000000..34efeee --- /dev/null +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -0,0 +1,1127 @@ +/*------------------------------------------------------------------------- + * + * analyzejoins.c + * Routines for simplifying joins after initial query analysis + * + * While we do a great deal of join simplification in prep/prepjointree.c, + * certain optimizations cannot be performed at that stage for lack of + * detailed information about the query. The routines here are invoked + * after initsplan.c has done its work, and can do additional join removal + * and simplification steps based on the information extracted. The penalty + * is that we have to work harder to clean up after ourselves when we modify + * the query, since the derived data structures have to be updated too. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/analyzejoins.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/joininfo.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/tlist.h" +#include "utils/lsyscache.h" + +/* local functions */ +static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static void remove_rel_from_query(PlannerInfo *root, int relid, + Relids joinrelids); +static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); +static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); +static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *clause_list); +static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool is_innerrel_unique_for(PlannerInfo *root, + Relids joinrelids, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist); + + +/* + * remove_useless_joins + * Check for relations that don't actually need to be joined at all, + * and remove them from the query. + * + * We are passed the current joinlist and return the updated list. Other + * data structures that have to be updated are accessible via "root". + */ +List * +remove_useless_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + /* + * We are only interested in relations that are left-joined to, so we can + * scan the join_info_list to find them easily. + */ +restart: + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int innerrelid; + int nremoved; + + /* Skip if not removable */ + if (!join_is_removable(root, sjinfo)) + continue; + + /* + * Currently, join_is_removable can only succeed when the sjinfo's + * righthand is a single baserel. Remove that rel from the query and + * joinlist. + */ + innerrelid = bms_singleton_member(sjinfo->min_righthand); + + remove_rel_from_query(root, innerrelid, + bms_union(sjinfo->min_lefthand, + sjinfo->min_righthand)); + + /* We verify that exactly one reference gets removed from joinlist */ + nremoved = 0; + joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved); + if (nremoved != 1) + elog(ERROR, "failed to find relation %d in joinlist", innerrelid); + + /* + * We can delete this SpecialJoinInfo from the list too, since it's no + * longer of interest. (Since we'll restart the foreach loop + * immediately, we don't bother with foreach_delete_current.) + */ + root->join_info_list = list_delete_cell(root->join_info_list, lc); + + /* + * Restart the scan. This is necessary to ensure we find all + * removable joins independently of ordering of the join_info_list + * (note that removal of attr_needed bits may make a join appear + * removable that did not before). + */ + goto restart; + } + + return joinlist; +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static inline bool +clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, + Relids innerrelids) +{ + if (bms_is_subset(rinfo->left_relids, outerrelids) && + bms_is_subset(rinfo->right_relids, innerrelids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrelids) && + bms_is_subset(rinfo->right_relids, outerrelids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + +/* + * join_is_removable + * Check whether we need not perform this special join at all, because + * it will just duplicate its left input. + * + * This is true for a left join for which the join condition cannot match + * more than one inner-side row. (There are other possibly interesting + * cases, but we don't have the infrastructure to prove them.) We also + * have to check that the inner side doesn't generate any variables needed + * above the join. + */ +static bool +join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + List *clause_list = NIL; + ListCell *l; + int attroff; + + /* + * Must be a non-delaying left join to a single baserel, else we aren't + * going to be able to do anything with it. + */ + if (sjinfo->jointype != JOIN_LEFT || + sjinfo->delay_upper_joins) + return false; + + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + /* + * Never try to eliminate a left join to the query result rel. Although + * the case is syntactically impossible in standard SQL, MERGE will build + * a join tree that looks exactly like that. + */ + if (innerrelid == root->parse->resultRelation) + return false; + + innerrel = find_base_rel(root, innerrelid); + + /* + * Before we go to the effort of checking whether any innerrel variables + * are needed above the join, make a quick check to eliminate cases in + * which we will surely be unable to prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * We can't remove the join if any inner-rel attributes are used above the + * join. + * + * Note that this test only detects use of inner-rel attributes in higher + * join conditions and the target list. There might be such attributes in + * pushed-down conditions at this join, too. We check that case below. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = innerrel->max_attr - innerrel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) + return false; + } + + /* + * Similarly check that the inner rel isn't needed by any PlaceHolderVars + * that will be used above the join. We only need to fail if such a PHV + * actually references some inner-rel attributes; but the correct check + * for that is relatively expensive, so we first check against ph_eval_at, + * which must mention the inner rel if the PHV uses any inner-rel attrs as + * non-lateral references. Note that if the PHV's syntactic scope is just + * the inner rel, we can't drop the rel even if the PHV is variable-free. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) + return false; /* it references innerrel laterally */ + if (bms_is_subset(phinfo->ph_needed, joinrelids)) + continue; /* PHV is not used above the join */ + if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) + continue; /* it definitely doesn't reference innerrel */ + if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) + return false; /* there isn't any other place to eval PHV */ + if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr), + innerrel->relids)) + return false; /* it does reference innerrel */ + } + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * either the outer rel or a pseudoconstant. If an operator is + * mergejoinable then it behaves like equality for some btree opclass, so + * it's what we want. The mergejoinability test also eliminates clauses + * containing volatile functions, which we couldn't depend on. + */ + foreach(l, innerrel->joininfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * If it's not a join clause for this outer join, we can't use it. + * Note that if the clause is pushed-down, then it is logically from + * above the outer join, even if it references no other rels (it might + * be from WHERE, for example). + */ + if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) + { + /* + * If such a clause actually references the inner rel then join + * removal has to be disallowed. We have to check this despite + * the previous attr_needed checks because of the possibility of + * pushed-down clauses referencing the rel. + */ + if (bms_is_member(innerrelid, restrictinfo->clause_relids)) + return false; + continue; /* else, ignore; not useful here */ + } + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. + */ + if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, + innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* + * Now that we have the relevant equality join clauses, try to prove the + * innerrel distinct. + */ + if (rel_is_distinct_for(root, innerrel, clause_list)) + return true; + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. + */ + return false; +} + + +/* + * Remove the target relid from the planner's data structures, having + * determined that there is no need to include it in the query. + * + * We are not terribly thorough here. We must make sure that the rel is + * no longer treated as a baserel, and that attributes of other baserels + * are no longer marked as being needed at joins involving this rel. + * Also, join quals involving the rel have to be removed from the joininfo + * lists, but only if they belong to the outer join identified by joinrelids. + */ +static void +remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) +{ + RelOptInfo *rel = find_base_rel(root, relid); + List *joininfos; + Index rti; + ListCell *l; + + /* + * Mark the rel as "dead" to show it is no longer part of the join tree. + * (Removing it from the baserel array altogether seems too risky.) + */ + rel->reloptkind = RELOPT_DEADREL; + + /* + * Remove references to the rel from other baserels' attr_needed arrays. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *otherrel = root->simple_rel_array[rti]; + int attroff; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == rti); /* sanity check on array */ + + /* no point in processing target rel itself */ + if (otherrel == rel) + continue; + + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; + attroff--) + { + otherrel->attr_needed[attroff] = + bms_del_member(otherrel->attr_needed[attroff], relid); + } + } + + /* + * Likewise remove references from SpecialJoinInfo data structures. + * + * This is relevant in case the outer join we're deleting is nested inside + * other outer joins: the upper joins' relid sets have to be adjusted. The + * RHS of the target outer join will be made empty here, but that's OK + * since caller will delete that SpecialJoinInfo entirely. + */ + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid); + sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid); + sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid); + sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid); + } + + /* + * Likewise remove references from PlaceHolderVar data structures, + * removing any no-longer-needed placeholders entirely. + * + * Removal is a bit trickier than it might seem: we can remove PHVs that + * are used at the target rel and/or in the join qual, but not those that + * are used at join partner rels or above the join. It's not that easy to + * distinguish PHVs used at partner rels from those used in the join qual, + * since they will both have ph_needed sets that are subsets of + * joinrelids. However, a PHV used at a partner rel could not have the + * target rel in ph_eval_at, so we check that while deciding whether to + * remove or just update the PHV. There is no corresponding test in + * join_is_removable because it doesn't need to distinguish those cases. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + Assert(!bms_is_member(relid, phinfo->ph_lateral)); + if (bms_is_subset(phinfo->ph_needed, joinrelids) && + bms_is_member(relid, phinfo->ph_eval_at)) + root->placeholder_list = foreach_delete_current(root->placeholder_list, + l); + else + { + phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); + Assert(!bms_is_empty(phinfo->ph_eval_at)); + phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); + } + } + + /* + * Remove any joinquals referencing the rel from the joininfo lists. + * + * In some cases, a joinqual has to be put back after deleting its + * reference to the target rel. This can occur for pseudoconstant and + * outerjoin-delayed quals, which can get marked as requiring the rel in + * order to force them to be evaluated at or above the join. We can't + * just discard them, though. Only quals that logically belonged to the + * outer join being discarded should be removed from the query. + * + * We must make a copy of the rel's old joininfo list before starting the + * loop, because otherwise remove_join_clause_from_rels would destroy the + * list while we're scanning it. + */ + joininfos = list_copy(rel->joininfo); + foreach(l, joininfos) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); + + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) + { + /* Recheck that qual doesn't actually reference the target rel */ + Assert(!bms_is_member(relid, rinfo->clause_relids)); + + /* + * The required_relids probably aren't shared with anything else, + * but let's copy them just to be sure. + */ + rinfo->required_relids = bms_copy(rinfo->required_relids); + rinfo->required_relids = bms_del_member(rinfo->required_relids, + relid); + distribute_restrictinfo_to_rels(root, rinfo); + } + } + + /* + * There may be references to the rel in root->fkey_list, but if so, + * match_foreign_keys_to_quals() will get rid of them. + */ +} + +/* + * Remove any occurrences of the target relid from a joinlist structure. + * + * It's easiest to build a whole new list structure, so we handle it that + * way. Efficiency is not a big deal here. + * + * *nremoved is incremented by the number of occurrences removed (there + * should be exactly one, but the caller checks that). + */ +static List * +remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) +{ + List *result = NIL; + ListCell *jl; + + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + if (varno == relid) + (*nremoved)++; + else + result = lappend(result, jlnode); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + List *sublist; + + sublist = remove_rel_from_joinlist((List *) jlnode, + relid, nremoved); + /* Avoid including empty sub-lists in the result */ + if (sublist) + result = lappend(result, sublist); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + } + } + + return result; +} + + +/* + * reduce_unique_semijoins + * Check for semijoins that can be simplified to plain inner joins + * because the inner relation is provably unique for the join clauses. + * + * Ideally this would happen during reduce_outer_joins, but we don't have + * enough information at that point. + * + * To perform the strength reduction when applicable, we need only delete + * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't + * bother fixing the join type attributed to it in the query jointree, + * since that won't be consulted again.) + */ +void +reduce_unique_semijoins(PlannerInfo *root) +{ + ListCell *lc; + + /* + * Scan the join_info_list to find semijoins. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + List *restrictlist; + + /* + * Must be a non-delaying semijoin to a single baserel, else we aren't + * going to be able to do anything with it. (It's probably not + * possible for delay_upper_joins to be set on a semijoin, but we + * might as well check.) + */ + if (sjinfo->jointype != JOIN_SEMI || + sjinfo->delay_upper_joins) + continue; + + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + continue; + + innerrel = find_base_rel(root, innerrelid); + + /* + * Before we trouble to run generate_join_implied_equalities, make a + * quick check to eliminate cases in which we will surely be unable to + * prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + continue; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * Since we're only considering a single-rel RHS, any join clauses it + * has must be clauses linking it to the semijoin's min_lefthand. We + * can also consider EC-derived join clauses. + */ + restrictlist = + list_concat(generate_join_implied_equalities(root, + joinrelids, + sjinfo->min_lefthand, + innerrel), + innerrel->joininfo); + + /* Test whether the innerrel is unique for those clauses. */ + if (!innerrel_is_unique(root, + joinrelids, sjinfo->min_lefthand, innerrel, + JOIN_SEMI, restrictlist, true)) + continue; + + /* OK, remove the SpecialJoinInfo from the list. */ + root->join_info_list = foreach_delete_current(root->join_info_list, lc); + } +} + + +/* + * rel_supports_distinctness + * Could the relation possibly be proven distinct on some set of columns? + * + * This is effectively a pre-checking function for rel_is_distinct_for(). + * It must return true if rel_is_distinct_for() could possibly return true + * with this rel, but it should not expend a lot of cycles. The idea is + * that callers can avoid doing possibly-expensive processing to compute + * rel_is_distinct_for()'s argument lists if the call could not possibly + * succeed. + */ +static bool +rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) +{ + /* We only know about baserels ... */ + if (rel->reloptkind != RELOPT_BASEREL) + return false; + if (rel->rtekind == RTE_RELATION) + { + /* + * For a plain relation, we only know how to prove uniqueness by + * reference to unique indexes. Make sure there's at least one + * suitable unique index. It must be immediately enforced, and not a + * partial index. (Keep these conditions in sync with + * relation_has_unique_index_for!) + */ + ListCell *lc; + + foreach(lc, rel->indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); + + if (ind->unique && ind->immediate && ind->indpred == NIL) + return true; + } + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Query *subquery = root->simple_rte_array[rel->relid]->subquery; + + /* Check if the subquery has any qualities that support distinctness */ + if (query_supports_distinctness(subquery)) + return true; + } + /* We have no proof rules for any other rtekinds. */ + return false; +} + +/* + * rel_is_distinct_for + * Does the relation return only distinct rows according to clause_list? + * + * clause_list is a list of join restriction clauses involving this rel and + * some other one. Return true if no two rows emitted by this rel could + * possibly join to the same row of the other rel. + * + * The caller must have already determined that each condition is a + * mergejoinable equality with an expression in this relation on one side, and + * an expression not involving this relation on the other. The transient + * outer_is_left flag is used to identify which side references this relation: + * left side if outer_is_left is false, right side if it is true. + * + * Note that the passed-in clause_list may be destructively modified! This + * is OK for current uses, because the clause_list is built by the caller for + * the sole purpose of passing to this function. + */ +static bool +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +{ + /* + * We could skip a couple of tests here if we assume all callers checked + * rel_supports_distinctness first, but it doesn't seem worth taking any + * risk for. + */ + if (rel->reloptkind != RELOPT_BASEREL) + return false; + if (rel->rtekind == RTE_RELATION) + { + /* + * Examine the indexes to see if we have a matching unique index. + * relation_has_unique_index_for automatically adds any usable + * restriction clauses for the rel, so we needn't do that here. + */ + if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Index relid = rel->relid; + Query *subquery = root->simple_rte_array[relid]->subquery; + List *colnos = NIL; + List *opids = NIL; + ListCell *l; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. + * + * (XXX we are not considering restriction clauses attached to the + * subquery; is that worth doing?) + */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + Oid op; + Var *var; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.) The + * caller's mergejoinability test should have selected only + * OpExprs. + */ + op = castNode(OpExpr, rinfo->clause)->opno; + + /* caller identified the inner side for us */ + if (rinfo->outer_is_left) + var = (Var *) get_rightop(rinfo->clause); + else + var = (Var *) get_leftop(rinfo->clause); + + /* + * We may ignore any RelabelType node above the operand. (There + * won't be more than one, since eval_const_expressions() has been + * applied already.) + */ + if (var && IsA(var, RelabelType)) + var = (Var *) ((RelabelType *) var)->arg; + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. + */ + if (!var || !IsA(var, Var) || + var->varno != relid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } + return false; +} + + +/* + * query_supports_distinctness - could the query possibly be proven distinct + * on some set of output columns? + * + * This is effectively a pre-checking function for query_is_distinct_for(). + * It must return true if query_is_distinct_for() could possibly return true + * with this query, but it should not expend a lot of cycles. The idea is + * that callers can avoid doing possibly-expensive processing to compute + * query_is_distinct_for()'s argument lists if the call could not possibly + * succeed. + */ +bool +query_supports_distinctness(Query *query) +{ + /* SRFs break distinctness except with DISTINCT, see below */ + if (query->hasTargetSRFs && query->distinctClause == NIL) + return false; + + /* check for features we can prove distinctness with */ + if (query->distinctClause != NIL || + query->groupClause != NIL || + query->groupingSets != NIL || + query->hasAggs || + query->havingQual || + query->setOperations) + return true; + + return false; +} + +/* + * query_is_distinct_for - does query never return duplicates of the + * specified columns? + * + * query is a not-yet-planned subquery (in current usage, it's always from + * a subquery RTE, which the planner avoids scribbling on). + * + * colnos is an integer list of output column numbers (resno's). We are + * interested in whether rows consisting of just these columns are certain + * to be distinct. "Distinctness" is defined according to whether the + * corresponding upper-level equality operators listed in opids would think + * the values are distinct. (Note: the opids entries could be cross-type + * operators, and thus not exactly the equality operators that the subquery + * would use itself. We use equality_ops_are_compatible() to check + * compatibility. That looks at btree or hash opfamily membership, and so + * should give trustworthy answers for all operators that we might need + * to deal with here.) + */ +bool +query_is_distinct_for(Query *query, List *colnos, List *opids) +{ + ListCell *l; + Oid opid; + + Assert(list_length(colnos) == list_length(opids)); + + /* + * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the + * columns in the DISTINCT clause appear in colnos and operator semantics + * match. This is true even if there are SRFs in the DISTINCT columns or + * elsewhere in the tlist. + */ + if (query->distinctClause) + { + foreach(l, query->distinctClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, + query->targetList); + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) + break; /* exit early if no match */ + } + if (l == NULL) /* had matches for all? */ + return true; + } + + /* + * Otherwise, a set-returning function in the query's targetlist can + * result in returning duplicate rows, despite any grouping that might + * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY + * columns, it would be safe because they'd be expanded before grouping. + * But it doesn't currently seem worth the effort to check for that.) + */ + if (query->hasTargetSRFs) + return false; + + /* + * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all + * the grouped columns appear in colnos and operator semantics match. + */ + if (query->groupClause && !query->groupingSets) + { + foreach(l, query->groupClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, + query->targetList); + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) + break; /* exit early if no match */ + } + if (l == NULL) /* had matches for all? */ + return true; + } + else if (query->groupingSets) + { + /* + * If we have grouping sets with expressions, we probably don't have + * uniqueness and analysis would be hard. Punt. + */ + if (query->groupClause) + return false; + + /* + * If we have no groupClause (therefore no grouping expressions), we + * might have one or many empty grouping sets. If there's just one, + * then we're returning only one row and are certainly unique. But + * otherwise, we know we're certainly not unique. + */ + if (list_length(query->groupingSets) == 1 && + ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY) + return true; + else + return false; + } + else + { + /* + * If we have no GROUP BY, but do have aggregates or HAVING, then the + * result is at most one row so it's surely unique, for any operators. + */ + if (query->hasAggs || query->havingQual) + return true; + } + + /* + * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, + * except with ALL. + */ + if (query->setOperations) + { + SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations); + + Assert(topop->op != SETOP_NONE); + + if (!topop->all) + { + ListCell *lg; + + /* We're good if all the nonjunk output columns are in colnos */ + lg = list_head(topop->groupClauses); + foreach(l, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + SortGroupClause *sgc; + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* non-resjunk columns should have grouping clauses */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(topop->groupClauses, lg); + + opid = distinct_col_search(tle->resno, colnos, opids); + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) + break; /* exit early if no match */ + } + if (l == NULL) /* had matches for all? */ + return true; + } + } + + /* + * XXX Are there any other cases in which we can easily see the result + * must be distinct? + * + * If you do add more smarts to this function, be sure to update + * query_supports_distinctness() to match. + */ + + return false; +} + +/* + * distinct_col_search - subroutine for query_is_distinct_for + * + * If colno is in colnos, return the corresponding element of opids, + * else return InvalidOid. (Ordinarily colnos would not contain duplicates, + * but if it does, we arbitrarily select the first match.) + */ +static Oid +distinct_col_search(int colno, List *colnos, List *opids) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, colnos, lc2, opids) + { + if (colno == lfirst_int(lc1)) + return lfirst_oid(lc2); + } + return InvalidOid; +} + + +/* + * innerrel_is_unique + * Check if the innerrel provably contains at most one tuple matching any + * tuple from the outerrel, based on join clauses in the 'restrictlist'. + * + * We need an actual RelOptInfo for the innerrel, but it's sufficient to + * identify the outerrel by its Relids. This asymmetry supports use of this + * function before joinrels have been built. (The caller is expected to + * also supply the joinrelids, just to save recalculating that.) + * + * The proof must be made based only on clauses that will be "joinquals" + * rather than "otherquals" at execution. For an inner join there's no + * difference; but if the join is outer, we must ignore pushed-down quals, + * as those will become "otherquals". Note that this means the answer might + * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the + * answer without regard to that, callers must take care not to call this + * with jointypes that would be classified differently by IS_OUTER_JOIN(). + * + * The actual proof is undertaken by is_innerrel_unique_for(); this function + * is a frontend that is mainly concerned with caching the answers. + * In particular, the force_cache argument allows overriding the internal + * heuristic about whether to cache negative answers; it should be "true" + * if making an inquiry that is not part of the normal bottom-up join search + * sequence. + */ +bool +innerrel_is_unique(PlannerInfo *root, + Relids joinrelids, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist, + bool force_cache) +{ + MemoryContext old_context; + ListCell *lc; + + /* Certainly can't prove uniqueness when there are no joinclauses */ + if (restrictlist == NIL) + return false; + + /* + * Make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* + * Query the cache to see if we've managed to prove that innerrel is + * unique for any subset of this outerrel. We don't need an exact match, + * as extra outerrels can't make the innerrel any less unique (or more + * formally, the restrictlist for a join to a superset outerrel must be a + * superset of the conditions we successfully used before). + */ + foreach(lc, innerrel->unique_for_rels) + { + Relids unique_for_rels = (Relids) lfirst(lc); + + if (bms_is_subset(unique_for_rels, outerrelids)) + return true; /* Success! */ + } + + /* + * Conversely, we may have already determined that this outerrel, or some + * superset thereof, cannot prove this innerrel to be unique. + */ + foreach(lc, innerrel->non_unique_for_rels) + { + Relids unique_for_rels = (Relids) lfirst(lc); + + if (bms_is_subset(outerrelids, unique_for_rels)) + return false; + } + + /* No cached information, so try to make the proof. */ + if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, + jointype, restrictlist)) + { + /* + * Cache the positive result for future probes, being sure to keep it + * in the planner_cxt even if we are working in GEQO. + * + * Note: one might consider trying to isolate the minimal subset of + * the outerrels that proved the innerrel unique. But it's not worth + * the trouble, because the planner builds up joinrels incrementally + * and so we'll see the minimally sufficient outerrels before any + * supersets of them anyway. + */ + old_context = MemoryContextSwitchTo(root->planner_cxt); + innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, + bms_copy(outerrelids)); + MemoryContextSwitchTo(old_context); + + return true; /* Success! */ + } + else + { + /* + * None of the join conditions for outerrel proved innerrel unique, so + * we can safely reject this outerrel or any subset of it in future + * checks. + * + * However, in normal planning mode, caching this knowledge is totally + * pointless; it won't be queried again, because we build up joinrels + * from smaller to larger. It is useful in GEQO mode, where the + * knowledge can be carried across successive planning attempts; and + * it's likely to be useful when using join-search plugins, too. Hence + * cache when join_search_private is non-NULL. (Yeah, that's a hack, + * but it seems reasonable.) + * + * Also, allow callers to override that heuristic and force caching; + * that's useful for reduce_unique_semijoins, which calls here before + * the normal join search starts. + */ + if (force_cache || root->join_search_private) + { + old_context = MemoryContextSwitchTo(root->planner_cxt); + innerrel->non_unique_for_rels = + lappend(innerrel->non_unique_for_rels, + bms_copy(outerrelids)); + MemoryContextSwitchTo(old_context); + } + + return false; + } +} + +/* + * is_innerrel_unique_for + * Check if the innerrel provably contains at most one tuple matching any + * tuple from the outerrel, based on join clauses in the 'restrictlist'. + */ +static bool +is_innerrel_unique_for(PlannerInfo *root, + Relids joinrelids, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist) +{ + List *clause_list = NIL; + ListCell *lc; + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * the outer rel. If an operator is mergejoinable then it behaves like + * equality for some btree opclass, so it's what we want. The + * mergejoinability test also eliminates clauses containing volatile + * functions, which we couldn't depend on. + */ + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * As noted above, if it's a pushed-down clause and we're at an outer + * join, we can't use it. + */ + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) + continue; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. + */ + if (!clause_sides_match_join(restrictinfo, outerrelids, + innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Let rel_is_distinct_for() do the hard work */ + return rel_is_distinct_for(root, innerrel, clause_list); +} diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c new file mode 100644 index 0000000..1bc59c9 --- /dev/null +++ b/src/backend/optimizer/plan/createplan.c @@ -0,0 +1,7244 @@ +/*------------------------------------------------------------------------- + * + * createplan.c + * Routines to create the desired plan for processing a query. + * Planning is complete, we just need to convert the selected + * Path into a Plan. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/createplan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/sysattr.h" +#include "catalog/pg_class.h" +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/extensible.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/paramassign.h" +#include "optimizer/paths.h" +#include "optimizer/placeholder.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/subselect.h" +#include "optimizer/tlist.h" +#include "parser/parse_clause.h" +#include "parser/parsetree.h" +#include "partitioning/partprune.h" +#include "utils/lsyscache.h" + + +/* + * Flag bits that can appear in the flags argument of create_plan_recurse(). + * These can be OR-ed together. + * + * CP_EXACT_TLIST specifies that the generated plan node must return exactly + * the tlist specified by the path's pathtarget (this overrides both + * CP_SMALL_TLIST and CP_LABEL_TLIST, if those are set). Otherwise, the + * plan node is allowed to return just the Vars and PlaceHolderVars needed + * to evaluate the pathtarget. + * + * CP_SMALL_TLIST specifies that a narrower tlist is preferred. This is + * passed down by parent nodes such as Sort and Hash, which will have to + * store the returned tuples. + * + * CP_LABEL_TLIST specifies that the plan node must return columns matching + * any sortgrouprefs specified in its pathtarget, with appropriate + * ressortgroupref labels. This is passed down by parent nodes such as Sort + * and Group, which need these values to be available in their inputs. + * + * CP_IGNORE_TLIST specifies that the caller plans to replace the targetlist, + * and therefore it doesn't matter a bit what target list gets generated. + */ +#define CP_EXACT_TLIST 0x0001 /* Plan must return specified tlist */ +#define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */ +#define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ +#define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ + + +static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, + int flags); +static Plan *create_scan_plan(PlannerInfo *root, Path *best_path, + int flags); +static List *build_path_tlist(PlannerInfo *root, Path *path); +static bool use_physical_tlist(PlannerInfo *root, Path *path, int flags); +static List *get_gating_quals(PlannerInfo *root, List *quals); +static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan, + List *gating_quals); +static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path); +static bool mark_async_capable_plan(Plan *plan, Path *path); +static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path, + int flags); +static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, + int flags); +static Result *create_group_result_plan(PlannerInfo *root, + GroupResultPath *best_path); +static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path); +static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path, + int flags); +static Memoize *create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, + int flags); +static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path, + int flags); +static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path); +static Plan *create_projection_plan(PlannerInfo *root, + ProjectionPath *best_path, + int flags); +static Plan *inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe); +static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags); +static IncrementalSort *create_incrementalsort_plan(PlannerInfo *root, + IncrementalSortPath *best_path, int flags); +static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path); +static Unique *create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, + int flags); +static Agg *create_agg_plan(PlannerInfo *root, AggPath *best_path); +static Plan *create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path); +static Result *create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path); +static WindowAgg *create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path); +static SetOp *create_setop_plan(PlannerInfo *root, SetOpPath *best_path, + int flags); +static RecursiveUnion *create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path); +static LockRows *create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, + int flags); +static ModifyTable *create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path); +static Limit *create_limit_plan(PlannerInfo *root, LimitPath *best_path, + int flags); +static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static SampleScan *create_samplescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path, + List *tlist, List *scan_clauses, bool indexonly); +static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, + BitmapHeapPath *best_path, + List *tlist, List *scan_clauses); +static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, + List **qual, List **indexqual, List **indexECs); +static void bitmap_subplan_mark_shared(Plan *plan); +static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, + List *tlist, List *scan_clauses); +static TidRangeScan *create_tidrangescan_plan(PlannerInfo *root, + TidRangePath *best_path, + List *tlist, + List *scan_clauses); +static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, + SubqueryScanPath *best_path, + List *tlist, List *scan_clauses); +static FunctionScan *create_functionscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static ValuesScan *create_valuesscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static TableFuncScan *create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static CteScan *create_ctescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static NamedTuplestoreScan *create_namedtuplestorescan_plan(PlannerInfo *root, + Path *best_path, List *tlist, List *scan_clauses); +static Result *create_resultscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static WorkTableScan *create_worktablescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); +static ForeignScan *create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, + List *tlist, List *scan_clauses); +static CustomScan *create_customscan_plan(PlannerInfo *root, + CustomPath *best_path, + List *tlist, List *scan_clauses); +static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path); +static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path); +static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path); +static Node *replace_nestloop_params(PlannerInfo *root, Node *expr); +static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root); +static void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, + List **stripped_indexquals_p, + List **fixed_indexquals_p); +static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path); +static Node *fix_indexqual_clause(PlannerInfo *root, + IndexOptInfo *index, int indexcol, + Node *clause, List *indexcolnos); +static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); +static List *get_switched_clauses(List *clauses, Relids outerrelids); +static List *order_qual_clauses(PlannerInfo *root, List *clauses); +static void copy_generic_path_info(Plan *dest, Path *src); +static void copy_plan_costsize(Plan *dest, Plan *src); +static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, + double limit_tuples); +static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); +static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid, + TableSampleClause *tsc); +static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, + Oid indexid, List *indexqual, List *indexqualorig, + List *indexorderby, List *indexorderbyorig, + List *indexorderbyops, + ScanDirection indexscandir); +static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, + Index scanrelid, Oid indexid, + List *indexqual, List *recheckqual, + List *indexorderby, + List *indextlist, + ScanDirection indexscandir); +static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, + List *indexqual, + List *indexqualorig); +static BitmapHeapScan *make_bitmap_heapscan(List *qptlist, + List *qpqual, + Plan *lefttree, + List *bitmapqualorig, + Index scanrelid); +static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, + List *tidquals); +static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual, + Index scanrelid, List *tidrangequals); +static SubqueryScan *make_subqueryscan(List *qptlist, + List *qpqual, + Index scanrelid, + Plan *subplan); +static FunctionScan *make_functionscan(List *qptlist, List *qpqual, + Index scanrelid, List *functions, bool funcordinality); +static ValuesScan *make_valuesscan(List *qptlist, List *qpqual, + Index scanrelid, List *values_lists); +static TableFuncScan *make_tablefuncscan(List *qptlist, List *qpqual, + Index scanrelid, TableFunc *tablefunc); +static CteScan *make_ctescan(List *qptlist, List *qpqual, + Index scanrelid, int ctePlanId, int cteParam); +static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual, + Index scanrelid, char *enrname); +static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, + Index scanrelid, int wtParam); +static RecursiveUnion *make_recursive_union(List *tlist, + Plan *lefttree, + Plan *righttree, + int wtParam, + List *distinctList, + long numGroups); +static BitmapAnd *make_bitmap_and(List *bitmapplans); +static BitmapOr *make_bitmap_or(List *bitmapplans); +static NestLoop *make_nestloop(List *tlist, + List *joinclauses, List *otherclauses, List *nestParams, + Plan *lefttree, Plan *righttree, + JoinType jointype, bool inner_unique); +static HashJoin *make_hashjoin(List *tlist, + List *joinclauses, List *otherclauses, + List *hashclauses, + List *hashoperators, List *hashcollations, + List *hashkeys, + Plan *lefttree, Plan *righttree, + JoinType jointype, bool inner_unique); +static Hash *make_hash(Plan *lefttree, + List *hashkeys, + Oid skewTable, + AttrNumber skewColumn, + bool skewInherit); +static MergeJoin *make_mergejoin(List *tlist, + List *joinclauses, List *otherclauses, + List *mergeclauses, + Oid *mergefamilies, + Oid *mergecollations, + int *mergestrategies, + bool *mergenullsfirst, + Plan *lefttree, Plan *righttree, + JoinType jointype, bool inner_unique, + bool skip_mark_restore); +static Sort *make_sort(Plan *lefttree, int numCols, + AttrNumber *sortColIdx, Oid *sortOperators, + Oid *collations, bool *nullsFirst); +static IncrementalSort *make_incrementalsort(Plan *lefttree, + int numCols, int nPresortedCols, + AttrNumber *sortColIdx, Oid *sortOperators, + Oid *collations, bool *nullsFirst); +static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, + Relids relids, + const AttrNumber *reqColIdx, + bool adjust_tlist_in_place, + int *p_numsortkeys, + AttrNumber **p_sortColIdx, + Oid **p_sortOperators, + Oid **p_collations, + bool **p_nullsFirst); +static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, + Relids relids); +static IncrementalSort *make_incrementalsort_from_pathkeys(Plan *lefttree, + List *pathkeys, Relids relids, int nPresortedCols); +static Sort *make_sort_from_groupcols(List *groupcls, + AttrNumber *grpColIdx, + Plan *lefttree); +static Material *make_material(Plan *lefttree); +static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, + Oid *collations, List *param_exprs, + bool singlerow, bool binary_mode, + uint32 est_entries, Bitmapset *keyparamids); +static WindowAgg *make_windowagg(List *tlist, Index winref, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, + int frameOptions, Node *startOffset, Node *endOffset, + Oid startInRangeFunc, Oid endInRangeFunc, + Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, + List *runCondition, List *qual, bool topWindow, + Plan *lefttree); +static Group *make_group(List *tlist, List *qual, int numGroupCols, + AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, + Plan *lefttree); +static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); +static Unique *make_unique_from_pathkeys(Plan *lefttree, + List *pathkeys, int numCols); +static Gather *make_gather(List *qptlist, List *qpqual, + int nworkers, int rescan_param, bool single_copy, Plan *subplan); +static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, + List *distinctList, AttrNumber flagColIdx, int firstFlag, + long numGroups); +static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam); +static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); +static ProjectSet *make_project_set(List *tlist, Plan *subplan); +static ModifyTable *make_modifytable(PlannerInfo *root, Plan *subplan, + CmdType operation, bool canSetTag, + Index nominalRelation, Index rootRelation, + bool partColsUpdated, + List *resultRelations, + List *updateColnosLists, + List *withCheckOptionLists, List *returningLists, + List *rowMarks, OnConflictExpr *onconflict, + List *mergeActionList, int epqParam); +static GatherMerge *create_gather_merge_plan(PlannerInfo *root, + GatherMergePath *best_path); + + +/* + * create_plan + * Creates the access plan for a query by recursively processing the + * desired tree of pathnodes, starting at the node 'best_path'. For + * every pathnode found, we create a corresponding plan node containing + * appropriate id, target list, and qualification information. + * + * The tlists and quals in the plan tree are still in planner format, + * ie, Vars still correspond to the parser's numbering. This will be + * fixed later by setrefs.c. + * + * best_path is the best access path + * + * Returns a Plan tree. + */ +Plan * +create_plan(PlannerInfo *root, Path *best_path) +{ + Plan *plan; + + /* plan_params should not be in use in current query level */ + Assert(root->plan_params == NIL); + + /* Initialize this module's workspace in PlannerInfo */ + root->curOuterRels = NULL; + root->curOuterParams = NIL; + + /* Recursively process the path tree, demanding the correct tlist result */ + plan = create_plan_recurse(root, best_path, CP_EXACT_TLIST); + + /* + * Make sure the topmost plan node's targetlist exposes the original + * column names and other decorative info. Targetlists generated within + * the planner don't bother with that stuff, but we must have it on the + * top-level tlist seen at execution time. However, ModifyTable plan + * nodes don't have a tlist matching the querytree targetlist. + */ + if (!IsA(plan, ModifyTable)) + apply_tlist_labeling(plan->targetlist, root->processed_tlist); + + /* + * Attach any initPlans created in this query level to the topmost plan + * node. (In principle the initplans could go in any plan node at or + * above where they're referenced, but there seems no reason to put them + * any lower than the topmost node for the query level. Also, see + * comments for SS_finalize_plan before you try to change this.) + */ + SS_attach_initplans(root, plan); + + /* Check we successfully assigned all NestLoopParams to plan nodes */ + if (root->curOuterParams != NIL) + elog(ERROR, "failed to assign all NestLoopParams to plan nodes"); + + /* + * Reset plan_params to ensure param IDs used for nestloop params are not + * re-used later + */ + root->plan_params = NIL; + + return plan; +} + +/* + * create_plan_recurse + * Recursive guts of create_plan(). + */ +static Plan * +create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +{ + Plan *plan; + + /* Guard against stack overflow due to overly complex plans */ + check_stack_depth(); + + switch (best_path->pathtype) + { + case T_SeqScan: + case T_SampleScan: + case T_IndexScan: + case T_IndexOnlyScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + case T_NamedTuplestoreScan: + case T_ForeignScan: + case T_CustomScan: + plan = create_scan_plan(root, best_path, flags); + break; + case T_HashJoin: + case T_MergeJoin: + case T_NestLoop: + plan = create_join_plan(root, + (JoinPath *) best_path); + break; + case T_Append: + plan = create_append_plan(root, + (AppendPath *) best_path, + flags); + break; + case T_MergeAppend: + plan = create_merge_append_plan(root, + (MergeAppendPath *) best_path, + flags); + break; + case T_Result: + if (IsA(best_path, ProjectionPath)) + { + plan = create_projection_plan(root, + (ProjectionPath *) best_path, + flags); + } + else if (IsA(best_path, MinMaxAggPath)) + { + plan = (Plan *) create_minmaxagg_plan(root, + (MinMaxAggPath *) best_path); + } + else if (IsA(best_path, GroupResultPath)) + { + plan = (Plan *) create_group_result_plan(root, + (GroupResultPath *) best_path); + } + else + { + /* Simple RTE_RESULT base relation */ + Assert(IsA(best_path, Path)); + plan = create_scan_plan(root, best_path, flags); + } + break; + case T_ProjectSet: + plan = (Plan *) create_project_set_plan(root, + (ProjectSetPath *) best_path); + break; + case T_Material: + plan = (Plan *) create_material_plan(root, + (MaterialPath *) best_path, + flags); + break; + case T_Memoize: + plan = (Plan *) create_memoize_plan(root, + (MemoizePath *) best_path, + flags); + break; + case T_Unique: + if (IsA(best_path, UpperUniquePath)) + { + plan = (Plan *) create_upper_unique_plan(root, + (UpperUniquePath *) best_path, + flags); + } + else + { + Assert(IsA(best_path, UniquePath)); + plan = create_unique_plan(root, + (UniquePath *) best_path, + flags); + } + break; + case T_Gather: + plan = (Plan *) create_gather_plan(root, + (GatherPath *) best_path); + break; + case T_Sort: + plan = (Plan *) create_sort_plan(root, + (SortPath *) best_path, + flags); + break; + case T_IncrementalSort: + plan = (Plan *) create_incrementalsort_plan(root, + (IncrementalSortPath *) best_path, + flags); + break; + case T_Group: + plan = (Plan *) create_group_plan(root, + (GroupPath *) best_path); + break; + case T_Agg: + if (IsA(best_path, GroupingSetsPath)) + plan = create_groupingsets_plan(root, + (GroupingSetsPath *) best_path); + else + { + Assert(IsA(best_path, AggPath)); + plan = (Plan *) create_agg_plan(root, + (AggPath *) best_path); + } + break; + case T_WindowAgg: + plan = (Plan *) create_windowagg_plan(root, + (WindowAggPath *) best_path); + break; + case T_SetOp: + plan = (Plan *) create_setop_plan(root, + (SetOpPath *) best_path, + flags); + break; + case T_RecursiveUnion: + plan = (Plan *) create_recursiveunion_plan(root, + (RecursiveUnionPath *) best_path); + break; + case T_LockRows: + plan = (Plan *) create_lockrows_plan(root, + (LockRowsPath *) best_path, + flags); + break; + case T_ModifyTable: + plan = (Plan *) create_modifytable_plan(root, + (ModifyTablePath *) best_path); + break; + case T_Limit: + plan = (Plan *) create_limit_plan(root, + (LimitPath *) best_path, + flags); + break; + case T_GatherMerge: + plan = (Plan *) create_gather_merge_plan(root, + (GatherMergePath *) best_path); + break; + default: + elog(ERROR, "unrecognized node type: %d", + (int) best_path->pathtype); + plan = NULL; /* keep compiler quiet */ + break; + } + + return plan; +} + +/* + * create_scan_plan + * Create a scan plan for the parent relation of 'best_path'. + */ +static Plan * +create_scan_plan(PlannerInfo *root, Path *best_path, int flags) +{ + RelOptInfo *rel = best_path->parent; + List *scan_clauses; + List *gating_clauses; + List *tlist; + Plan *plan; + + /* + * Extract the relevant restriction clauses from the parent relation. The + * executor must apply all these restrictions during the scan, except for + * pseudoconstants which we'll take care of below. + * + * If this is a plain indexscan or index-only scan, we need not consider + * restriction clauses that are implied by the index's predicate, so use + * indrestrictinfo not baserestrictinfo. Note that we can't do that for + * bitmap indexscans, since there's not necessarily a single index + * involved; but it doesn't matter since create_bitmap_scan_plan() will be + * able to get rid of such clauses anyway via predicate proof. + */ + switch (best_path->pathtype) + { + case T_IndexScan: + case T_IndexOnlyScan: + scan_clauses = castNode(IndexPath, best_path)->indexinfo->indrestrictinfo; + break; + default: + scan_clauses = rel->baserestrictinfo; + break; + } + + /* + * If this is a parameterized scan, we also need to enforce all the join + * clauses available from the outer relation(s). + * + * For paranoia's sake, don't modify the stored baserestrictinfo list. + */ + if (best_path->param_info) + scan_clauses = list_concat_copy(scan_clauses, + best_path->param_info->ppi_clauses); + + /* + * Detect whether we have any pseudoconstant quals to deal with. Then, if + * we'll need a gating Result node, it will be able to project, so there + * are no requirements on the child's tlist. + */ + gating_clauses = get_gating_quals(root, scan_clauses); + if (gating_clauses) + flags = 0; + + /* + * For table scans, rather than using the relation targetlist (which is + * only those Vars actually needed by the query), we prefer to generate a + * tlist containing all Vars in order. This will allow the executor to + * optimize away projection of the table tuples, if possible. + * + * But if the caller is going to ignore our tlist anyway, then don't + * bother generating one at all. We use an exact equality test here, so + * that this only applies when CP_IGNORE_TLIST is the only flag set. + */ + if (flags == CP_IGNORE_TLIST) + { + tlist = NULL; + } + else if (use_physical_tlist(root, best_path, flags)) + { + if (best_path->pathtype == T_IndexOnlyScan) + { + /* For index-only scan, the preferred tlist is the index's */ + tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist); + + /* + * Transfer sortgroupref data to the replacement tlist, if + * requested (use_physical_tlist checked that this will work). + */ + if (flags & CP_LABEL_TLIST) + apply_pathtarget_labeling_to_tlist(tlist, best_path->pathtarget); + } + else + { + tlist = build_physical_tlist(root, rel); + if (tlist == NIL) + { + /* Failed because of dropped cols, so use regular method */ + tlist = build_path_tlist(root, best_path); + } + else + { + /* As above, transfer sortgroupref data to replacement tlist */ + if (flags & CP_LABEL_TLIST) + apply_pathtarget_labeling_to_tlist(tlist, best_path->pathtarget); + } + } + } + else + { + tlist = build_path_tlist(root, best_path); + } + + switch (best_path->pathtype) + { + case T_SeqScan: + plan = (Plan *) create_seqscan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_SampleScan: + plan = (Plan *) create_samplescan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_IndexScan: + plan = (Plan *) create_indexscan_plan(root, + (IndexPath *) best_path, + tlist, + scan_clauses, + false); + break; + + case T_IndexOnlyScan: + plan = (Plan *) create_indexscan_plan(root, + (IndexPath *) best_path, + tlist, + scan_clauses, + true); + break; + + case T_BitmapHeapScan: + plan = (Plan *) create_bitmap_scan_plan(root, + (BitmapHeapPath *) best_path, + tlist, + scan_clauses); + break; + + case T_TidScan: + plan = (Plan *) create_tidscan_plan(root, + (TidPath *) best_path, + tlist, + scan_clauses); + break; + + case T_TidRangeScan: + plan = (Plan *) create_tidrangescan_plan(root, + (TidRangePath *) best_path, + tlist, + scan_clauses); + break; + + case T_SubqueryScan: + plan = (Plan *) create_subqueryscan_plan(root, + (SubqueryScanPath *) best_path, + tlist, + scan_clauses); + break; + + case T_FunctionScan: + plan = (Plan *) create_functionscan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_TableFuncScan: + plan = (Plan *) create_tablefuncscan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_ValuesScan: + plan = (Plan *) create_valuesscan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_CteScan: + plan = (Plan *) create_ctescan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_NamedTuplestoreScan: + plan = (Plan *) create_namedtuplestorescan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_Result: + plan = (Plan *) create_resultscan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_WorkTableScan: + plan = (Plan *) create_worktablescan_plan(root, + best_path, + tlist, + scan_clauses); + break; + + case T_ForeignScan: + plan = (Plan *) create_foreignscan_plan(root, + (ForeignPath *) best_path, + tlist, + scan_clauses); + break; + + case T_CustomScan: + plan = (Plan *) create_customscan_plan(root, + (CustomPath *) best_path, + tlist, + scan_clauses); + break; + + default: + elog(ERROR, "unrecognized node type: %d", + (int) best_path->pathtype); + plan = NULL; /* keep compiler quiet */ + break; + } + + /* + * If there are any pseudoconstant clauses attached to this node, insert a + * gating Result node that evaluates the pseudoconstants as one-time + * quals. + */ + if (gating_clauses) + plan = create_gating_plan(root, best_path, plan, gating_clauses); + + return plan; +} + +/* + * Build a target list (ie, a list of TargetEntry) for the Path's output. + * + * This is almost just make_tlist_from_pathtarget(), but we also have to + * deal with replacing nestloop params. + */ +static List * +build_path_tlist(PlannerInfo *root, Path *path) +{ + List *tlist = NIL; + Index *sortgrouprefs = path->pathtarget->sortgrouprefs; + int resno = 1; + ListCell *v; + + foreach(v, path->pathtarget->exprs) + { + Node *node = (Node *) lfirst(v); + TargetEntry *tle; + + /* + * If it's a parameterized path, there might be lateral references in + * the tlist, which need to be replaced with Params. There's no need + * to remake the TargetEntry nodes, so apply this to each list item + * separately. + */ + if (path->param_info) + node = replace_nestloop_params(root, node); + + tle = makeTargetEntry((Expr *) node, + resno, + NULL, + false); + if (sortgrouprefs) + tle->ressortgroupref = sortgrouprefs[resno - 1]; + + tlist = lappend(tlist, tle); + resno++; + } + return tlist; +} + +/* + * use_physical_tlist + * Decide whether to use a tlist matching relation structure, + * rather than only those Vars actually referenced. + */ +static bool +use_physical_tlist(PlannerInfo *root, Path *path, int flags) +{ + RelOptInfo *rel = path->parent; + int i; + ListCell *lc; + + /* + * Forget it if either exact tlist or small tlist is demanded. + */ + if (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)) + return false; + + /* + * We can do this for real relation scans, subquery scans, function scans, + * tablefunc scans, values scans, and CTE scans (but not for, eg, joins). + */ + if (rel->rtekind != RTE_RELATION && + rel->rtekind != RTE_SUBQUERY && + rel->rtekind != RTE_FUNCTION && + rel->rtekind != RTE_TABLEFUNC && + rel->rtekind != RTE_VALUES && + rel->rtekind != RTE_CTE) + return false; + + /* + * Can't do it with inheritance cases either (mainly because Append + * doesn't project; this test may be unnecessary now that + * create_append_plan instructs its children to return an exact tlist). + */ + if (rel->reloptkind != RELOPT_BASEREL) + return false; + + /* + * Also, don't do it to a CustomPath; the premise that we're extracting + * columns from a simple physical tuple is unlikely to hold for those. + * (When it does make sense, the custom path creator can set up the path's + * pathtarget that way.) + */ + if (IsA(path, CustomPath)) + return false; + + /* + * If a bitmap scan's tlist is empty, keep it as-is. This may allow the + * executor to skip heap page fetches, and in any case, the benefit of + * using a physical tlist instead would be minimal. + */ + if (IsA(path, BitmapHeapPath) && + path->pathtarget->exprs == NIL) + return false; + + /* + * Can't do it if any system columns or whole-row Vars are requested. + * (This could possibly be fixed but would take some fragile assumptions + * in setrefs.c, I think.) + */ + for (i = rel->min_attr; i <= 0; i++) + { + if (!bms_is_empty(rel->attr_needed[i - rel->min_attr])) + return false; + } + + /* + * Can't do it if the rel is required to emit any placeholder expressions, + * either. + */ + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + + if (bms_nonempty_difference(phinfo->ph_needed, rel->relids) && + bms_is_subset(phinfo->ph_eval_at, rel->relids)) + return false; + } + + /* + * For an index-only scan, the "physical tlist" is the index's indextlist. + * We can only return that without a projection if all the index's columns + * are returnable. + */ + if (path->pathtype == T_IndexOnlyScan) + { + IndexOptInfo *indexinfo = ((IndexPath *) path)->indexinfo; + + for (i = 0; i < indexinfo->ncolumns; i++) + { + if (!indexinfo->canreturn[i]) + return false; + } + } + + /* + * Also, can't do it if CP_LABEL_TLIST is specified and path is requested + * to emit any sort/group columns that are not simple Vars. (If they are + * simple Vars, they should appear in the physical tlist, and + * apply_pathtarget_labeling_to_tlist will take care of getting them + * labeled again.) We also have to check that no two sort/group columns + * are the same Var, else that element of the physical tlist would need + * conflicting ressortgroupref labels. + */ + if ((flags & CP_LABEL_TLIST) && path->pathtarget->sortgrouprefs) + { + Bitmapset *sortgroupatts = NULL; + + i = 0; + foreach(lc, path->pathtarget->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + if (path->pathtarget->sortgrouprefs[i]) + { + if (expr && IsA(expr, Var)) + { + int attno = ((Var *) expr)->varattno; + + attno -= FirstLowInvalidHeapAttributeNumber; + if (bms_is_member(attno, sortgroupatts)) + return false; + sortgroupatts = bms_add_member(sortgroupatts, attno); + } + else + return false; + } + i++; + } + } + + return true; +} + +/* + * get_gating_quals + * See if there are pseudoconstant quals in a node's quals list + * + * If the node's quals list includes any pseudoconstant quals, + * return just those quals. + */ +static List * +get_gating_quals(PlannerInfo *root, List *quals) +{ + /* No need to look if we know there are no pseudoconstants */ + if (!root->hasPseudoConstantQuals) + return NIL; + + /* Sort into desirable execution order while still in RestrictInfo form */ + quals = order_qual_clauses(root, quals); + + /* Pull out any pseudoconstant quals from the RestrictInfo list */ + return extract_actual_clauses(quals, true); +} + +/* + * create_gating_plan + * Deal with pseudoconstant qual clauses + * + * Add a gating Result node atop the already-built plan. + */ +static Plan * +create_gating_plan(PlannerInfo *root, Path *path, Plan *plan, + List *gating_quals) +{ + Plan *gplan; + Plan *splan; + + Assert(gating_quals); + + /* + * We might have a trivial Result plan already. Stacking one Result atop + * another is silly, so if that applies, just discard the input plan. + * (We're assuming its targetlist is uninteresting; it should be either + * the same as the result of build_path_tlist, or a simplified version.) + */ + splan = plan; + if (IsA(plan, Result)) + { + Result *rplan = (Result *) plan; + + if (rplan->plan.lefttree == NULL && + rplan->resconstantqual == NULL) + splan = NULL; + } + + /* + * Since we need a Result node anyway, always return the path's requested + * tlist; that's never a wrong choice, even if the parent node didn't ask + * for CP_EXACT_TLIST. + */ + gplan = (Plan *) make_result(build_path_tlist(root, path), + (Node *) gating_quals, + splan); + + /* + * Notice that we don't change cost or size estimates when doing gating. + * The costs of qual eval were already included in the subplan's cost. + * Leaving the size alone amounts to assuming that the gating qual will + * succeed, which is the conservative estimate for planning upper queries. + * We certainly don't want to assume the output size is zero (unless the + * gating qual is actually constant FALSE, and that case is dealt with in + * clausesel.c). Interpolating between the two cases is silly, because it + * doesn't reflect what will really happen at runtime, and besides which + * in most cases we have only a very bad idea of the probability of the + * gating qual being true. + */ + copy_plan_costsize(gplan, plan); + + /* Gating quals could be unsafe, so better use the Path's safety flag */ + gplan->parallel_safe = path->parallel_safe; + + return gplan; +} + +/* + * create_join_plan + * Create a join plan for 'best_path' and (recursively) plans for its + * inner and outer paths. + */ +static Plan * +create_join_plan(PlannerInfo *root, JoinPath *best_path) +{ + Plan *plan; + List *gating_clauses; + + switch (best_path->path.pathtype) + { + case T_MergeJoin: + plan = (Plan *) create_mergejoin_plan(root, + (MergePath *) best_path); + break; + case T_HashJoin: + plan = (Plan *) create_hashjoin_plan(root, + (HashPath *) best_path); + break; + case T_NestLoop: + plan = (Plan *) create_nestloop_plan(root, + (NestPath *) best_path); + break; + default: + elog(ERROR, "unrecognized node type: %d", + (int) best_path->path.pathtype); + plan = NULL; /* keep compiler quiet */ + break; + } + + /* + * If there are any pseudoconstant clauses attached to this node, insert a + * gating Result node that evaluates the pseudoconstants as one-time + * quals. + */ + gating_clauses = get_gating_quals(root, best_path->joinrestrictinfo); + if (gating_clauses) + plan = create_gating_plan(root, (Path *) best_path, plan, + gating_clauses); + +#ifdef NOT_USED + + /* + * * Expensive function pullups may have pulled local predicates * into + * this path node. Put them in the qpqual of the plan node. * JMH, + * 6/15/92 + */ + if (get_loc_restrictinfo(best_path) != NIL) + set_qpqual((Plan) plan, + list_concat(get_qpqual((Plan) plan), + get_actual_clauses(get_loc_restrictinfo(best_path)))); +#endif + + return plan; +} + +/* + * mark_async_capable_plan + * Check whether the Plan node created from a Path node is async-capable, + * and if so, mark the Plan node as such and return true, otherwise + * return false. + */ +static bool +mark_async_capable_plan(Plan *plan, Path *path) +{ + switch (nodeTag(path)) + { + case T_SubqueryScanPath: + { + SubqueryScan *scan_plan = (SubqueryScan *) plan; + + /* + * If the generated plan node includes a gating Result node, + * we can't execute it asynchronously. + */ + if (IsA(plan, Result)) + return false; + + /* + * If a SubqueryScan node atop of an async-capable plan node + * is deletable, consider it as async-capable. + */ + if (trivial_subqueryscan(scan_plan) && + mark_async_capable_plan(scan_plan->subplan, + ((SubqueryScanPath *) path)->subpath)) + break; + return false; + } + case T_ForeignPath: + { + FdwRoutine *fdwroutine = path->parent->fdwroutine; + + /* + * If the generated plan node includes a gating Result node, + * we can't execute it asynchronously. + */ + if (IsA(plan, Result)) + return false; + + Assert(fdwroutine != NULL); + if (fdwroutine->IsForeignPathAsyncCapable != NULL && + fdwroutine->IsForeignPathAsyncCapable((ForeignPath *) path)) + break; + return false; + } + case T_ProjectionPath: + + /* + * If the generated plan node includes a Result node for the + * projection, we can't execute it asynchronously. + */ + if (IsA(plan, Result)) + return false; + + /* + * create_projection_plan() would have pulled up the subplan, so + * check the capability using the subpath. + */ + if (mark_async_capable_plan(plan, + ((ProjectionPath *) path)->subpath)) + return true; + return false; + default: + return false; + } + + plan->async_capable = true; + + return true; +} + +/* + * create_append_plan + * Create an Append plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Plan * +create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) +{ + Append *plan; + List *tlist = build_path_tlist(root, &best_path->path); + int orig_tlist_length = list_length(tlist); + bool tlist_was_changed = false; + List *pathkeys = best_path->path.pathkeys; + List *subplans = NIL; + ListCell *subpaths; + int nasyncplans = 0; + RelOptInfo *rel = best_path->path.parent; + PartitionPruneInfo *partpruneinfo = NULL; + int nodenumsortkeys = 0; + AttrNumber *nodeSortColIdx = NULL; + Oid *nodeSortOperators = NULL; + Oid *nodeCollations = NULL; + bool *nodeNullsFirst = NULL; + bool consider_async = false; + + /* + * The subpaths list could be empty, if every child was proven empty by + * constraint exclusion. In that case generate a dummy plan that returns + * no rows. + * + * Note that an AppendPath with no members is also generated in certain + * cases where there was no appending construct at all, but we know the + * relation is empty (see set_dummy_rel_pathlist and mark_dummy_rel). + */ + if (best_path->subpaths == NIL) + { + /* Generate a Result plan with constant-FALSE gating qual */ + Plan *plan; + + plan = (Plan *) make_result(tlist, + (Node *) list_make1(makeBoolConst(false, + false)), + NULL); + + copy_generic_path_info(plan, (Path *) best_path); + + return plan; + } + + /* + * Otherwise build an Append plan. Note that if there's just one child, + * the Append is pretty useless; but we wait till setrefs.c to get rid of + * it. Doing so here doesn't work because the varno of the child scan + * plan won't match the parent-rel Vars it'll be asked to emit. + * + * We don't have the actual creation of the Append node split out into a + * separate make_xxx function. This is because we want to run + * prepare_sort_from_pathkeys on it before we do so on the individual + * child plans, to make cross-checking the sort info easier. + */ + plan = makeNode(Append); + plan->plan.targetlist = tlist; + plan->plan.qual = NIL; + plan->plan.lefttree = NULL; + plan->plan.righttree = NULL; + plan->apprelids = rel->relids; + + if (pathkeys != NIL) + { + /* + * Compute sort column info, and adjust the Append's tlist as needed. + * Because we pass adjust_tlist_in_place = true, we may ignore the + * function result; it must be the same plan node. However, we then + * need to detect whether any tlist entries were added. + */ + (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys, + best_path->path.parent->relids, + NULL, + true, + &nodenumsortkeys, + &nodeSortColIdx, + &nodeSortOperators, + &nodeCollations, + &nodeNullsFirst); + tlist_was_changed = (orig_tlist_length != list_length(plan->plan.targetlist)); + } + + /* If appropriate, consider async append */ + consider_async = (enable_async_append && pathkeys == NIL && + !best_path->path.parallel_safe && + list_length(best_path->subpaths) > 1); + + /* Build the plan for each child */ + foreach(subpaths, best_path->subpaths) + { + Path *subpath = (Path *) lfirst(subpaths); + Plan *subplan; + + /* Must insist that all children return the same tlist */ + subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + + /* + * For ordered Appends, we must insert a Sort node if subplan isn't + * sufficiently ordered. + */ + if (pathkeys != NIL) + { + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* + * Compute sort column info, and adjust subplan's tlist as needed. + * We must apply prepare_sort_from_pathkeys even to subplans that + * don't need an explicit sort, to make sure they are returning + * the same sort key columns the Append expects. + */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + subpath->parent->relids, + nodeSortColIdx, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* + * Check that we got the same sort key information. We just + * Assert that the sortops match, since those depend only on the + * pathkeys; but it seems like a good idea to check the sort + * column numbers explicitly, to ensure the tlists match up. + */ + Assert(numsortkeys == nodenumsortkeys); + if (memcmp(sortColIdx, nodeSortColIdx, + numsortkeys * sizeof(AttrNumber)) != 0) + elog(ERROR, "Append child's targetlist doesn't match Append"); + Assert(memcmp(sortOperators, nodeSortOperators, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(collations, nodeCollations, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(nullsFirst, nodeNullsFirst, + numsortkeys * sizeof(bool)) == 0); + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + Sort *sort = make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, sort, best_path->limit_tuples); + subplan = (Plan *) sort; + } + } + + /* If needed, check to see if subplan can be executed asynchronously */ + if (consider_async && mark_async_capable_plan(subplan, subpath)) + { + Assert(subplan->async_capable); + ++nasyncplans; + } + + subplans = lappend(subplans, subplan); + } + + /* + * If any quals exist, they may be useful to perform further partition + * pruning during execution. Gather information needed by the executor to + * do partition pruning. + */ + if (enable_partition_pruning) + { + List *prunequal; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + if (best_path->path.param_info) + { + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + if (prunequal != NIL) + partpruneinfo = + make_partition_pruneinfo(root, rel, + best_path->subpaths, + prunequal); + } + + plan->appendplans = subplans; + plan->nasyncplans = nasyncplans; + plan->first_partial_plan = best_path->first_partial_path; + plan->part_prune_info = partpruneinfo; + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + /* + * If prepare_sort_from_pathkeys added sort columns, but we were told to + * produce either the exact tlist or a narrow tlist, we should get rid of + * the sort columns again. We must inject a projection node to do so. + */ + if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST))) + { + tlist = list_truncate(list_copy(plan->plan.targetlist), + orig_tlist_length); + return inject_projection_plan((Plan *) plan, tlist, + plan->plan.parallel_safe); + } + else + return (Plan *) plan; +} + +/* + * create_merge_append_plan + * Create a MergeAppend plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Plan * +create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, + int flags) +{ + MergeAppend *node = makeNode(MergeAppend); + Plan *plan = &node->plan; + List *tlist = build_path_tlist(root, &best_path->path); + int orig_tlist_length = list_length(tlist); + bool tlist_was_changed; + List *pathkeys = best_path->path.pathkeys; + List *subplans = NIL; + ListCell *subpaths; + RelOptInfo *rel = best_path->path.parent; + PartitionPruneInfo *partpruneinfo = NULL; + + /* + * We don't have the actual creation of the MergeAppend node split out + * into a separate make_xxx function. This is because we want to run + * prepare_sort_from_pathkeys on it before we do so on the individual + * child plans, to make cross-checking the sort info easier. + */ + copy_generic_path_info(plan, (Path *) best_path); + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = NULL; + plan->righttree = NULL; + node->apprelids = rel->relids; + + /* + * Compute sort column info, and adjust MergeAppend's tlist as needed. + * Because we pass adjust_tlist_in_place = true, we may ignore the + * function result; it must be the same plan node. However, we then need + * to detect whether any tlist entries were added. + */ + (void) prepare_sort_from_pathkeys(plan, pathkeys, + best_path->path.parent->relids, + NULL, + true, + &node->numCols, + &node->sortColIdx, + &node->sortOperators, + &node->collations, + &node->nullsFirst); + tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist)); + + /* + * Now prepare the child plans. We must apply prepare_sort_from_pathkeys + * even to subplans that don't need an explicit sort, to make sure they + * are returning the same sort key columns the MergeAppend expects. + */ + foreach(subpaths, best_path->subpaths) + { + Path *subpath = (Path *) lfirst(subpaths); + Plan *subplan; + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* Build the child plan */ + /* Must insist that all children return the same tlist */ + subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + + /* Compute sort column info, and adjust subplan's tlist as needed */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + subpath->parent->relids, + node->sortColIdx, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* + * Check that we got the same sort key information. We just Assert + * that the sortops match, since those depend only on the pathkeys; + * but it seems like a good idea to check the sort column numbers + * explicitly, to ensure the tlists really do match up. + */ + Assert(numsortkeys == node->numCols); + if (memcmp(sortColIdx, node->sortColIdx, + numsortkeys * sizeof(AttrNumber)) != 0) + elog(ERROR, "MergeAppend child's targetlist doesn't match MergeAppend"); + Assert(memcmp(sortOperators, node->sortOperators, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(collations, node->collations, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(nullsFirst, node->nullsFirst, + numsortkeys * sizeof(bool)) == 0); + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + Sort *sort = make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, sort, best_path->limit_tuples); + subplan = (Plan *) sort; + } + + subplans = lappend(subplans, subplan); + } + + /* + * If any quals exist, they may be useful to perform further partition + * pruning during execution. Gather information needed by the executor to + * do partition pruning. + */ + if (enable_partition_pruning) + { + List *prunequal; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + if (best_path->path.param_info) + { + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + if (prunequal != NIL) + partpruneinfo = make_partition_pruneinfo(root, rel, + best_path->subpaths, + prunequal); + } + + node->mergeplans = subplans; + node->part_prune_info = partpruneinfo; + + /* + * If prepare_sort_from_pathkeys added sort columns, but we were told to + * produce either the exact tlist or a narrow tlist, we should get rid of + * the sort columns again. We must inject a projection node to do so. + */ + if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST))) + { + tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length); + return inject_projection_plan(plan, tlist, plan->parallel_safe); + } + else + return plan; +} + +/* + * create_group_result_plan + * Create a Result plan for 'best_path'. + * This is only used for degenerate grouping cases. + * + * Returns a Plan node. + */ +static Result * +create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) +{ + Result *plan; + List *tlist; + List *quals; + + tlist = build_path_tlist(root, &best_path->path); + + /* best_path->quals is just bare clauses */ + quals = order_qual_clauses(root, best_path->quals); + + plan = make_result(tlist, (Node *) quals, NULL); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_project_set_plan + * Create a ProjectSet plan for 'best_path'. + * + * Returns a Plan node. + */ +static ProjectSet * +create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) +{ + ProjectSet *plan; + Plan *subplan; + List *tlist; + + /* Since we intend to project, we don't need to constrain child tlist */ + subplan = create_plan_recurse(root, best_path->subpath, 0); + + tlist = build_path_tlist(root, &best_path->path); + + plan = make_project_set(tlist, subplan); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_material_plan + * Create a Material plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Material * +create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) +{ + Material *plan; + Plan *subplan; + + /* + * We don't want any excess columns in the materialized tuples, so request + * a smaller tlist. Otherwise, since Material doesn't project, tlist + * requirements pass through. + */ + subplan = create_plan_recurse(root, best_path->subpath, + flags | CP_SMALL_TLIST); + + plan = make_material(subplan); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_memoize_plan + * Create a Memoize plan for 'best_path' and (recursively) plans for its + * subpaths. + * + * Returns a Plan node. + */ +static Memoize * +create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags) +{ + Memoize *plan; + Bitmapset *keyparamids; + Plan *subplan; + Oid *operators; + Oid *collations; + List *param_exprs = NIL; + ListCell *lc; + ListCell *lc2; + int nkeys; + int i; + + subplan = create_plan_recurse(root, best_path->subpath, + flags | CP_SMALL_TLIST); + + param_exprs = (List *) replace_nestloop_params(root, (Node *) + best_path->param_exprs); + + nkeys = list_length(param_exprs); + Assert(nkeys > 0); + operators = palloc(nkeys * sizeof(Oid)); + collations = palloc(nkeys * sizeof(Oid)); + + i = 0; + forboth(lc, param_exprs, lc2, best_path->hash_operators) + { + Expr *param_expr = (Expr *) lfirst(lc); + Oid opno = lfirst_oid(lc2); + + operators[i] = opno; + collations[i] = exprCollation((Node *) param_expr); + i++; + } + + keyparamids = pull_paramids((Expr *) param_exprs); + + plan = make_memoize(subplan, operators, collations, param_exprs, + best_path->singlerow, best_path->binary_mode, + best_path->est_entries, keyparamids); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_unique_plan + * Create a Unique plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Plan * +create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) +{ + Plan *plan; + Plan *subplan; + List *in_operators; + List *uniq_exprs; + List *newtlist; + int nextresno; + bool newitems; + int numGroupCols; + AttrNumber *groupColIdx; + Oid *groupCollations; + int groupColPos; + ListCell *l; + + /* Unique doesn't project, so tlist requirements pass through */ + subplan = create_plan_recurse(root, best_path->subpath, flags); + + /* Done if we don't need to do any actual unique-ifying */ + if (best_path->umethod == UNIQUE_PATH_NOOP) + return subplan; + + /* + * As constructed, the subplan has a "flat" tlist containing just the Vars + * needed here and at upper levels. The values we are supposed to + * unique-ify may be expressions in these variables. We have to add any + * such expressions to the subplan's tlist. + * + * The subplan may have a "physical" tlist if it is a simple scan plan. If + * we're going to sort, this should be reduced to the regular tlist, so + * that we don't sort more data than we need to. For hashing, the tlist + * should be left as-is if we don't need to add any expressions; but if we + * do have to add expressions, then a projection step will be needed at + * runtime anyway, so we may as well remove unneeded items. Therefore + * newtlist starts from build_path_tlist() not just a copy of the + * subplan's tlist; and we don't install it into the subplan unless we are + * sorting or stuff has to be added. + */ + in_operators = best_path->in_operators; + uniq_exprs = best_path->uniq_exprs; + + /* initialize modified subplan tlist as just the "required" vars */ + newtlist = build_path_tlist(root, &best_path->path); + nextresno = list_length(newtlist) + 1; + newitems = false; + + foreach(l, uniq_exprs) + { + Expr *uniqexpr = lfirst(l); + TargetEntry *tle; + + tle = tlist_member(uniqexpr, newtlist); + if (!tle) + { + tle = makeTargetEntry((Expr *) uniqexpr, + nextresno, + NULL, + false); + newtlist = lappend(newtlist, tle); + nextresno++; + newitems = true; + } + } + + /* Use change_plan_targetlist in case we need to insert a Result node */ + if (newitems || best_path->umethod == UNIQUE_PATH_SORT) + subplan = change_plan_targetlist(subplan, newtlist, + best_path->path.parallel_safe); + + /* + * Build control information showing which subplan output columns are to + * be examined by the grouping step. Unfortunately we can't merge this + * with the previous loop, since we didn't then know which version of the + * subplan tlist we'd end up using. + */ + newtlist = subplan->targetlist; + numGroupCols = list_length(uniq_exprs); + groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); + + groupColPos = 0; + foreach(l, uniq_exprs) + { + Expr *uniqexpr = lfirst(l); + TargetEntry *tle; + + tle = tlist_member(uniqexpr, newtlist); + if (!tle) /* shouldn't happen */ + elog(ERROR, "failed to find unique expression in subplan tlist"); + groupColIdx[groupColPos] = tle->resno; + groupCollations[groupColPos] = exprCollation((Node *) tle->expr); + groupColPos++; + } + + if (best_path->umethod == UNIQUE_PATH_HASH) + { + Oid *groupOperators; + + /* + * Get the hashable equality operators for the Agg node to use. + * Normally these are the same as the IN clause operators, but if + * those are cross-type operators then the equality operators are the + * ones for the IN clause operators' RHS datatype. + */ + groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid)); + groupColPos = 0; + foreach(l, in_operators) + { + Oid in_oper = lfirst_oid(l); + Oid eq_oper; + + if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper)) + elog(ERROR, "could not find compatible hash operator for operator %u", + in_oper); + groupOperators[groupColPos++] = eq_oper; + } + + /* + * Since the Agg node is going to project anyway, we can give it the + * minimum output tlist, without any stuff we might have added to the + * subplan tlist. + */ + plan = (Plan *) make_agg(build_path_tlist(root, &best_path->path), + NIL, + AGG_HASHED, + AGGSPLIT_SIMPLE, + numGroupCols, + groupColIdx, + groupOperators, + groupCollations, + NIL, + NIL, + best_path->path.rows, + 0, + subplan); + } + else + { + List *sortList = NIL; + Sort *sort; + + /* Create an ORDER BY list to sort the input compatibly */ + groupColPos = 0; + foreach(l, in_operators) + { + Oid in_oper = lfirst_oid(l); + Oid sortop; + Oid eqop; + TargetEntry *tle; + SortGroupClause *sortcl; + + sortop = get_ordering_op_for_equality_op(in_oper, false); + if (!OidIsValid(sortop)) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); + + /* + * The Unique node will need equality operators. Normally these + * are the same as the IN clause operators, but if those are + * cross-type operators then the equality operators are the ones + * for the IN clause operators' RHS datatype. + */ + eqop = get_equality_op_for_ordering_op(sortop, NULL); + if (!OidIsValid(eqop)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + sortop); + + tle = get_tle_by_resno(subplan->targetlist, + groupColIdx[groupColPos]); + Assert(tle != NULL); + + sortcl = makeNode(SortGroupClause); + sortcl->tleSortGroupRef = assignSortGroupRef(tle, + subplan->targetlist); + sortcl->eqop = eqop; + sortcl->sortop = sortop; + sortcl->nulls_first = false; + sortcl->hashable = false; /* no need to make this accurate */ + sortList = lappend(sortList, sortcl); + groupColPos++; + } + sort = make_sort_from_sortclauses(sortList, subplan); + label_sort_with_costsize(root, sort, -1.0); + plan = (Plan *) make_unique_from_sortclauses((Plan *) sort, sortList); + } + + /* Copy cost data from Path to Plan */ + copy_generic_path_info(plan, &best_path->path); + + return plan; +} + +/* + * create_gather_plan + * + * Create a Gather plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Gather * +create_gather_plan(PlannerInfo *root, GatherPath *best_path) +{ + Gather *gather_plan; + Plan *subplan; + List *tlist; + + /* + * Push projection down to the child node. That way, the projection work + * is parallelized, and there can be no system columns in the result (they + * can't travel through a tuple queue because it uses MinimalTuple + * representation). + */ + subplan = create_plan_recurse(root, best_path->subpath, CP_EXACT_TLIST); + + tlist = build_path_tlist(root, &best_path->path); + + gather_plan = make_gather(tlist, + NIL, + best_path->num_workers, + assign_special_exec_param(root), + best_path->single_copy, + subplan); + + copy_generic_path_info(&gather_plan->plan, &best_path->path); + + /* use parallel mode for parallel plans. */ + root->glob->parallelModeNeeded = true; + + return gather_plan; +} + +/* + * create_gather_merge_plan + * + * Create a Gather Merge plan for 'best_path' and (recursively) + * plans for its subpaths. + */ +static GatherMerge * +create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) +{ + GatherMerge *gm_plan; + Plan *subplan; + List *pathkeys = best_path->path.pathkeys; + List *tlist = build_path_tlist(root, &best_path->path); + + /* As with Gather, project away columns in the workers. */ + subplan = create_plan_recurse(root, best_path->subpath, CP_EXACT_TLIST); + + /* Create a shell for a GatherMerge plan. */ + gm_plan = makeNode(GatherMerge); + gm_plan->plan.targetlist = tlist; + gm_plan->num_workers = best_path->num_workers; + copy_generic_path_info(&gm_plan->plan, &best_path->path); + + /* Assign the rescan Param. */ + gm_plan->rescan_param = assign_special_exec_param(root); + + /* Gather Merge is pointless with no pathkeys; use Gather instead. */ + Assert(pathkeys != NIL); + + /* Compute sort column info, and adjust subplan's tlist as needed */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + best_path->subpath->parent->relids, + gm_plan->sortColIdx, + false, + &gm_plan->numCols, + &gm_plan->sortColIdx, + &gm_plan->sortOperators, + &gm_plan->collations, + &gm_plan->nullsFirst); + + + /* + * All gather merge paths should have already guaranteed the necessary + * sort order either by adding an explicit sort node or by using presorted + * input. We can't simply add a sort here on additional pathkeys, because + * we can't guarantee the sort would be safe. For example, expressions may + * be volatile or otherwise parallel unsafe. + */ + if (!pathkeys_contained_in(pathkeys, best_path->subpath->pathkeys)) + elog(ERROR, "gather merge input not sufficiently sorted"); + + /* Now insert the subplan under GatherMerge. */ + gm_plan->plan.lefttree = subplan; + + /* use parallel mode for parallel plans. */ + root->glob->parallelModeNeeded = true; + + return gm_plan; +} + +/* + * create_projection_plan + * + * Create a plan tree to do a projection step and (recursively) plans + * for its subpaths. We may need a Result node for the projection, + * but sometimes we can just let the subplan do the work. + */ +static Plan * +create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) +{ + Plan *plan; + Plan *subplan; + List *tlist; + bool needs_result_node = false; + + /* + * Convert our subpath to a Plan and determine whether we need a Result + * node. + * + * In most cases where we don't need to project, creation_projection_path + * will have set dummypp, but not always. First, some createplan.c + * routines change the tlists of their nodes. (An example is that + * create_merge_append_plan might add resjunk sort columns to a + * MergeAppend.) Second, create_projection_path has no way of knowing + * what path node will be placed on top of the projection path and + * therefore can't predict whether it will require an exact tlist. For + * both of these reasons, we have to recheck here. + */ + if (use_physical_tlist(root, &best_path->path, flags)) + { + /* + * Our caller doesn't really care what tlist we return, so we don't + * actually need to project. However, we may still need to ensure + * proper sortgroupref labels, if the caller cares about those. + */ + subplan = create_plan_recurse(root, best_path->subpath, 0); + tlist = subplan->targetlist; + if (flags & CP_LABEL_TLIST) + apply_pathtarget_labeling_to_tlist(tlist, + best_path->path.pathtarget); + } + else if (is_projection_capable_path(best_path->subpath)) + { + /* + * Our caller requires that we return the exact tlist, but no separate + * result node is needed because the subpath is projection-capable. + * Tell create_plan_recurse that we're going to ignore the tlist it + * produces. + */ + subplan = create_plan_recurse(root, best_path->subpath, + CP_IGNORE_TLIST); + Assert(is_projection_capable_plan(subplan)); + tlist = build_path_tlist(root, &best_path->path); + } + else + { + /* + * It looks like we need a result node, unless by good fortune the + * requested tlist is exactly the one the child wants to produce. + */ + subplan = create_plan_recurse(root, best_path->subpath, 0); + tlist = build_path_tlist(root, &best_path->path); + needs_result_node = !tlist_same_exprs(tlist, subplan->targetlist); + } + + /* + * If we make a different decision about whether to include a Result node + * than create_projection_path did, we'll have made slightly wrong cost + * estimates; but label the plan with the cost estimates we actually used, + * not "corrected" ones. (XXX this could be cleaned up if we moved more + * of the sortcolumn setup logic into Path creation, but that would add + * expense to creating Paths we might end up not using.) + */ + if (!needs_result_node) + { + /* Don't need a separate Result, just assign tlist to subplan */ + plan = subplan; + plan->targetlist = tlist; + + /* Label plan with the estimated costs we actually used */ + plan->startup_cost = best_path->path.startup_cost; + plan->total_cost = best_path->path.total_cost; + plan->plan_rows = best_path->path.rows; + plan->plan_width = best_path->path.pathtarget->width; + plan->parallel_safe = best_path->path.parallel_safe; + /* ... but don't change subplan's parallel_aware flag */ + } + else + { + /* We need a Result node */ + plan = (Plan *) make_result(tlist, NULL, subplan); + + copy_generic_path_info(plan, (Path *) best_path); + } + + return plan; +} + +/* + * inject_projection_plan + * Insert a Result node to do a projection step. + * + * This is used in a few places where we decide on-the-fly that we need a + * projection step as part of the tree generated for some Path node. + * We should try to get rid of this in favor of doing it more honestly. + * + * One reason it's ugly is we have to be told the right parallel_safe marking + * to apply (since the tlist might be unsafe even if the child plan is safe). + */ +static Plan * +inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe) +{ + Plan *plan; + + plan = (Plan *) make_result(tlist, NULL, subplan); + + /* + * In principle, we should charge tlist eval cost plus cpu_per_tuple per + * row for the Result node. But the former has probably been factored in + * already and the latter was not accounted for during Path construction, + * so being formally correct might just make the EXPLAIN output look less + * consistent not more so. Hence, just copy the subplan's cost. + */ + copy_plan_costsize(plan, subplan); + plan->parallel_safe = parallel_safe; + + return plan; +} + +/* + * change_plan_targetlist + * Externally available wrapper for inject_projection_plan. + * + * This is meant for use by FDW plan-generation functions, which might + * want to adjust the tlist computed by some subplan tree. In general, + * a Result node is needed to compute the new tlist, but we can optimize + * some cases. + * + * In most cases, tlist_parallel_safe can just be passed as the parallel_safe + * flag of the FDW's own Path node. + */ +Plan * +change_plan_targetlist(Plan *subplan, List *tlist, bool tlist_parallel_safe) +{ + /* + * If the top plan node can't do projections and its existing target list + * isn't already what we need, we need to add a Result node to help it + * along. + */ + if (!is_projection_capable_plan(subplan) && + !tlist_same_exprs(tlist, subplan->targetlist)) + subplan = inject_projection_plan(subplan, tlist, + subplan->parallel_safe && + tlist_parallel_safe); + else + { + /* Else we can just replace the plan node's tlist */ + subplan->targetlist = tlist; + subplan->parallel_safe &= tlist_parallel_safe; + } + return subplan; +} + +/* + * create_sort_plan + * + * Create a Sort plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Sort * +create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) +{ + Sort *plan; + Plan *subplan; + + /* + * We don't want any excess columns in the sorted tuples, so request a + * smaller tlist. Otherwise, since Sort doesn't project, tlist + * requirements pass through. + */ + subplan = create_plan_recurse(root, best_path->subpath, + flags | CP_SMALL_TLIST); + + /* + * make_sort_from_pathkeys indirectly calls find_ec_member_matching_expr, + * which will ignore any child EC members that don't belong to the given + * relids. Thus, if this sort path is based on a child relation, we must + * pass its relids. + */ + plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, + IS_OTHER_REL(best_path->subpath->parent) ? + best_path->path.parent->relids : NULL); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_incrementalsort_plan + * + * Do the same as create_sort_plan, but create IncrementalSort plan. + */ +static IncrementalSort * +create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, + int flags) +{ + IncrementalSort *plan; + Plan *subplan; + + /* See comments in create_sort_plan() above */ + subplan = create_plan_recurse(root, best_path->spath.subpath, + flags | CP_SMALL_TLIST); + plan = make_incrementalsort_from_pathkeys(subplan, + best_path->spath.path.pathkeys, + IS_OTHER_REL(best_path->spath.subpath->parent) ? + best_path->spath.path.parent->relids : NULL, + best_path->nPresortedCols); + + copy_generic_path_info(&plan->sort.plan, (Path *) best_path); + + return plan; +} + +/* + * create_group_plan + * + * Create a Group plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Group * +create_group_plan(PlannerInfo *root, GroupPath *best_path) +{ + Group *plan; + Plan *subplan; + List *tlist; + List *quals; + + /* + * Group can project, so no need to be terribly picky about child tlist, + * but we do need grouping columns to be available + */ + subplan = create_plan_recurse(root, best_path->subpath, CP_LABEL_TLIST); + + tlist = build_path_tlist(root, &best_path->path); + + quals = order_qual_clauses(root, best_path->qual); + + plan = make_group(tlist, + quals, + list_length(best_path->groupClause), + extract_grouping_cols(best_path->groupClause, + subplan->targetlist), + extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), + subplan); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_upper_unique_plan + * + * Create a Unique plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Unique * +create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flags) +{ + Unique *plan; + Plan *subplan; + + /* + * Unique doesn't project, so tlist requirements pass through; moreover we + * need grouping columns to be labeled. + */ + subplan = create_plan_recurse(root, best_path->subpath, + flags | CP_LABEL_TLIST); + + plan = make_unique_from_pathkeys(subplan, + best_path->path.pathkeys, + best_path->numkeys); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_agg_plan + * + * Create an Agg plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Agg * +create_agg_plan(PlannerInfo *root, AggPath *best_path) +{ + Agg *plan; + Plan *subplan; + List *tlist; + List *quals; + + /* + * Agg can project, so no need to be terribly picky about child tlist, but + * we do need grouping columns to be available + */ + subplan = create_plan_recurse(root, best_path->subpath, CP_LABEL_TLIST); + + tlist = build_path_tlist(root, &best_path->path); + + quals = order_qual_clauses(root, best_path->qual); + + plan = make_agg(tlist, quals, + best_path->aggstrategy, + best_path->aggsplit, + list_length(best_path->groupClause), + extract_grouping_cols(best_path->groupClause, + subplan->targetlist), + extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), + NIL, + NIL, + best_path->numGroups, + best_path->transitionSpace, + subplan); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * Given a groupclause for a collection of grouping sets, produce the + * corresponding groupColIdx. + * + * root->grouping_map maps the tleSortGroupRef to the actual column position in + * the input tuple. So we get the ref from the entries in the groupclause and + * look them up there. + */ +static AttrNumber * +remap_groupColIdx(PlannerInfo *root, List *groupClause) +{ + AttrNumber *grouping_map = root->grouping_map; + AttrNumber *new_grpColIdx; + ListCell *lc; + int i; + + Assert(grouping_map); + + new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(groupClause)); + + i = 0; + foreach(lc, groupClause) + { + SortGroupClause *clause = lfirst(lc); + + new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef]; + } + + return new_grpColIdx; +} + +/* + * create_groupingsets_plan + * Create a plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * What we emit is an Agg plan with some vestigial Agg and Sort nodes + * hanging off the side. The top Agg implements the last grouping set + * specified in the GroupingSetsPath, and any additional grouping sets + * each give rise to a subsidiary Agg and Sort node in the top Agg's + * "chain" list. These nodes don't participate in the plan directly, + * but they are a convenient way to represent the required data for + * the extra steps. + * + * Returns a Plan node. + */ +static Plan * +create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) +{ + Agg *plan; + Plan *subplan; + List *rollups = best_path->rollups; + AttrNumber *grouping_map; + int maxref; + List *chain; + ListCell *lc; + + /* Shouldn't get here without grouping sets */ + Assert(root->parse->groupingSets); + Assert(rollups != NIL); + + /* + * Agg can project, so no need to be terribly picky about child tlist, but + * we do need grouping columns to be available + */ + subplan = create_plan_recurse(root, best_path->subpath, CP_LABEL_TLIST); + + /* + * Compute the mapping from tleSortGroupRef to column index in the child's + * tlist. First, identify max SortGroupRef in groupClause, for array + * sizing. + */ + maxref = 0; + foreach(lc, root->parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(lc); + + if (gc->tleSortGroupRef > maxref) + maxref = gc->tleSortGroupRef; + } + + grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber)); + + /* Now look up the column numbers in the child's tlist */ + foreach(lc, root->parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist); + + grouping_map[gc->tleSortGroupRef] = tle->resno; + } + + /* + * During setrefs.c, we'll need the grouping_map to fix up the cols lists + * in GroupingFunc nodes. Save it for setrefs.c to use. + */ + Assert(root->grouping_map == NULL); + root->grouping_map = grouping_map; + + /* + * Generate the side nodes that describe the other sort and group + * operations besides the top one. Note that we don't worry about putting + * accurate cost estimates in the side nodes; only the topmost Agg node's + * costs will be shown by EXPLAIN. + */ + chain = NIL; + if (list_length(rollups) > 1) + { + bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed; + + for_each_from(lc, rollups, 1) + { + RollupData *rollup = lfirst(lc); + AttrNumber *new_grpColIdx; + Plan *sort_plan = NULL; + Plan *agg_plan; + AggStrategy strat; + + new_grpColIdx = remap_groupColIdx(root, rollup->groupClause); + + if (!rollup->is_hashed && !is_first_sort) + { + sort_plan = (Plan *) + make_sort_from_groupcols(rollup->groupClause, + new_grpColIdx, + subplan); + } + + if (!rollup->is_hashed) + is_first_sort = false; + + if (rollup->is_hashed) + strat = AGG_HASHED; + else if (list_length(linitial(rollup->gsets)) == 0) + strat = AGG_PLAIN; + else + strat = AGG_SORTED; + + agg_plan = (Plan *) make_agg(NIL, + NIL, + strat, + AGGSPLIT_SIMPLE, + list_length((List *) linitial(rollup->gsets)), + new_grpColIdx, + extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), + rollup->gsets, + NIL, + rollup->numGroups, + best_path->transitionSpace, + sort_plan); + + /* + * Remove stuff we don't need to avoid bloating debug output. + */ + if (sort_plan) + { + sort_plan->targetlist = NIL; + sort_plan->lefttree = NULL; + } + + chain = lappend(chain, agg_plan); + } + } + + /* + * Now make the real Agg node + */ + { + RollupData *rollup = linitial(rollups); + AttrNumber *top_grpColIdx; + int numGroupCols; + + top_grpColIdx = remap_groupColIdx(root, rollup->groupClause); + + numGroupCols = list_length((List *) linitial(rollup->gsets)); + + plan = make_agg(build_path_tlist(root, &best_path->path), + best_path->qual, + best_path->aggstrategy, + AGGSPLIT_SIMPLE, + numGroupCols, + top_grpColIdx, + extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), + rollup->gsets, + chain, + rollup->numGroups, + best_path->transitionSpace, + subplan); + + /* Copy cost data from Path to Plan */ + copy_generic_path_info(&plan->plan, &best_path->path); + } + + return (Plan *) plan; +} + +/* + * create_minmaxagg_plan + * + * Create a Result plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Result * +create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) +{ + Result *plan; + List *tlist; + ListCell *lc; + + /* Prepare an InitPlan for each aggregate's subquery. */ + foreach(lc, best_path->mmaggregates) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + PlannerInfo *subroot = mminfo->subroot; + Query *subparse = subroot->parse; + Plan *plan; + + /* + * Generate the plan for the subquery. We already have a Path, but we + * have to convert it to a Plan and attach a LIMIT node above it. + * Since we are entering a different planner context (subroot), + * recurse to create_plan not create_plan_recurse. + */ + plan = create_plan(subroot, mminfo->path); + + plan = (Plan *) make_limit(plan, + subparse->limitOffset, + subparse->limitCount, + subparse->limitOption, + 0, NULL, NULL, NULL); + + /* Must apply correct cost/width data to Limit node */ + plan->startup_cost = mminfo->path->startup_cost; + plan->total_cost = mminfo->pathcost; + plan->plan_rows = 1; + plan->plan_width = mminfo->path->pathtarget->width; + plan->parallel_aware = false; + plan->parallel_safe = mminfo->path->parallel_safe; + + /* Convert the plan into an InitPlan in the outer query. */ + SS_make_initplan_from_plan(root, subroot, plan, mminfo->param); + } + + /* Generate the output plan --- basically just a Result */ + tlist = build_path_tlist(root, &best_path->path); + + plan = make_result(tlist, (Node *) best_path->quals, NULL); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + /* + * During setrefs.c, we'll need to replace references to the Agg nodes + * with InitPlan output params. (We can't just do that locally in the + * MinMaxAgg node, because path nodes above here may have Agg references + * as well.) Save the mmaggregates list to tell setrefs.c to do that. + */ + Assert(root->minmax_aggs == NIL); + root->minmax_aggs = best_path->mmaggregates; + + return plan; +} + +/* + * create_windowagg_plan + * + * Create a WindowAgg plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static WindowAgg * +create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) +{ + WindowAgg *plan; + WindowClause *wc = best_path->winclause; + int numPart = list_length(wc->partitionClause); + int numOrder = list_length(wc->orderClause); + Plan *subplan; + List *tlist; + int partNumCols; + AttrNumber *partColIdx; + Oid *partOperators; + Oid *partCollations; + int ordNumCols; + AttrNumber *ordColIdx; + Oid *ordOperators; + Oid *ordCollations; + ListCell *lc; + + /* + * Choice of tlist here is motivated by the fact that WindowAgg will be + * storing the input rows of window frames in a tuplestore; it therefore + * behooves us to request a small tlist to avoid wasting space. We do of + * course need grouping columns to be available. + */ + subplan = create_plan_recurse(root, best_path->subpath, + CP_LABEL_TLIST | CP_SMALL_TLIST); + + tlist = build_path_tlist(root, &best_path->path); + + /* + * Convert SortGroupClause lists into arrays of attr indexes and equality + * operators, as wanted by executor. (Note: in principle, it's possible + * to drop some of the sort columns, if they were proved redundant by + * pathkey logic. However, it doesn't seem worth going out of our way to + * optimize such cases. In any case, we must *not* remove the ordering + * column for RANGE OFFSET cases, as the executor needs that for in_range + * tests even if it's known to be equal to some partitioning column.) + */ + partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); + partOperators = (Oid *) palloc(sizeof(Oid) * numPart); + partCollations = (Oid *) palloc(sizeof(Oid) * numPart); + + partNumCols = 0; + foreach(lc, wc->partitionClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, subplan->targetlist); + + Assert(OidIsValid(sgc->eqop)); + partColIdx[partNumCols] = tle->resno; + partOperators[partNumCols] = sgc->eqop; + partCollations[partNumCols] = exprCollation((Node *) tle->expr); + partNumCols++; + } + + ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); + ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder); + + ordNumCols = 0; + foreach(lc, wc->orderClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, subplan->targetlist); + + Assert(OidIsValid(sgc->eqop)); + ordColIdx[ordNumCols] = tle->resno; + ordOperators[ordNumCols] = sgc->eqop; + ordCollations[ordNumCols] = exprCollation((Node *) tle->expr); + ordNumCols++; + } + + /* And finally we can make the WindowAgg node */ + plan = make_windowagg(tlist, + wc->winref, + partNumCols, + partColIdx, + partOperators, + partCollations, + ordNumCols, + ordColIdx, + ordOperators, + ordCollations, + wc->frameOptions, + wc->startOffset, + wc->endOffset, + wc->startInRangeFunc, + wc->endInRangeFunc, + wc->inRangeColl, + wc->inRangeAsc, + wc->inRangeNullsFirst, + wc->runCondition, + best_path->qual, + best_path->topwindow, + subplan); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_setop_plan + * + * Create a SetOp plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static SetOp * +create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) +{ + SetOp *plan; + Plan *subplan; + long numGroups; + + /* + * SetOp doesn't project, so tlist requirements pass through; moreover we + * need grouping columns to be labeled. + */ + subplan = create_plan_recurse(root, best_path->subpath, + flags | CP_LABEL_TLIST); + + /* Convert numGroups to long int --- but 'ware overflow! */ + numGroups = clamp_cardinality_to_long(best_path->numGroups); + + plan = make_setop(best_path->cmd, + best_path->strategy, + subplan, + best_path->distinctList, + best_path->flagColIdx, + best_path->firstFlag, + numGroups); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_recursiveunion_plan + * + * Create a RecursiveUnion plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static RecursiveUnion * +create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) +{ + RecursiveUnion *plan; + Plan *leftplan; + Plan *rightplan; + List *tlist; + long numGroups; + + /* Need both children to produce same tlist, so force it */ + leftplan = create_plan_recurse(root, best_path->leftpath, CP_EXACT_TLIST); + rightplan = create_plan_recurse(root, best_path->rightpath, CP_EXACT_TLIST); + + tlist = build_path_tlist(root, &best_path->path); + + /* Convert numGroups to long int --- but 'ware overflow! */ + numGroups = clamp_cardinality_to_long(best_path->numGroups); + + plan = make_recursive_union(tlist, + leftplan, + rightplan, + best_path->wtParam, + best_path->distinctList, + numGroups); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_lockrows_plan + * + * Create a LockRows plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static LockRows * +create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, + int flags) +{ + LockRows *plan; + Plan *subplan; + + /* LockRows doesn't project, so tlist requirements pass through */ + subplan = create_plan_recurse(root, best_path->subpath, flags); + + plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* + * create_modifytable_plan + * Create a ModifyTable plan for 'best_path'. + * + * Returns a Plan node. + */ +static ModifyTable * +create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) +{ + ModifyTable *plan; + Path *subpath = best_path->subpath; + Plan *subplan; + + /* Subplan must produce exactly the specified tlist */ + subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + + /* Transfer resname/resjunk labeling, too, to keep executor happy */ + apply_tlist_labeling(subplan->targetlist, root->processed_tlist); + + plan = make_modifytable(root, + subplan, + best_path->operation, + best_path->canSetTag, + best_path->nominalRelation, + best_path->rootRelation, + best_path->partColsUpdated, + best_path->resultRelations, + best_path->updateColnosLists, + best_path->withCheckOptionLists, + best_path->returningLists, + best_path->rowMarks, + best_path->onconflict, + best_path->mergeActionLists, + best_path->epqParam); + + copy_generic_path_info(&plan->plan, &best_path->path); + + return plan; +} + +/* + * create_limit_plan + * + * Create a Limit plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Limit * +create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) +{ + Limit *plan; + Plan *subplan; + int numUniqkeys = 0; + AttrNumber *uniqColIdx = NULL; + Oid *uniqOperators = NULL; + Oid *uniqCollations = NULL; + + /* Limit doesn't project, so tlist requirements pass through */ + subplan = create_plan_recurse(root, best_path->subpath, flags); + + /* Extract information necessary for comparing rows for WITH TIES. */ + if (best_path->limitOption == LIMIT_OPTION_WITH_TIES) + { + Query *parse = root->parse; + ListCell *l; + + numUniqkeys = list_length(parse->sortClause); + uniqColIdx = (AttrNumber *) palloc(numUniqkeys * sizeof(AttrNumber)); + uniqOperators = (Oid *) palloc(numUniqkeys * sizeof(Oid)); + uniqCollations = (Oid *) palloc(numUniqkeys * sizeof(Oid)); + + numUniqkeys = 0; + foreach(l, parse->sortClause) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, parse->targetList); + + uniqColIdx[numUniqkeys] = tle->resno; + uniqOperators[numUniqkeys] = sortcl->eqop; + uniqCollations[numUniqkeys] = exprCollation((Node *) tle->expr); + numUniqkeys++; + } + } + + plan = make_limit(subplan, + best_path->limitOffset, + best_path->limitCount, + best_path->limitOption, + numUniqkeys, uniqColIdx, uniqOperators, uniqCollations); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + + +/***************************************************************************** + * + * BASE-RELATION SCAN METHODS + * + *****************************************************************************/ + + +/* + * create_seqscan_plan + * Returns a seqscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static SeqScan * +create_seqscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + SeqScan *scan_plan; + Index scan_relid = best_path->parent->relid; + + /* it should be a base rel... */ + Assert(scan_relid > 0); + Assert(best_path->parent->rtekind == RTE_RELATION); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_seqscan(tlist, + scan_clauses, + scan_relid); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_samplescan_plan + * Returns a samplescan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static SampleScan * +create_samplescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + SampleScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + TableSampleClause *tsc; + + /* it should be a base rel with a tablesample clause... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + tsc = rte->tablesample; + Assert(tsc != NULL); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + tsc = (TableSampleClause *) + replace_nestloop_params(root, (Node *) tsc); + } + + scan_plan = make_samplescan(tlist, + scan_clauses, + scan_relid, + tsc); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_indexscan_plan + * Returns an indexscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + * + * We use this for both plain IndexScans and IndexOnlyScans, because the + * qual preprocessing work is the same for both. Note that the caller tells + * us which to build --- we don't look at best_path->path.pathtype, because + * create_bitmap_subplan needs to be able to override the prior decision. + */ +static Scan * +create_indexscan_plan(PlannerInfo *root, + IndexPath *best_path, + List *tlist, + List *scan_clauses, + bool indexonly) +{ + Scan *scan_plan; + List *indexclauses = best_path->indexclauses; + List *indexorderbys = best_path->indexorderbys; + Index baserelid = best_path->path.parent->relid; + IndexOptInfo *indexinfo = best_path->indexinfo; + Oid indexoid = indexinfo->indexoid; + List *qpqual; + List *stripped_indexquals; + List *fixed_indexquals; + List *fixed_indexorderbys; + List *indexorderbyops = NIL; + ListCell *l; + + /* it should be a base rel... */ + Assert(baserelid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* + * Extract the index qual expressions (stripped of RestrictInfos) from the + * IndexClauses list, and prepare a copy with index Vars substituted for + * table Vars. (This step also does replace_nestloop_params on the + * fixed_indexquals.) + */ + fix_indexqual_references(root, best_path, + &stripped_indexquals, + &fixed_indexquals); + + /* + * Likewise fix up index attr references in the ORDER BY expressions. + */ + fixed_indexorderbys = fix_indexorderby_references(root, best_path); + + /* + * The qpqual list must contain all restrictions not automatically handled + * by the index, other than pseudoconstant clauses which will be handled + * by a separate gating plan node. All the predicates in the indexquals + * will be checked (either by the index itself, or by nodeIndexscan.c), + * but if there are any "special" operators involved then they must be + * included in qpqual. The upshot is that qpqual must contain + * scan_clauses minus whatever appears in indexquals. + * + * is_redundant_with_indexclauses() detects cases where a scan clause is + * present in the indexclauses list or is generated from the same + * EquivalenceClass as some indexclause, and is therefore redundant with + * it, though not equal. (The latter happens when indxpath.c prefers a + * different derived equality than what generate_join_implied_equalities + * picked for a parameterized scan's ppi_clauses.) Note that it will not + * match to lossy index clauses, which is critical because we have to + * include the original clause in qpqual in that case. + * + * In some situations (particularly with OR'd index conditions) we may + * have scan_clauses that are not equal to, but are logically implied by, + * the index quals; so we also try a predicate_implied_by() check to see + * if we can discard quals that way. (predicate_implied_by assumes its + * first input contains only immutable functions, so we have to check + * that.) + * + * Note: if you change this bit of code you should also look at + * extract_nonindex_conditions() in costsize.c. + */ + qpqual = NIL; + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (is_redundant_with_indexclauses(rinfo, indexclauses)) + continue; /* dup or derived from same EquivalenceClass */ + if (!contain_mutable_functions((Node *) rinfo->clause) && + predicate_implied_by(list_make1(rinfo->clause), stripped_indexquals, + false)) + continue; /* provably implied by indexquals */ + qpqual = lappend(qpqual, rinfo); + } + + /* Sort clauses into best execution order */ + qpqual = order_qual_clauses(root, qpqual); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + qpqual = extract_actual_clauses(qpqual, false); + + /* + * We have to replace any outer-relation variables with nestloop params in + * the indexqualorig, qpqual, and indexorderbyorig expressions. A bit + * annoying to have to do this separately from the processing in + * fix_indexqual_references --- rethink this when generalizing the inner + * indexscan support. But note we can't really do this earlier because + * it'd break the comparisons to predicates above ... (or would it? Those + * wouldn't have outer refs) + */ + if (best_path->path.param_info) + { + stripped_indexquals = (List *) + replace_nestloop_params(root, (Node *) stripped_indexquals); + qpqual = (List *) + replace_nestloop_params(root, (Node *) qpqual); + indexorderbys = (List *) + replace_nestloop_params(root, (Node *) indexorderbys); + } + + /* + * If there are ORDER BY expressions, look up the sort operators for their + * result datatypes. + */ + if (indexorderbys) + { + ListCell *pathkeyCell, + *exprCell; + + /* + * PathKey contains OID of the btree opfamily we're sorting by, but + * that's not quite enough because we need the expression's datatype + * to look up the sort operator in the operator family. + */ + Assert(list_length(best_path->path.pathkeys) == list_length(indexorderbys)); + forboth(pathkeyCell, best_path->path.pathkeys, exprCell, indexorderbys) + { + PathKey *pathkey = (PathKey *) lfirst(pathkeyCell); + Node *expr = (Node *) lfirst(exprCell); + Oid exprtype = exprType(expr); + Oid sortop; + + /* Get sort operator from opfamily */ + sortop = get_opfamily_member(pathkey->pk_opfamily, + exprtype, + exprtype, + pathkey->pk_strategy); + if (!OidIsValid(sortop)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + pathkey->pk_strategy, exprtype, exprtype, pathkey->pk_opfamily); + indexorderbyops = lappend_oid(indexorderbyops, sortop); + } + } + + /* + * For an index-only scan, we must mark indextlist entries as resjunk if + * they are columns that the index AM can't return; this cues setrefs.c to + * not generate references to those columns. + */ + if (indexonly) + { + int i = 0; + + foreach(l, indexinfo->indextlist) + { + TargetEntry *indextle = (TargetEntry *) lfirst(l); + + indextle->resjunk = !indexinfo->canreturn[i]; + i++; + } + } + + /* Finally ready to build the plan node */ + if (indexonly) + scan_plan = (Scan *) make_indexonlyscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexinfo->indextlist, + best_path->indexscandir); + else + scan_plan = (Scan *) make_indexscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + indexorderbyops, + best_path->indexscandir); + + copy_generic_path_info(&scan_plan->plan, &best_path->path); + + return scan_plan; +} + +/* + * create_bitmap_scan_plan + * Returns a bitmap scan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static BitmapHeapScan * +create_bitmap_scan_plan(PlannerInfo *root, + BitmapHeapPath *best_path, + List *tlist, + List *scan_clauses) +{ + Index baserelid = best_path->path.parent->relid; + Plan *bitmapqualplan; + List *bitmapqualorig; + List *indexquals; + List *indexECs; + List *qpqual; + ListCell *l; + BitmapHeapScan *scan_plan; + + /* it should be a base rel... */ + Assert(baserelid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* Process the bitmapqual tree into a Plan tree and qual lists */ + bitmapqualplan = create_bitmap_subplan(root, best_path->bitmapqual, + &bitmapqualorig, &indexquals, + &indexECs); + + if (best_path->path.parallel_aware) + bitmap_subplan_mark_shared(bitmapqualplan); + + /* + * The qpqual list must contain all restrictions not automatically handled + * by the index, other than pseudoconstant clauses which will be handled + * by a separate gating plan node. All the predicates in the indexquals + * will be checked (either by the index itself, or by + * nodeBitmapHeapscan.c), but if there are any "special" operators + * involved then they must be added to qpqual. The upshot is that qpqual + * must contain scan_clauses minus whatever appears in indexquals. + * + * This loop is similar to the comparable code in create_indexscan_plan(), + * but with some differences because it has to compare the scan clauses to + * stripped (no RestrictInfos) indexquals. See comments there for more + * info. + * + * In normal cases simple equal() checks will be enough to spot duplicate + * clauses, so we try that first. We next see if the scan clause is + * redundant with any top-level indexqual by virtue of being generated + * from the same EC. After that, try predicate_implied_by(). + * + * Unlike create_indexscan_plan(), the predicate_implied_by() test here is + * useful for getting rid of qpquals that are implied by index predicates, + * because the predicate conditions are included in the "indexquals" + * returned by create_bitmap_subplan(). Bitmap scans have to do it that + * way because predicate conditions need to be rechecked if the scan + * becomes lossy, so they have to be included in bitmapqualorig. + */ + qpqual = NIL; + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + Node *clause = (Node *) rinfo->clause; + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (list_member(indexquals, clause)) + continue; /* simple duplicate */ + if (rinfo->parent_ec && list_member_ptr(indexECs, rinfo->parent_ec)) + continue; /* derived from same EquivalenceClass */ + if (!contain_mutable_functions(clause) && + predicate_implied_by(list_make1(clause), indexquals, false)) + continue; /* provably implied by indexquals */ + qpqual = lappend(qpqual, rinfo); + } + + /* Sort clauses into best execution order */ + qpqual = order_qual_clauses(root, qpqual); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + qpqual = extract_actual_clauses(qpqual, false); + + /* + * When dealing with special operators, we will at this point have + * duplicate clauses in qpqual and bitmapqualorig. We may as well drop + * 'em from bitmapqualorig, since there's no point in making the tests + * twice. + */ + bitmapqualorig = list_difference_ptr(bitmapqualorig, qpqual); + + /* + * We have to replace any outer-relation variables with nestloop params in + * the qpqual and bitmapqualorig expressions. (This was already done for + * expressions attached to plan nodes in the bitmapqualplan tree.) + */ + if (best_path->path.param_info) + { + qpqual = (List *) + replace_nestloop_params(root, (Node *) qpqual); + bitmapqualorig = (List *) + replace_nestloop_params(root, (Node *) bitmapqualorig); + } + + /* Finally ready to build the plan node */ + scan_plan = make_bitmap_heapscan(tlist, + qpqual, + bitmapqualplan, + bitmapqualorig, + baserelid); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + +/* + * Given a bitmapqual tree, generate the Plan tree that implements it + * + * As byproducts, we also return in *qual and *indexqual the qual lists + * (in implicit-AND form, without RestrictInfos) describing the original index + * conditions and the generated indexqual conditions. (These are the same in + * simple cases, but when special index operators are involved, the former + * list includes the special conditions while the latter includes the actual + * indexable conditions derived from them.) Both lists include partial-index + * predicates, because we have to recheck predicates as well as index + * conditions if the bitmap scan becomes lossy. + * + * In addition, we return a list of EquivalenceClass pointers for all the + * top-level indexquals that were possibly-redundantly derived from ECs. + * This allows removal of scan_clauses that are redundant with such quals. + * (We do not attempt to detect such redundancies for quals that are within + * OR subtrees. This could be done in a less hacky way if we returned the + * indexquals in RestrictInfo form, but that would be slower and still pretty + * messy, since we'd have to build new RestrictInfos in many cases.) + */ +static Plan * +create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, + List **qual, List **indexqual, List **indexECs) +{ + Plan *plan; + + if (IsA(bitmapqual, BitmapAndPath)) + { + BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; + List *subplans = NIL; + List *subquals = NIL; + List *subindexquals = NIL; + List *subindexECs = NIL; + ListCell *l; + + /* + * There may well be redundant quals among the subplans, since a + * top-level WHERE qual might have gotten used to form several + * different index quals. We don't try exceedingly hard to eliminate + * redundancies, but we do eliminate obvious duplicates by using + * list_concat_unique. + */ + foreach(l, apath->bitmapquals) + { + Plan *subplan; + List *subqual; + List *subindexqual; + List *subindexEC; + + subplan = create_bitmap_subplan(root, (Path *) lfirst(l), + &subqual, &subindexqual, + &subindexEC); + subplans = lappend(subplans, subplan); + subquals = list_concat_unique(subquals, subqual); + subindexquals = list_concat_unique(subindexquals, subindexqual); + /* Duplicates in indexECs aren't worth getting rid of */ + subindexECs = list_concat(subindexECs, subindexEC); + } + plan = (Plan *) make_bitmap_and(subplans); + plan->startup_cost = apath->path.startup_cost; + plan->total_cost = apath->path.total_cost; + plan->plan_rows = + clamp_row_est(apath->bitmapselectivity * apath->path.parent->tuples); + plan->plan_width = 0; /* meaningless */ + plan->parallel_aware = false; + plan->parallel_safe = apath->path.parallel_safe; + *qual = subquals; + *indexqual = subindexquals; + *indexECs = subindexECs; + } + else if (IsA(bitmapqual, BitmapOrPath)) + { + BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; + List *subplans = NIL; + List *subquals = NIL; + List *subindexquals = NIL; + bool const_true_subqual = false; + bool const_true_subindexqual = false; + ListCell *l; + + /* + * Here, we only detect qual-free subplans. A qual-free subplan would + * cause us to generate "... OR true ..." which we may as well reduce + * to just "true". We do not try to eliminate redundant subclauses + * because (a) it's not as likely as in the AND case, and (b) we might + * well be working with hundreds or even thousands of OR conditions, + * perhaps from a long IN list. The performance of list_append_unique + * would be unacceptable. + */ + foreach(l, opath->bitmapquals) + { + Plan *subplan; + List *subqual; + List *subindexqual; + List *subindexEC; + + subplan = create_bitmap_subplan(root, (Path *) lfirst(l), + &subqual, &subindexqual, + &subindexEC); + subplans = lappend(subplans, subplan); + if (subqual == NIL) + const_true_subqual = true; + else if (!const_true_subqual) + subquals = lappend(subquals, + make_ands_explicit(subqual)); + if (subindexqual == NIL) + const_true_subindexqual = true; + else if (!const_true_subindexqual) + subindexquals = lappend(subindexquals, + make_ands_explicit(subindexqual)); + } + + /* + * In the presence of ScalarArrayOpExpr quals, we might have built + * BitmapOrPaths with just one subpath; don't add an OR step. + */ + if (list_length(subplans) == 1) + { + plan = (Plan *) linitial(subplans); + } + else + { + plan = (Plan *) make_bitmap_or(subplans); + plan->startup_cost = opath->path.startup_cost; + plan->total_cost = opath->path.total_cost; + plan->plan_rows = + clamp_row_est(opath->bitmapselectivity * opath->path.parent->tuples); + plan->plan_width = 0; /* meaningless */ + plan->parallel_aware = false; + plan->parallel_safe = opath->path.parallel_safe; + } + + /* + * If there were constant-TRUE subquals, the OR reduces to constant + * TRUE. Also, avoid generating one-element ORs, which could happen + * due to redundancy elimination or ScalarArrayOpExpr quals. + */ + if (const_true_subqual) + *qual = NIL; + else if (list_length(subquals) <= 1) + *qual = subquals; + else + *qual = list_make1(make_orclause(subquals)); + if (const_true_subindexqual) + *indexqual = NIL; + else if (list_length(subindexquals) <= 1) + *indexqual = subindexquals; + else + *indexqual = list_make1(make_orclause(subindexquals)); + *indexECs = NIL; + } + else if (IsA(bitmapqual, IndexPath)) + { + IndexPath *ipath = (IndexPath *) bitmapqual; + IndexScan *iscan; + List *subquals; + List *subindexquals; + List *subindexECs; + ListCell *l; + + /* Use the regular indexscan plan build machinery... */ + iscan = castNode(IndexScan, + create_indexscan_plan(root, ipath, + NIL, NIL, false)); + /* then convert to a bitmap indexscan */ + plan = (Plan *) make_bitmap_indexscan(iscan->scan.scanrelid, + iscan->indexid, + iscan->indexqual, + iscan->indexqualorig); + /* and set its cost/width fields appropriately */ + plan->startup_cost = 0.0; + plan->total_cost = ipath->indextotalcost; + plan->plan_rows = + clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples); + plan->plan_width = 0; /* meaningless */ + plan->parallel_aware = false; + plan->parallel_safe = ipath->path.parallel_safe; + /* Extract original index clauses, actual index quals, relevant ECs */ + subquals = NIL; + subindexquals = NIL; + subindexECs = NIL; + foreach(l, ipath->indexclauses) + { + IndexClause *iclause = (IndexClause *) lfirst(l); + RestrictInfo *rinfo = iclause->rinfo; + + Assert(!rinfo->pseudoconstant); + subquals = lappend(subquals, rinfo->clause); + subindexquals = list_concat(subindexquals, + get_actual_clauses(iclause->indexquals)); + if (rinfo->parent_ec) + subindexECs = lappend(subindexECs, rinfo->parent_ec); + } + /* We can add any index predicate conditions, too */ + foreach(l, ipath->indexinfo->indpred) + { + Expr *pred = (Expr *) lfirst(l); + + /* + * We know that the index predicate must have been implied by the + * query condition as a whole, but it may or may not be implied by + * the conditions that got pushed into the bitmapqual. Avoid + * generating redundant conditions. + */ + if (!predicate_implied_by(list_make1(pred), subquals, false)) + { + subquals = lappend(subquals, pred); + subindexquals = lappend(subindexquals, pred); + } + } + *qual = subquals; + *indexqual = subindexquals; + *indexECs = subindexECs; + } + else + { + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); + plan = NULL; /* keep compiler quiet */ + } + + return plan; +} + +/* + * create_tidscan_plan + * Returns a tidscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static TidScan * +create_tidscan_plan(PlannerInfo *root, TidPath *best_path, + List *tlist, List *scan_clauses) +{ + TidScan *scan_plan; + Index scan_relid = best_path->path.parent->relid; + List *tidquals = best_path->tidquals; + + /* it should be a base rel... */ + Assert(scan_relid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* + * The qpqual list must contain all restrictions not enforced by the + * tidquals list. Since tidquals has OR semantics, we have to be careful + * about matching it up to scan_clauses. It's convenient to handle the + * single-tidqual case separately from the multiple-tidqual case. In the + * single-tidqual case, we look through the scan_clauses while they are + * still in RestrictInfo form, and drop any that are redundant with the + * tidqual. + * + * In normal cases simple pointer equality checks will be enough to spot + * duplicate RestrictInfos, so we try that first. + * + * Another common case is that a scan_clauses entry is generated from the + * same EquivalenceClass as some tidqual, and is therefore redundant with + * it, though not equal. + * + * Unlike indexpaths, we don't bother with predicate_implied_by(); the + * number of cases where it could win are pretty small. + */ + if (list_length(tidquals) == 1) + { + List *qpqual = NIL; + ListCell *l; + + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (list_member_ptr(tidquals, rinfo)) + continue; /* simple duplicate */ + if (is_redundant_derived_clause(rinfo, tidquals)) + continue; /* derived from same EquivalenceClass */ + qpqual = lappend(qpqual, rinfo); + } + scan_clauses = qpqual; + } + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */ + tidquals = extract_actual_clauses(tidquals, false); + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* + * If we have multiple tidquals, it's more convenient to remove duplicate + * scan_clauses after stripping the RestrictInfos. In this situation, + * because the tidquals represent OR sub-clauses, they could not have come + * from EquivalenceClasses so we don't have to worry about matching up + * non-identical clauses. On the other hand, because tidpath.c will have + * extracted those sub-clauses from some OR clause and built its own list, + * we will certainly not have pointer equality to any scan clause. So + * convert the tidquals list to an explicit OR clause and see if we can + * match it via equal() to any scan clause. + */ + if (list_length(tidquals) > 1) + scan_clauses = list_difference(scan_clauses, + list_make1(make_orclause(tidquals))); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->path.param_info) + { + tidquals = (List *) + replace_nestloop_params(root, (Node *) tidquals); + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_tidscan(tlist, + scan_clauses, + scan_relid, + tidquals); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + +/* + * create_tidrangescan_plan + * Returns a tidrangescan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static TidRangeScan * +create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path, + List *tlist, List *scan_clauses) +{ + TidRangeScan *scan_plan; + Index scan_relid = best_path->path.parent->relid; + List *tidrangequals = best_path->tidrangequals; + + /* it should be a base rel... */ + Assert(scan_relid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* + * The qpqual list must contain all restrictions not enforced by the + * tidrangequals list. tidrangequals has AND semantics, so we can simply + * remove any qual that appears in it. + */ + { + List *qpqual = NIL; + ListCell *l; + + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (list_member_ptr(tidrangequals, rinfo)) + continue; /* simple duplicate */ + qpqual = lappend(qpqual, rinfo); + } + scan_clauses = qpqual; + } + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */ + tidrangequals = extract_actual_clauses(tidrangequals, false); + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->path.param_info) + { + tidrangequals = (List *) + replace_nestloop_params(root, (Node *) tidrangequals); + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_tidrangescan(tlist, + scan_clauses, + scan_relid, + tidrangequals); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + +/* + * create_subqueryscan_plan + * Returns a subqueryscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static SubqueryScan * +create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, + List *tlist, List *scan_clauses) +{ + SubqueryScan *scan_plan; + RelOptInfo *rel = best_path->path.parent; + Index scan_relid = rel->relid; + Plan *subplan; + + /* it should be a subquery base rel... */ + Assert(scan_relid > 0); + Assert(rel->rtekind == RTE_SUBQUERY); + + /* + * Recursively create Plan from Path for subquery. Since we are entering + * a different planner context (subroot), recurse to create_plan not + * create_plan_recurse. + */ + subplan = create_plan(rel->subroot, best_path->subpath); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->path.param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + process_subquery_nestloop_params(root, + rel->subplan_params); + } + + scan_plan = make_subqueryscan(tlist, + scan_clauses, + scan_relid, + subplan); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + +/* + * create_functionscan_plan + * Returns a functionscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static FunctionScan * +create_functionscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + FunctionScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + List *functions; + + /* it should be a function base rel... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_FUNCTION); + functions = rte->functions; + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + /* The function expressions could contain nestloop params, too */ + functions = (List *) replace_nestloop_params(root, (Node *) functions); + } + + scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, + functions, rte->funcordinality); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_tablefuncscan_plan + * Returns a tablefuncscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static TableFuncScan * +create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + TableFuncScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + TableFunc *tablefunc; + + /* it should be a function base rel... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_TABLEFUNC); + tablefunc = rte->tablefunc; + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + /* The function expressions could contain nestloop params, too */ + tablefunc = (TableFunc *) replace_nestloop_params(root, (Node *) tablefunc); + } + + scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, + tablefunc); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_valuesscan_plan + * Returns a valuesscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static ValuesScan * +create_valuesscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + ValuesScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + List *values_lists; + + /* it should be a values base rel... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_VALUES); + values_lists = rte->values_lists; + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + /* The values lists could contain nestloop params, too */ + values_lists = (List *) + replace_nestloop_params(root, (Node *) values_lists); + } + + scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, + values_lists); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_ctescan_plan + * Returns a ctescan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static CteScan * +create_ctescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + CteScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + SubPlan *ctesplan = NULL; + int plan_id; + int cte_param_id; + PlannerInfo *cteroot; + Index levelsup; + int ndx; + ListCell *lc; + + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_CTE); + Assert(!rte->self_reference); + + /* + * Find the referenced CTE, and locate the SubPlan previously made for it. + */ + levelsup = rte->ctelevelsup; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + + /* + * Note: cte_plan_ids can be shorter than cteList, if we are still working + * on planning the CTEs (ie, this is a side-reference from another CTE). + * So we mustn't use forboth here. + */ + ndx = 0; + foreach(lc, cteroot->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + ndx++; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find CTE \"%s\"", rte->ctename); + if (ndx >= list_length(cteroot->cte_plan_ids)) + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); + if (plan_id <= 0) + elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename); + foreach(lc, cteroot->init_plans) + { + ctesplan = (SubPlan *) lfirst(lc); + if (ctesplan->plan_id == plan_id) + break; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + + /* + * We need the CTE param ID, which is the sole member of the SubPlan's + * setParam list. + */ + cte_param_id = linitial_int(ctesplan->setParam); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, + plan_id, cte_param_id); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_namedtuplestorescan_plan + * Returns a tuplestorescan plan for the base relation scanned by + * 'best_path' with restriction clauses 'scan_clauses' and targetlist + * 'tlist'. + */ +static NamedTuplestoreScan * +create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + NamedTuplestoreScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_NAMEDTUPLESTORE); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, + rte->enrname); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_resultscan_plan + * Returns a Result plan for the RTE_RESULT base relation scanned by + * 'best_path' with restriction clauses 'scan_clauses' and targetlist + * 'tlist'. + */ +static Result * +create_resultscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + Result *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte PG_USED_FOR_ASSERTS_ONLY; + + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RESULT); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); + + copy_generic_path_info(&scan_plan->plan, best_path); + + return scan_plan; +} + +/* + * create_worktablescan_plan + * Returns a worktablescan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static WorkTableScan * +create_worktablescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + WorkTableScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + Index levelsup; + PlannerInfo *cteroot; + + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_CTE); + Assert(rte->self_reference); + + /* + * We need to find the worktable param ID, which is in the plan level + * that's processing the recursive UNION, which is one level *below* where + * the CTE comes from. + */ + levelsup = rte->ctelevelsup; + if (levelsup == 0) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + levelsup--; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + if (cteroot->wt_param_id < 0) /* shouldn't happen */ + elog(ERROR, "could not find param ID for CTE \"%s\"", rte->ctename); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, + cteroot->wt_param_id); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* + * create_foreignscan_plan + * Returns a foreignscan plan for the relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static ForeignScan * +create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, + List *tlist, List *scan_clauses) +{ + ForeignScan *scan_plan; + RelOptInfo *rel = best_path->path.parent; + Index scan_relid = rel->relid; + Oid rel_oid = InvalidOid; + Plan *outer_plan = NULL; + + Assert(rel->fdwroutine != NULL); + + /* transform the child path if any */ + if (best_path->fdw_outerpath) + outer_plan = create_plan_recurse(root, best_path->fdw_outerpath, + CP_EXACT_TLIST); + + /* + * If we're scanning a base relation, fetch its OID. (Irrelevant if + * scanning a join relation.) + */ + if (scan_relid > 0) + { + RangeTblEntry *rte; + + Assert(rel->rtekind == RTE_RELATION); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + rel_oid = rte->relid; + } + + /* + * Sort clauses into best execution order. We do this first since the FDW + * might have more info than we do and wish to adjust the ordering. + */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* + * Let the FDW perform its processing on the restriction clauses and + * generate the plan node. Note that the FDW might remove restriction + * clauses that it intends to execute remotely, or even add more (if it + * has selected some join clauses for remote use but also wants them + * rechecked locally). + */ + scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rel_oid, + best_path, + tlist, scan_clauses, + outer_plan); + + /* Copy cost data from Path to Plan; no need to make FDW do this */ + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + /* Copy foreign server OID; likewise, no need to make FDW do this */ + scan_plan->fs_server = rel->serverid; + + /* + * Likewise, copy the relids that are represented by this foreign scan. An + * upper rel doesn't have relids set, but it covers all the base relations + * participating in the underlying scan, so use root's all_baserels. + */ + if (rel->reloptkind == RELOPT_UPPER_REL) + scan_plan->fs_relids = root->all_baserels; + else + scan_plan->fs_relids = best_path->path.parent->relids; + + /* + * If this is a foreign join, and to make it valid to push down we had to + * assume that the current user is the same as some user explicitly named + * in the query, mark the finished plan as depending on the current user. + */ + if (rel->useridiscurrent) + root->glob->dependsOnRole = true; + + /* + * Replace any outer-relation variables with nestloop params in the qual, + * fdw_exprs and fdw_recheck_quals expressions. We do this last so that + * the FDW doesn't have to be involved. (Note that parts of fdw_exprs or + * fdw_recheck_quals could have come from join clauses, so doing this + * beforehand on the scan_clauses wouldn't work.) We assume + * fdw_scan_tlist contains no such variables. + */ + if (best_path->path.param_info) + { + scan_plan->scan.plan.qual = (List *) + replace_nestloop_params(root, (Node *) scan_plan->scan.plan.qual); + scan_plan->fdw_exprs = (List *) + replace_nestloop_params(root, (Node *) scan_plan->fdw_exprs); + scan_plan->fdw_recheck_quals = (List *) + replace_nestloop_params(root, + (Node *) scan_plan->fdw_recheck_quals); + } + + /* + * If rel is a base relation, detect whether any system columns are + * requested from the rel. (If rel is a join relation, rel->relid will be + * 0, but there can be no Var with relid 0 in the rel's targetlist or the + * restriction clauses, so we skip this in that case. Note that any such + * columns in base relations that were joined are assumed to be contained + * in fdw_scan_tlist.) This is a bit of a kluge and might go away + * someday, so we intentionally leave it out of the API presented to FDWs. + */ + scan_plan->fsSystemCol = false; + if (scan_relid > 0) + { + Bitmapset *attrs_used = NULL; + ListCell *lc; + int i; + + /* + * First, examine all the attributes needed for joins or final output. + * Note: we must look at rel's targetlist, not the attr_needed data, + * because attr_needed isn't computed for inheritance child rels. + */ + pull_varattnos((Node *) rel->reltarget->exprs, scan_relid, &attrs_used); + + /* Add all the attributes used by restriction clauses. */ + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pull_varattnos((Node *) rinfo->clause, scan_relid, &attrs_used); + } + + /* Now, are any system columns requested from rel? */ + for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++) + { + if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, attrs_used)) + { + scan_plan->fsSystemCol = true; + break; + } + } + + bms_free(attrs_used); + } + + return scan_plan; +} + +/* + * create_customscan_plan + * + * Transform a CustomPath into a Plan. + */ +static CustomScan * +create_customscan_plan(PlannerInfo *root, CustomPath *best_path, + List *tlist, List *scan_clauses) +{ + CustomScan *cplan; + RelOptInfo *rel = best_path->path.parent; + List *custom_plans = NIL; + ListCell *lc; + + /* Recursively transform child paths. */ + foreach(lc, best_path->custom_paths) + { + Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc), + CP_EXACT_TLIST); + + custom_plans = lappend(custom_plans, plan); + } + + /* + * Sort clauses into the best execution order, although custom-scan + * provider can reorder them again. + */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* + * Invoke custom plan provider to create the Plan node represented by the + * CustomPath. + */ + cplan = castNode(CustomScan, + best_path->methods->PlanCustomPath(root, + rel, + best_path, + tlist, + scan_clauses, + custom_plans)); + + /* + * Copy cost data from Path to Plan; no need to make custom-plan providers + * do this + */ + copy_generic_path_info(&cplan->scan.plan, &best_path->path); + + /* Likewise, copy the relids that are represented by this custom scan */ + cplan->custom_relids = best_path->path.parent->relids; + + /* + * Replace any outer-relation variables with nestloop params in the qual + * and custom_exprs expressions. We do this last so that the custom-plan + * provider doesn't have to be involved. (Note that parts of custom_exprs + * could have come from join clauses, so doing this beforehand on the + * scan_clauses wouldn't work.) We assume custom_scan_tlist contains no + * such variables. + */ + if (best_path->path.param_info) + { + cplan->scan.plan.qual = (List *) + replace_nestloop_params(root, (Node *) cplan->scan.plan.qual); + cplan->custom_exprs = (List *) + replace_nestloop_params(root, (Node *) cplan->custom_exprs); + } + + return cplan; +} + + +/***************************************************************************** + * + * JOIN METHODS + * + *****************************************************************************/ + +static NestLoop * +create_nestloop_plan(PlannerInfo *root, + NestPath *best_path) +{ + NestLoop *join_plan; + Plan *outer_plan; + Plan *inner_plan; + List *tlist = build_path_tlist(root, &best_path->jpath.path); + List *joinrestrictclauses = best_path->jpath.joinrestrictinfo; + List *joinclauses; + List *otherclauses; + Relids outerrelids; + List *nestParams; + Relids saveOuterRels = root->curOuterRels; + + /* NestLoop can project, so no need to be picky about child tlists */ + outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0); + + /* For a nestloop, include outer relids in curOuterRels for inner side */ + root->curOuterRels = bms_union(root->curOuterRels, + best_path->jpath.outerjoinpath->parent->relids); + + inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0); + + /* Restore curOuterRels */ + bms_free(root->curOuterRels); + root->curOuterRels = saveOuterRels; + + /* Sort join qual clauses into best execution order */ + joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses); + + /* Get the join qual clauses (in plain expression form) */ + /* Any pseudoconstant clauses are ignored here */ + if (IS_OUTER_JOIN(best_path->jpath.jointype)) + { + extract_actual_join_clauses(joinrestrictclauses, + best_path->jpath.path.parent->relids, + &joinclauses, &otherclauses); + } + else + { + /* We can treat all clauses alike for an inner join */ + joinclauses = extract_actual_clauses(joinrestrictclauses, false); + otherclauses = NIL; + } + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->jpath.path.param_info) + { + joinclauses = (List *) + replace_nestloop_params(root, (Node *) joinclauses); + otherclauses = (List *) + replace_nestloop_params(root, (Node *) otherclauses); + } + + /* + * Identify any nestloop parameters that should be supplied by this join + * node, and remove them from root->curOuterParams. + */ + outerrelids = best_path->jpath.outerjoinpath->parent->relids; + nestParams = identify_current_nestloop_params(root, outerrelids); + + join_plan = make_nestloop(tlist, + joinclauses, + otherclauses, + nestParams, + outer_plan, + inner_plan, + best_path->jpath.jointype, + best_path->jpath.inner_unique); + + copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); + + return join_plan; +} + +static MergeJoin * +create_mergejoin_plan(PlannerInfo *root, + MergePath *best_path) +{ + MergeJoin *join_plan; + Plan *outer_plan; + Plan *inner_plan; + List *tlist = build_path_tlist(root, &best_path->jpath.path); + List *joinclauses; + List *otherclauses; + List *mergeclauses; + List *outerpathkeys; + List *innerpathkeys; + int nClauses; + Oid *mergefamilies; + Oid *mergecollations; + int *mergestrategies; + bool *mergenullsfirst; + PathKey *opathkey; + EquivalenceClass *opeclass; + int i; + ListCell *lc; + ListCell *lop; + ListCell *lip; + Path *outer_path = best_path->jpath.outerjoinpath; + Path *inner_path = best_path->jpath.innerjoinpath; + + /* + * MergeJoin can project, so we don't have to demand exact tlists from the + * inputs. However, if we're intending to sort an input's result, it's + * best to request a small tlist so we aren't sorting more data than + * necessary. + */ + outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, + (best_path->outersortkeys != NIL) ? CP_SMALL_TLIST : 0); + + inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, + (best_path->innersortkeys != NIL) ? CP_SMALL_TLIST : 0); + + /* Sort join qual clauses into best execution order */ + /* NB: do NOT reorder the mergeclauses */ + joinclauses = order_qual_clauses(root, best_path->jpath.joinrestrictinfo); + + /* Get the join qual clauses (in plain expression form) */ + /* Any pseudoconstant clauses are ignored here */ + if (IS_OUTER_JOIN(best_path->jpath.jointype)) + { + extract_actual_join_clauses(joinclauses, + best_path->jpath.path.parent->relids, + &joinclauses, &otherclauses); + } + else + { + /* We can treat all clauses alike for an inner join */ + joinclauses = extract_actual_clauses(joinclauses, false); + otherclauses = NIL; + } + + /* + * Remove the mergeclauses from the list of join qual clauses, leaving the + * list of quals that must be checked as qpquals. + */ + mergeclauses = get_actual_clauses(best_path->path_mergeclauses); + joinclauses = list_difference(joinclauses, mergeclauses); + + /* + * Replace any outer-relation variables with nestloop params. There + * should not be any in the mergeclauses. + */ + if (best_path->jpath.path.param_info) + { + joinclauses = (List *) + replace_nestloop_params(root, (Node *) joinclauses); + otherclauses = (List *) + replace_nestloop_params(root, (Node *) otherclauses); + } + + /* + * Rearrange mergeclauses, if needed, so that the outer variable is always + * on the left; mark the mergeclause restrictinfos with correct + * outer_is_left status. + */ + mergeclauses = get_switched_clauses(best_path->path_mergeclauses, + best_path->jpath.outerjoinpath->parent->relids); + + /* + * Create explicit sort nodes for the outer and inner paths if necessary. + */ + if (best_path->outersortkeys) + { + Relids outer_relids = outer_path->parent->relids; + Sort *sort = make_sort_from_pathkeys(outer_plan, + best_path->outersortkeys, + outer_relids); + + label_sort_with_costsize(root, sort, -1.0); + outer_plan = (Plan *) sort; + outerpathkeys = best_path->outersortkeys; + } + else + outerpathkeys = best_path->jpath.outerjoinpath->pathkeys; + + if (best_path->innersortkeys) + { + Relids inner_relids = inner_path->parent->relids; + Sort *sort = make_sort_from_pathkeys(inner_plan, + best_path->innersortkeys, + inner_relids); + + label_sort_with_costsize(root, sort, -1.0); + inner_plan = (Plan *) sort; + innerpathkeys = best_path->innersortkeys; + } + else + innerpathkeys = best_path->jpath.innerjoinpath->pathkeys; + + /* + * If specified, add a materialize node to shield the inner plan from the + * need to handle mark/restore. + */ + if (best_path->materialize_inner) + { + Plan *matplan = (Plan *) make_material(inner_plan); + + /* + * We assume the materialize will not spill to disk, and therefore + * charge just cpu_operator_cost per tuple. (Keep this estimate in + * sync with final_cost_mergejoin.) + */ + copy_plan_costsize(matplan, inner_plan); + matplan->total_cost += cpu_operator_cost * matplan->plan_rows; + + inner_plan = matplan; + } + + /* + * Compute the opfamily/collation/strategy/nullsfirst arrays needed by the + * executor. The information is in the pathkeys for the two inputs, but + * we need to be careful about the possibility of mergeclauses sharing a + * pathkey, as well as the possibility that the inner pathkeys are not in + * an order matching the mergeclauses. + */ + nClauses = list_length(mergeclauses); + Assert(nClauses == list_length(best_path->path_mergeclauses)); + mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); + mergecollations = (Oid *) palloc(nClauses * sizeof(Oid)); + mergestrategies = (int *) palloc(nClauses * sizeof(int)); + mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); + + opathkey = NULL; + opeclass = NULL; + lop = list_head(outerpathkeys); + lip = list_head(innerpathkeys); + i = 0; + foreach(lc, best_path->path_mergeclauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + PathKey *ipathkey = NULL; + EquivalenceClass *ipeclass = NULL; + bool first_inner_match = false; + + /* fetch outer/inner eclass from mergeclause */ + if (rinfo->outer_is_left) + { + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; + } + else + { + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; + } + Assert(oeclass != NULL); + Assert(ieclass != NULL); + + /* + * We must identify the pathkey elements associated with this clause + * by matching the eclasses (which should give a unique match, since + * the pathkey lists should be canonical). In typical cases the merge + * clauses are one-to-one with the pathkeys, but when dealing with + * partially redundant query conditions, things are more complicated. + * + * lop and lip reference the first as-yet-unmatched pathkey elements. + * If they're NULL then all pathkey elements have been matched. + * + * The ordering of the outer pathkeys should match the mergeclauses, + * by construction (see find_mergeclauses_for_outer_pathkeys()). There + * could be more than one mergeclause for the same outer pathkey, but + * no pathkey may be entirely skipped over. + */ + if (oeclass != opeclass) /* multiple matches are not interesting */ + { + /* doesn't match the current opathkey, so must match the next */ + if (lop == NULL) + elog(ERROR, "outer pathkeys do not match mergeclauses"); + opathkey = (PathKey *) lfirst(lop); + opeclass = opathkey->pk_eclass; + lop = lnext(outerpathkeys, lop); + if (oeclass != opeclass) + elog(ERROR, "outer pathkeys do not match mergeclauses"); + } + + /* + * The inner pathkeys likewise should not have skipped-over keys, but + * it's possible for a mergeclause to reference some earlier inner + * pathkey if we had redundant pathkeys. For example we might have + * mergeclauses like "o.a = i.x AND o.b = i.y AND o.c = i.x". The + * implied inner ordering is then "ORDER BY x, y, x", but the pathkey + * mechanism drops the second sort by x as redundant, and this code + * must cope. + * + * It's also possible for the implied inner-rel ordering to be like + * "ORDER BY x, y, x DESC". We still drop the second instance of x as + * redundant; but this means that the sort ordering of a redundant + * inner pathkey should not be considered significant. So we must + * detect whether this is the first clause matching an inner pathkey. + */ + if (lip) + { + ipathkey = (PathKey *) lfirst(lip); + ipeclass = ipathkey->pk_eclass; + if (ieclass == ipeclass) + { + /* successful first match to this inner pathkey */ + lip = lnext(innerpathkeys, lip); + first_inner_match = true; + } + } + if (!first_inner_match) + { + /* redundant clause ... must match something before lip */ + ListCell *l2; + + foreach(l2, innerpathkeys) + { + if (l2 == lip) + break; + ipathkey = (PathKey *) lfirst(l2); + ipeclass = ipathkey->pk_eclass; + if (ieclass == ipeclass) + break; + } + if (ieclass != ipeclass) + elog(ERROR, "inner pathkeys do not match mergeclauses"); + } + + /* + * The pathkeys should always match each other as to opfamily and + * collation (which affect equality), but if we're considering a + * redundant inner pathkey, its sort ordering might not match. In + * such cases we may ignore the inner pathkey's sort ordering and use + * the outer's. (In effect, we're lying to the executor about the + * sort direction of this inner column, but it does not matter since + * the run-time row comparisons would only reach this column when + * there's equality for the earlier column containing the same eclass. + * There could be only one value in this column for the range of inner + * rows having a given value in the earlier column, so it does not + * matter which way we imagine this column to be ordered.) But a + * non-redundant inner pathkey had better match outer's ordering too. + */ + if (opathkey->pk_opfamily != ipathkey->pk_opfamily || + opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + if (first_inner_match && + (opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first)) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + + /* OK, save info for executor */ + mergefamilies[i] = opathkey->pk_opfamily; + mergecollations[i] = opathkey->pk_eclass->ec_collation; + mergestrategies[i] = opathkey->pk_strategy; + mergenullsfirst[i] = opathkey->pk_nulls_first; + i++; + } + + /* + * Note: it is not an error if we have additional pathkey elements (i.e., + * lop or lip isn't NULL here). The input paths might be better-sorted + * than we need for the current mergejoin. + */ + + /* + * Now we can build the mergejoin node. + */ + join_plan = make_mergejoin(tlist, + joinclauses, + otherclauses, + mergeclauses, + mergefamilies, + mergecollations, + mergestrategies, + mergenullsfirst, + outer_plan, + inner_plan, + best_path->jpath.jointype, + best_path->jpath.inner_unique, + best_path->skip_mark_restore); + + /* Costs of sort and material steps are included in path cost already */ + copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); + + return join_plan; +} + +static HashJoin * +create_hashjoin_plan(PlannerInfo *root, + HashPath *best_path) +{ + HashJoin *join_plan; + Hash *hash_plan; + Plan *outer_plan; + Plan *inner_plan; + List *tlist = build_path_tlist(root, &best_path->jpath.path); + List *joinclauses; + List *otherclauses; + List *hashclauses; + List *hashoperators = NIL; + List *hashcollations = NIL; + List *inner_hashkeys = NIL; + List *outer_hashkeys = NIL; + Oid skewTable = InvalidOid; + AttrNumber skewColumn = InvalidAttrNumber; + bool skewInherit = false; + ListCell *lc; + + /* + * HashJoin can project, so we don't have to demand exact tlists from the + * inputs. However, it's best to request a small tlist from the inner + * side, so that we aren't storing more data than necessary. Likewise, if + * we anticipate batching, request a small tlist from the outer side so + * that we don't put extra data in the outer batch files. + */ + outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, + (best_path->num_batches > 1) ? CP_SMALL_TLIST : 0); + + inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, + CP_SMALL_TLIST); + + /* Sort join qual clauses into best execution order */ + joinclauses = order_qual_clauses(root, best_path->jpath.joinrestrictinfo); + /* There's no point in sorting the hash clauses ... */ + + /* Get the join qual clauses (in plain expression form) */ + /* Any pseudoconstant clauses are ignored here */ + if (IS_OUTER_JOIN(best_path->jpath.jointype)) + { + extract_actual_join_clauses(joinclauses, + best_path->jpath.path.parent->relids, + &joinclauses, &otherclauses); + } + else + { + /* We can treat all clauses alike for an inner join */ + joinclauses = extract_actual_clauses(joinclauses, false); + otherclauses = NIL; + } + + /* + * Remove the hashclauses from the list of join qual clauses, leaving the + * list of quals that must be checked as qpquals. + */ + hashclauses = get_actual_clauses(best_path->path_hashclauses); + joinclauses = list_difference(joinclauses, hashclauses); + + /* + * Replace any outer-relation variables with nestloop params. There + * should not be any in the hashclauses. + */ + if (best_path->jpath.path.param_info) + { + joinclauses = (List *) + replace_nestloop_params(root, (Node *) joinclauses); + otherclauses = (List *) + replace_nestloop_params(root, (Node *) otherclauses); + } + + /* + * Rearrange hashclauses, if needed, so that the outer variable is always + * on the left. + */ + hashclauses = get_switched_clauses(best_path->path_hashclauses, + best_path->jpath.outerjoinpath->parent->relids); + + /* + * If there is a single join clause and we can identify the outer variable + * as a simple column reference, supply its identity for possible use in + * skew optimization. (Note: in principle we could do skew optimization + * with multiple join clauses, but we'd have to be able to determine the + * most common combinations of outer values, which we don't currently have + * enough stats for.) + */ + if (list_length(hashclauses) == 1) + { + OpExpr *clause = (OpExpr *) linitial(hashclauses); + Node *node; + + Assert(is_opclause(clause)); + node = (Node *) linitial(clause->args); + if (IsA(node, RelabelType)) + node = (Node *) ((RelabelType *) node)->arg; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + RangeTblEntry *rte; + + rte = root->simple_rte_array[var->varno]; + if (rte->rtekind == RTE_RELATION) + { + skewTable = rte->relid; + skewColumn = var->varattno; + skewInherit = rte->inh; + } + } + } + + /* + * Collect hash related information. The hashed expressions are + * deconstructed into outer/inner expressions, so they can be computed + * separately (inner expressions are used to build the hashtable via Hash, + * outer expressions to perform lookups of tuples from HashJoin's outer + * plan in the hashtable). Also collect operator information necessary to + * build the hashtable. + */ + foreach(lc, hashclauses) + { + OpExpr *hclause = lfirst_node(OpExpr, lc); + + hashoperators = lappend_oid(hashoperators, hclause->opno); + hashcollations = lappend_oid(hashcollations, hclause->inputcollid); + outer_hashkeys = lappend(outer_hashkeys, linitial(hclause->args)); + inner_hashkeys = lappend(inner_hashkeys, lsecond(hclause->args)); + } + + /* + * Build the hash node and hash join node. + */ + hash_plan = make_hash(inner_plan, + inner_hashkeys, + skewTable, + skewColumn, + skewInherit); + + /* + * Set Hash node's startup & total costs equal to total cost of input + * plan; this only affects EXPLAIN display not decisions. + */ + copy_plan_costsize(&hash_plan->plan, inner_plan); + hash_plan->plan.startup_cost = hash_plan->plan.total_cost; + + /* + * If parallel-aware, the executor will also need an estimate of the total + * number of rows expected from all participants so that it can size the + * shared hash table. + */ + if (best_path->jpath.path.parallel_aware) + { + hash_plan->plan.parallel_aware = true; + hash_plan->rows_total = best_path->inner_rows_total; + } + + join_plan = make_hashjoin(tlist, + joinclauses, + otherclauses, + hashclauses, + hashoperators, + hashcollations, + outer_hashkeys, + outer_plan, + (Plan *) hash_plan, + best_path->jpath.jointype, + best_path->jpath.inner_unique); + + copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); + + return join_plan; +} + + +/***************************************************************************** + * + * SUPPORTING ROUTINES + * + *****************************************************************************/ + +/* + * replace_nestloop_params + * Replace outer-relation Vars and PlaceHolderVars in the given expression + * with nestloop Params + * + * All Vars and PlaceHolderVars belonging to the relation(s) identified by + * root->curOuterRels are replaced by Params, and entries are added to + * root->curOuterParams if not already present. + */ +static Node * +replace_nestloop_params(PlannerInfo *root, Node *expr) +{ + /* No setup needed for tree walk, so away we go */ + return replace_nestloop_params_mutator(expr, root); +} + +static Node * +replace_nestloop_params_mutator(Node *node, PlannerInfo *root) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* Upper-level Vars should be long gone at this point */ + Assert(var->varlevelsup == 0); + /* If not to be replaced, we can just return the Var unmodified */ + if (IS_SPECIAL_VARNO(var->varno) || + !bms_is_member(var->varno, root->curOuterRels)) + return node; + /* Replace the Var with a nestloop Param */ + return (Node *) replace_nestloop_param_var(root, var); + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* Upper-level PlaceHolderVars should be long gone at this point */ + Assert(phv->phlevelsup == 0); + + /* + * Check whether we need to replace the PHV. We use bms_overlap as a + * cheap/quick test to see if the PHV might be evaluated in the outer + * rels, and then grab its PlaceHolderInfo to tell for sure. + */ + if (!bms_overlap(phv->phrels, root->curOuterRels) || + !bms_is_subset(find_placeholder_info(root, phv, false)->ph_eval_at, + root->curOuterRels)) + { + /* + * We can't replace the whole PHV, but we might still need to + * replace Vars or PHVs within its expression, in case it ends up + * actually getting evaluated here. (It might get evaluated in + * this plan node, or some child node; in the latter case we don't + * really need to process the expression here, but we haven't got + * enough info to tell if that's the case.) Flat-copy the PHV + * node and then recurse on its expression. + * + * Note that after doing this, we might have different + * representations of the contents of the same PHV in different + * parts of the plan tree. This is OK because equal() will just + * match on phid/phlevelsup, so setrefs.c will still recognize an + * upper-level reference to a lower-level copy of the same PHV. + */ + PlaceHolderVar *newphv = makeNode(PlaceHolderVar); + + memcpy(newphv, phv, sizeof(PlaceHolderVar)); + newphv->phexpr = (Expr *) + replace_nestloop_params_mutator((Node *) phv->phexpr, + root); + return (Node *) newphv; + } + /* Replace the PlaceHolderVar with a nestloop Param */ + return (Node *) replace_nestloop_param_placeholdervar(root, phv); + } + return expression_tree_mutator(node, + replace_nestloop_params_mutator, + (void *) root); +} + +/* + * fix_indexqual_references + * Adjust indexqual clauses to the form the executor's indexqual + * machinery needs. + * + * We have three tasks here: + * * Select the actual qual clauses out of the input IndexClause list, + * and remove RestrictInfo nodes from the qual clauses. + * * Replace any outer-relation Var or PHV nodes with nestloop Params. + * (XXX eventually, that responsibility should go elsewhere?) + * * Index keys must be represented by Var nodes with varattno set to the + * index's attribute number, not the attribute number in the original rel. + * + * *stripped_indexquals_p receives a list of the actual qual clauses. + * + * *fixed_indexquals_p receives a list of the adjusted quals. This is a copy + * that shares no substructure with the original; this is needed in case there + * are subplans in it (we need two separate copies of the subplan tree, or + * things will go awry). + */ +static void +fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, + List **stripped_indexquals_p, List **fixed_indexquals_p) +{ + IndexOptInfo *index = index_path->indexinfo; + List *stripped_indexquals; + List *fixed_indexquals; + ListCell *lc; + + stripped_indexquals = fixed_indexquals = NIL; + + foreach(lc, index_path->indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + int indexcol = iclause->indexcol; + ListCell *lc2; + + foreach(lc2, iclause->indexquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + Node *clause = (Node *) rinfo->clause; + + stripped_indexquals = lappend(stripped_indexquals, clause); + clause = fix_indexqual_clause(root, index, indexcol, + clause, iclause->indexcols); + fixed_indexquals = lappend(fixed_indexquals, clause); + } + } + + *stripped_indexquals_p = stripped_indexquals; + *fixed_indexquals_p = fixed_indexquals; +} + +/* + * fix_indexorderby_references + * Adjust indexorderby clauses to the form the executor's index + * machinery needs. + * + * This is a simplified version of fix_indexqual_references. The input is + * bare clauses and a separate indexcol list, instead of IndexClauses. + */ +static List * +fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path) +{ + IndexOptInfo *index = index_path->indexinfo; + List *fixed_indexorderbys; + ListCell *lcc, + *lci; + + fixed_indexorderbys = NIL; + + forboth(lcc, index_path->indexorderbys, lci, index_path->indexorderbycols) + { + Node *clause = (Node *) lfirst(lcc); + int indexcol = lfirst_int(lci); + + clause = fix_indexqual_clause(root, index, indexcol, clause, NIL); + fixed_indexorderbys = lappend(fixed_indexorderbys, clause); + } + + return fixed_indexorderbys; +} + +/* + * fix_indexqual_clause + * Convert a single indexqual clause to the form needed by the executor. + * + * We replace nestloop params here, and replace the index key variables + * or expressions by index Var nodes. + */ +static Node * +fix_indexqual_clause(PlannerInfo *root, IndexOptInfo *index, int indexcol, + Node *clause, List *indexcolnos) +{ + /* + * Replace any outer-relation variables with nestloop params. + * + * This also makes a copy of the clause, so it's safe to modify it + * in-place below. + */ + clause = replace_nestloop_params(root, clause); + + if (IsA(clause, OpExpr)) + { + OpExpr *op = (OpExpr *) clause; + + /* Replace the indexkey expression with an index Var. */ + linitial(op->args) = fix_indexqual_operand(linitial(op->args), + index, + indexcol); + } + else if (IsA(clause, RowCompareExpr)) + { + RowCompareExpr *rc = (RowCompareExpr *) clause; + ListCell *lca, + *lcai; + + /* Replace the indexkey expressions with index Vars. */ + Assert(list_length(rc->largs) == list_length(indexcolnos)); + forboth(lca, rc->largs, lcai, indexcolnos) + { + lfirst(lca) = fix_indexqual_operand(lfirst(lca), + index, + lfirst_int(lcai)); + } + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + + /* Replace the indexkey expression with an index Var. */ + linitial(saop->args) = fix_indexqual_operand(linitial(saop->args), + index, + indexcol); + } + else if (IsA(clause, NullTest)) + { + NullTest *nt = (NullTest *) clause; + + /* Replace the indexkey expression with an index Var. */ + nt->arg = (Expr *) fix_indexqual_operand((Node *) nt->arg, + index, + indexcol); + } + else + elog(ERROR, "unsupported indexqual type: %d", + (int) nodeTag(clause)); + + return clause; +} + +/* + * fix_indexqual_operand + * Convert an indexqual expression to a Var referencing the index column. + * + * We represent index keys by Var nodes having varno == INDEX_VAR and varattno + * equal to the index's attribute number (index column position). + * + * Most of the code here is just for sanity cross-checking that the given + * expression actually matches the index column it's claimed to. + */ +static Node * +fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol) +{ + Var *result; + int pos; + ListCell *indexpr_item; + + /* + * Remove any binary-compatible relabeling of the indexkey + */ + if (IsA(node, RelabelType)) + node = (Node *) ((RelabelType *) node)->arg; + + Assert(indexcol >= 0 && indexcol < index->ncolumns); + + if (index->indexkeys[indexcol] != 0) + { + /* It's a simple index column */ + if (IsA(node, Var) && + ((Var *) node)->varno == index->rel->relid && + ((Var *) node)->varattno == index->indexkeys[indexcol]) + { + result = (Var *) copyObject(node); + result->varno = INDEX_VAR; + result->varattno = indexcol + 1; + return (Node *) result; + } + else + elog(ERROR, "index key does not match expected index column"); + } + + /* It's an index expression, so find and cross-check the expression */ + indexpr_item = list_head(index->indexprs); + for (pos = 0; pos < index->ncolumns; pos++) + { + if (index->indexkeys[pos] == 0) + { + if (indexpr_item == NULL) + elog(ERROR, "too few entries in indexprs list"); + if (pos == indexcol) + { + Node *indexkey; + + indexkey = (Node *) lfirst(indexpr_item); + if (indexkey && IsA(indexkey, RelabelType)) + indexkey = (Node *) ((RelabelType *) indexkey)->arg; + if (equal(node, indexkey)) + { + result = makeVar(INDEX_VAR, indexcol + 1, + exprType(lfirst(indexpr_item)), -1, + exprCollation(lfirst(indexpr_item)), + 0); + return (Node *) result; + } + else + elog(ERROR, "index key does not match expected index column"); + } + indexpr_item = lnext(index->indexprs, indexpr_item); + } + } + + /* Oops... */ + elog(ERROR, "index key does not match expected index column"); + return NULL; /* keep compiler quiet */ +} + +/* + * get_switched_clauses + * Given a list of merge or hash joinclauses (as RestrictInfo nodes), + * extract the bare clauses, and rearrange the elements within the + * clauses, if needed, so the outer join variable is on the left and + * the inner is on the right. The original clause data structure is not + * touched; a modified list is returned. We do, however, set the transient + * outer_is_left field in each RestrictInfo to show which side was which. + */ +static List * +get_switched_clauses(List *clauses, Relids outerrelids) +{ + List *t_list = NIL; + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + OpExpr *clause = (OpExpr *) restrictinfo->clause; + + Assert(is_opclause(clause)); + if (bms_is_subset(restrictinfo->right_relids, outerrelids)) + { + /* + * Duplicate just enough of the structure to allow commuting the + * clause without changing the original list. Could use + * copyObject, but a complete deep copy is overkill. + */ + OpExpr *temp = makeNode(OpExpr); + + temp->opno = clause->opno; + temp->opfuncid = InvalidOid; + temp->opresulttype = clause->opresulttype; + temp->opretset = clause->opretset; + temp->opcollid = clause->opcollid; + temp->inputcollid = clause->inputcollid; + temp->args = list_copy(clause->args); + temp->location = clause->location; + /* Commute it --- note this modifies the temp node in-place. */ + CommuteOpExpr(temp); + t_list = lappend(t_list, temp); + restrictinfo->outer_is_left = false; + } + else + { + Assert(bms_is_subset(restrictinfo->left_relids, outerrelids)); + t_list = lappend(t_list, clause); + restrictinfo->outer_is_left = true; + } + } + return t_list; +} + +/* + * order_qual_clauses + * Given a list of qual clauses that will all be evaluated at the same + * plan node, sort the list into the order we want to check the quals + * in at runtime. + * + * When security barrier quals are used in the query, we may have quals with + * different security levels in the list. Quals of lower security_level + * must go before quals of higher security_level, except that we can grant + * exceptions to move up quals that are leakproof. When security level + * doesn't force the decision, we prefer to order clauses by estimated + * execution cost, cheapest first. + * + * Ideally the order should be driven by a combination of execution cost and + * selectivity, but it's not immediately clear how to account for both, + * and given the uncertainty of the estimates the reliability of the decisions + * would be doubtful anyway. So we just order by security level then + * estimated per-tuple cost, being careful not to change the order when + * (as is often the case) the estimates are identical. + * + * Although this will work on either bare clauses or RestrictInfos, it's + * much faster to apply it to RestrictInfos, since it can re-use cost + * information that is cached in RestrictInfos. XXX in the bare-clause + * case, we are also not able to apply security considerations. That is + * all right for the moment, because the bare-clause case doesn't occur + * anywhere that barrier quals could be present, but it would be better to + * get rid of it. + * + * Note: some callers pass lists that contain entries that will later be + * removed; this is the easiest way to let this routine see RestrictInfos + * instead of bare clauses. This is another reason why trying to consider + * selectivity in the ordering would likely do the wrong thing. + */ +static List * +order_qual_clauses(PlannerInfo *root, List *clauses) +{ + typedef struct + { + Node *clause; + Cost cost; + Index security_level; + } QualItem; + int nitems = list_length(clauses); + QualItem *items; + ListCell *lc; + int i; + List *result; + + /* No need to work hard for 0 or 1 clause */ + if (nitems <= 1) + return clauses; + + /* + * Collect the items and costs into an array. This is to avoid repeated + * cost_qual_eval work if the inputs aren't RestrictInfos. + */ + items = (QualItem *) palloc(nitems * sizeof(QualItem)); + i = 0; + foreach(lc, clauses) + { + Node *clause = (Node *) lfirst(lc); + QualCost qcost; + + cost_qual_eval_node(&qcost, clause, root); + items[i].clause = clause; + items[i].cost = qcost.per_tuple; + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; + + /* + * If a clause is leakproof, it doesn't have to be constrained by + * its nominal security level. If it's also reasonably cheap + * (here defined as 10X cpu_operator_cost), pretend it has + * security_level 0, which will allow it to go in front of + * more-expensive quals of lower security levels. Of course, that + * will also force it to go in front of cheaper quals of its own + * security level, which is not so great, but we can alleviate + * that risk by applying the cost limit cutoff. + */ + if (rinfo->leakproof && items[i].cost < 10 * cpu_operator_cost) + items[i].security_level = 0; + else + items[i].security_level = rinfo->security_level; + } + else + items[i].security_level = 0; + i++; + } + + /* + * Sort. We don't use qsort() because it's not guaranteed stable for + * equal keys. The expected number of entries is small enough that a + * simple insertion sort should be good enough. + */ + for (i = 1; i < nitems; i++) + { + QualItem newitem = items[i]; + int j; + + /* insert newitem into the already-sorted subarray */ + for (j = i; j > 0; j--) + { + QualItem *olditem = &items[j - 1]; + + if (newitem.security_level > olditem->security_level || + (newitem.security_level == olditem->security_level && + newitem.cost >= olditem->cost)) + break; + items[j] = *olditem; + } + items[j] = newitem; + } + + /* Convert back to a list */ + result = NIL; + for (i = 0; i < nitems; i++) + result = lappend(result, items[i].clause); + + return result; +} + +/* + * Copy cost and size info from a Path node to the Plan node created from it. + * The executor usually won't use this info, but it's needed by EXPLAIN. + * Also copy the parallel-related flags, which the executor *will* use. + */ +static void +copy_generic_path_info(Plan *dest, Path *src) +{ + dest->startup_cost = src->startup_cost; + dest->total_cost = src->total_cost; + dest->plan_rows = src->rows; + dest->plan_width = src->pathtarget->width; + dest->parallel_aware = src->parallel_aware; + dest->parallel_safe = src->parallel_safe; +} + +/* + * Copy cost and size info from a lower plan node to an inserted node. + * (Most callers alter the info after copying it.) + */ +static void +copy_plan_costsize(Plan *dest, Plan *src) +{ + dest->startup_cost = src->startup_cost; + dest->total_cost = src->total_cost; + dest->plan_rows = src->plan_rows; + dest->plan_width = src->plan_width; + /* Assume the inserted node is not parallel-aware. */ + dest->parallel_aware = false; + /* Assume the inserted node is parallel-safe, if child plan is. */ + dest->parallel_safe = src->parallel_safe; +} + +/* + * Some places in this file build Sort nodes that don't have a directly + * corresponding Path node. The cost of the sort is, or should have been, + * included in the cost of the Path node we're working from, but since it's + * not split out, we have to re-figure it using cost_sort(). This is just + * to label the Sort node nicely for EXPLAIN. + * + * limit_tuples is as for cost_sort (in particular, pass -1 if no limit) + */ +static void +label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples) +{ + Plan *lefttree = plan->plan.lefttree; + Path sort_path; /* dummy for result of cost_sort */ + + /* + * This function shouldn't have to deal with IncrementalSort plans because + * they are only created from corresponding Path nodes. + */ + Assert(IsA(plan, Sort)); + + cost_sort(&sort_path, root, NIL, + lefttree->total_cost, + lefttree->plan_rows, + lefttree->plan_width, + 0.0, + work_mem, + limit_tuples); + plan->plan.startup_cost = sort_path.startup_cost; + plan->plan.total_cost = sort_path.total_cost; + plan->plan.plan_rows = lefttree->plan_rows; + plan->plan.plan_width = lefttree->plan_width; + plan->plan.parallel_aware = false; + plan->plan.parallel_safe = lefttree->parallel_safe; +} + +/* + * bitmap_subplan_mark_shared + * Set isshared flag in bitmap subplan so that it will be created in + * shared memory. + */ +static void +bitmap_subplan_mark_shared(Plan *plan) +{ + if (IsA(plan, BitmapAnd)) + bitmap_subplan_mark_shared(linitial(((BitmapAnd *) plan)->bitmapplans)); + else if (IsA(plan, BitmapOr)) + { + ((BitmapOr *) plan)->isshared = true; + bitmap_subplan_mark_shared(linitial(((BitmapOr *) plan)->bitmapplans)); + } + else if (IsA(plan, BitmapIndexScan)) + ((BitmapIndexScan *) plan)->isshared = true; + else + elog(ERROR, "unrecognized node type: %d", nodeTag(plan)); +} + +/***************************************************************************** + * + * PLAN NODE BUILDING ROUTINES + * + * In general, these functions are not passed the original Path and therefore + * leave it to the caller to fill in the cost/width fields from the Path, + * typically by calling copy_generic_path_info(). This convention is + * somewhat historical, but it does support a few places above where we build + * a plan node without having an exactly corresponding Path node. Under no + * circumstances should one of these functions do its own cost calculations, + * as that would be redundant with calculations done while building Paths. + * + *****************************************************************************/ + +static SeqScan * +make_seqscan(List *qptlist, + List *qpqual, + Index scanrelid) +{ + SeqScan *node = makeNode(SeqScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + + return node; +} + +static SampleScan * +make_samplescan(List *qptlist, + List *qpqual, + Index scanrelid, + TableSampleClause *tsc) +{ + SampleScan *node = makeNode(SampleScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tablesample = tsc; + + return node; +} + +static IndexScan * +make_indexscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + List *indexqual, + List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, + List *indexorderbyops, + ScanDirection indexscandir) +{ + IndexScan *node = makeNode(IndexScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->indexqual = indexqual; + node->indexqualorig = indexqualorig; + node->indexorderby = indexorderby; + node->indexorderbyorig = indexorderbyorig; + node->indexorderbyops = indexorderbyops; + node->indexorderdir = indexscandir; + + return node; +} + +static IndexOnlyScan * +make_indexonlyscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + List *indexqual, + List *recheckqual, + List *indexorderby, + List *indextlist, + ScanDirection indexscandir) +{ + IndexOnlyScan *node = makeNode(IndexOnlyScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->indexqual = indexqual; + node->recheckqual = recheckqual; + node->indexorderby = indexorderby; + node->indextlist = indextlist; + node->indexorderdir = indexscandir; + + return node; +} + +static BitmapIndexScan * +make_bitmap_indexscan(Index scanrelid, + Oid indexid, + List *indexqual, + List *indexqualorig) +{ + BitmapIndexScan *node = makeNode(BitmapIndexScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = NIL; /* not used */ + plan->qual = NIL; /* not used */ + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->indexqual = indexqual; + node->indexqualorig = indexqualorig; + + return node; +} + +static BitmapHeapScan * +make_bitmap_heapscan(List *qptlist, + List *qpqual, + Plan *lefttree, + List *bitmapqualorig, + Index scanrelid) +{ + BitmapHeapScan *node = makeNode(BitmapHeapScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = lefttree; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->bitmapqualorig = bitmapqualorig; + + return node; +} + +static TidScan * +make_tidscan(List *qptlist, + List *qpqual, + Index scanrelid, + List *tidquals) +{ + TidScan *node = makeNode(TidScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tidquals = tidquals; + + return node; +} + +static TidRangeScan * +make_tidrangescan(List *qptlist, + List *qpqual, + Index scanrelid, + List *tidrangequals) +{ + TidRangeScan *node = makeNode(TidRangeScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tidrangequals = tidrangequals; + + return node; +} + +static SubqueryScan * +make_subqueryscan(List *qptlist, + List *qpqual, + Index scanrelid, + Plan *subplan) +{ + SubqueryScan *node = makeNode(SubqueryScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->subplan = subplan; + node->scanstatus = SUBQUERY_SCAN_UNKNOWN; + + return node; +} + +static FunctionScan * +make_functionscan(List *qptlist, + List *qpqual, + Index scanrelid, + List *functions, + bool funcordinality) +{ + FunctionScan *node = makeNode(FunctionScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->functions = functions; + node->funcordinality = funcordinality; + + return node; +} + +static TableFuncScan * +make_tablefuncscan(List *qptlist, + List *qpqual, + Index scanrelid, + TableFunc *tablefunc) +{ + TableFuncScan *node = makeNode(TableFuncScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tablefunc = tablefunc; + + return node; +} + +static ValuesScan * +make_valuesscan(List *qptlist, + List *qpqual, + Index scanrelid, + List *values_lists) +{ + ValuesScan *node = makeNode(ValuesScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->values_lists = values_lists; + + return node; +} + +static CteScan * +make_ctescan(List *qptlist, + List *qpqual, + Index scanrelid, + int ctePlanId, + int cteParam) +{ + CteScan *node = makeNode(CteScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->ctePlanId = ctePlanId; + node->cteParam = cteParam; + + return node; +} + +static NamedTuplestoreScan * +make_namedtuplestorescan(List *qptlist, + List *qpqual, + Index scanrelid, + char *enrname) +{ + NamedTuplestoreScan *node = makeNode(NamedTuplestoreScan); + Plan *plan = &node->scan.plan; + + /* cost should be inserted by caller */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->enrname = enrname; + + return node; +} + +static WorkTableScan * +make_worktablescan(List *qptlist, + List *qpqual, + Index scanrelid, + int wtParam) +{ + WorkTableScan *node = makeNode(WorkTableScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->wtParam = wtParam; + + return node; +} + +ForeignScan * +make_foreignscan(List *qptlist, + List *qpqual, + Index scanrelid, + List *fdw_exprs, + List *fdw_private, + List *fdw_scan_tlist, + List *fdw_recheck_quals, + Plan *outer_plan) +{ + ForeignScan *node = makeNode(ForeignScan); + Plan *plan = &node->scan.plan; + + /* cost will be filled in by create_foreignscan_plan */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = outer_plan; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + + /* these may be overridden by the FDW's PlanDirectModify callback. */ + node->operation = CMD_SELECT; + node->resultRelation = 0; + + /* fs_server will be filled in by create_foreignscan_plan */ + node->fs_server = InvalidOid; + node->fdw_exprs = fdw_exprs; + node->fdw_private = fdw_private; + node->fdw_scan_tlist = fdw_scan_tlist; + node->fdw_recheck_quals = fdw_recheck_quals; + /* fs_relids will be filled in by create_foreignscan_plan */ + node->fs_relids = NULL; + /* fsSystemCol will be filled in by create_foreignscan_plan */ + node->fsSystemCol = false; + + return node; +} + +static RecursiveUnion * +make_recursive_union(List *tlist, + Plan *lefttree, + Plan *righttree, + int wtParam, + List *distinctList, + long numGroups) +{ + RecursiveUnion *node = makeNode(RecursiveUnion); + Plan *plan = &node->plan; + int numCols = list_length(distinctList); + + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = righttree; + node->wtParam = wtParam; + + /* + * convert SortGroupClause list into arrays of attr indexes and equality + * operators, as wanted by executor + */ + node->numCols = numCols; + if (numCols > 0) + { + int keyno = 0; + AttrNumber *dupColIdx; + Oid *dupOperators; + Oid *dupCollations; + ListCell *slitem; + + dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(slitem, distinctList) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, + plan->targetlist); + + dupColIdx[keyno] = tle->resno; + dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); + Assert(OidIsValid(dupOperators[keyno])); + keyno++; + } + node->dupColIdx = dupColIdx; + node->dupOperators = dupOperators; + node->dupCollations = dupCollations; + } + node->numGroups = numGroups; + + return node; +} + +static BitmapAnd * +make_bitmap_and(List *bitmapplans) +{ + BitmapAnd *node = makeNode(BitmapAnd); + Plan *plan = &node->plan; + + plan->targetlist = NIL; + plan->qual = NIL; + plan->lefttree = NULL; + plan->righttree = NULL; + node->bitmapplans = bitmapplans; + + return node; +} + +static BitmapOr * +make_bitmap_or(List *bitmapplans) +{ + BitmapOr *node = makeNode(BitmapOr); + Plan *plan = &node->plan; + + plan->targetlist = NIL; + plan->qual = NIL; + plan->lefttree = NULL; + plan->righttree = NULL; + node->bitmapplans = bitmapplans; + + return node; +} + +static NestLoop * +make_nestloop(List *tlist, + List *joinclauses, + List *otherclauses, + List *nestParams, + Plan *lefttree, + Plan *righttree, + JoinType jointype, + bool inner_unique) +{ + NestLoop *node = makeNode(NestLoop); + Plan *plan = &node->join.plan; + + plan->targetlist = tlist; + plan->qual = otherclauses; + plan->lefttree = lefttree; + plan->righttree = righttree; + node->join.jointype = jointype; + node->join.inner_unique = inner_unique; + node->join.joinqual = joinclauses; + node->nestParams = nestParams; + + return node; +} + +static HashJoin * +make_hashjoin(List *tlist, + List *joinclauses, + List *otherclauses, + List *hashclauses, + List *hashoperators, + List *hashcollations, + List *hashkeys, + Plan *lefttree, + Plan *righttree, + JoinType jointype, + bool inner_unique) +{ + HashJoin *node = makeNode(HashJoin); + Plan *plan = &node->join.plan; + + plan->targetlist = tlist; + plan->qual = otherclauses; + plan->lefttree = lefttree; + plan->righttree = righttree; + node->hashclauses = hashclauses; + node->hashoperators = hashoperators; + node->hashcollations = hashcollations; + node->hashkeys = hashkeys; + node->join.jointype = jointype; + node->join.inner_unique = inner_unique; + node->join.joinqual = joinclauses; + + return node; +} + +static Hash * +make_hash(Plan *lefttree, + List *hashkeys, + Oid skewTable, + AttrNumber skewColumn, + bool skewInherit) +{ + Hash *node = makeNode(Hash); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + node->hashkeys = hashkeys; + node->skewTable = skewTable; + node->skewColumn = skewColumn; + node->skewInherit = skewInherit; + + return node; +} + +static MergeJoin * +make_mergejoin(List *tlist, + List *joinclauses, + List *otherclauses, + List *mergeclauses, + Oid *mergefamilies, + Oid *mergecollations, + int *mergestrategies, + bool *mergenullsfirst, + Plan *lefttree, + Plan *righttree, + JoinType jointype, + bool inner_unique, + bool skip_mark_restore) +{ + MergeJoin *node = makeNode(MergeJoin); + Plan *plan = &node->join.plan; + + plan->targetlist = tlist; + plan->qual = otherclauses; + plan->lefttree = lefttree; + plan->righttree = righttree; + node->skip_mark_restore = skip_mark_restore; + node->mergeclauses = mergeclauses; + node->mergeFamilies = mergefamilies; + node->mergeCollations = mergecollations; + node->mergeStrategies = mergestrategies; + node->mergeNullsFirst = mergenullsfirst; + node->join.jointype = jointype; + node->join.inner_unique = inner_unique; + node->join.joinqual = joinclauses; + + return node; +} + +/* + * make_sort --- basic routine to build a Sort plan node + * + * Caller must have built the sortColIdx, sortOperators, collations, and + * nullsFirst arrays already. + */ +static Sort * +make_sort(Plan *lefttree, int numCols, + AttrNumber *sortColIdx, Oid *sortOperators, + Oid *collations, bool *nullsFirst) +{ + Sort *node; + Plan *plan; + + node = makeNode(Sort); + + plan = &node->plan; + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + node->numCols = numCols; + node->sortColIdx = sortColIdx; + node->sortOperators = sortOperators; + node->collations = collations; + node->nullsFirst = nullsFirst; + + return node; +} + +/* + * make_incrementalsort --- basic routine to build an IncrementalSort plan node + * + * Caller must have built the sortColIdx, sortOperators, collations, and + * nullsFirst arrays already. + */ +static IncrementalSort * +make_incrementalsort(Plan *lefttree, int numCols, int nPresortedCols, + AttrNumber *sortColIdx, Oid *sortOperators, + Oid *collations, bool *nullsFirst) +{ + IncrementalSort *node; + Plan *plan; + + node = makeNode(IncrementalSort); + + plan = &node->sort.plan; + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + node->nPresortedCols = nPresortedCols; + node->sort.numCols = numCols; + node->sort.sortColIdx = sortColIdx; + node->sort.sortOperators = sortOperators; + node->sort.collations = collations; + node->sort.nullsFirst = nullsFirst; + + return node; +} + +/* + * prepare_sort_from_pathkeys + * Prepare to sort according to given pathkeys + * + * This is used to set up for Sort, MergeAppend, and Gather Merge nodes. It + * calculates the executor's representation of the sort key information, and + * adjusts the plan targetlist if needed to add resjunk sort columns. + * + * Input parameters: + * 'lefttree' is the plan node which yields input tuples + * 'pathkeys' is the list of pathkeys by which the result is to be sorted + * 'relids' identifies the child relation being sorted, if any + * 'reqColIdx' is NULL or an array of required sort key column numbers + * 'adjust_tlist_in_place' is true if lefttree must be modified in-place + * + * We must convert the pathkey information into arrays of sort key column + * numbers, sort operator OIDs, collation OIDs, and nulls-first flags, + * which is the representation the executor wants. These are returned into + * the output parameters *p_numsortkeys etc. + * + * When looking for matches to an EquivalenceClass's members, we will only + * consider child EC members if they belong to given 'relids'. This protects + * against possible incorrect matches to child expressions that contain no + * Vars. + * + * If reqColIdx isn't NULL then it contains sort key column numbers that + * we should match. This is used when making child plans for a MergeAppend; + * it's an error if we can't match the columns. + * + * If the pathkeys include expressions that aren't simple Vars, we will + * usually need to add resjunk items to the input plan's targetlist to + * compute these expressions, since a Sort or MergeAppend node itself won't + * do any such calculations. If the input plan type isn't one that can do + * projections, this means adding a Result node just to do the projection. + * However, the caller can pass adjust_tlist_in_place = true to force the + * lefttree tlist to be modified in-place regardless of whether the node type + * can project --- we use this for fixing the tlist of MergeAppend itself. + * + * Returns the node which is to be the input to the Sort (either lefttree, + * or a Result stacked atop lefttree). + */ +static Plan * +prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, + Relids relids, + const AttrNumber *reqColIdx, + bool adjust_tlist_in_place, + int *p_numsortkeys, + AttrNumber **p_sortColIdx, + Oid **p_sortOperators, + Oid **p_collations, + bool **p_nullsFirst) +{ + List *tlist = lefttree->targetlist; + ListCell *i; + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* + * We will need at most list_length(pathkeys) sort columns; possibly less + */ + numsortkeys = list_length(pathkeys); + sortColIdx = (AttrNumber *) palloc(numsortkeys * sizeof(AttrNumber)); + sortOperators = (Oid *) palloc(numsortkeys * sizeof(Oid)); + collations = (Oid *) palloc(numsortkeys * sizeof(Oid)); + nullsFirst = (bool *) palloc(numsortkeys * sizeof(bool)); + + numsortkeys = 0; + + foreach(i, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(i); + EquivalenceClass *ec = pathkey->pk_eclass; + EquivalenceMember *em; + TargetEntry *tle = NULL; + Oid pk_datatype = InvalidOid; + Oid sortop; + ListCell *j; + + if (ec->ec_has_volatile) + { + /* + * If the pathkey's EquivalenceClass is volatile, then it must + * have come from an ORDER BY clause, and we have to match it to + * that same targetlist entry. + */ + if (ec->ec_sortref == 0) /* can't happen */ + elog(ERROR, "volatile EquivalenceClass has no sortref"); + tle = get_sortgroupref_tle(ec->ec_sortref, tlist); + Assert(tle); + Assert(list_length(ec->ec_members) == 1); + pk_datatype = ((EquivalenceMember *) linitial(ec->ec_members))->em_datatype; + } + else if (reqColIdx != NULL) + { + /* + * If we are given a sort column number to match, only consider + * the single TLE at that position. It's possible that there is + * no such TLE, in which case fall through and generate a resjunk + * targetentry (we assume this must have happened in the parent + * plan as well). If there is a TLE but it doesn't match the + * pathkey's EC, we do the same, which is probably the wrong thing + * but we'll leave it to caller to complain about the mismatch. + */ + tle = get_tle_by_resno(tlist, reqColIdx[numsortkeys]); + if (tle) + { + em = find_ec_member_matching_expr(ec, tle->expr, relids); + if (em) + { + /* found expr at right place in tlist */ + pk_datatype = em->em_datatype; + } + else + tle = NULL; + } + } + else + { + /* + * Otherwise, we can sort by any non-constant expression listed in + * the pathkey's EquivalenceClass. For now, we take the first + * tlist item found in the EC. If there's no match, we'll generate + * a resjunk entry using the first EC member that is an expression + * in the input's vars. (The non-const restriction only matters + * if the EC is below_outer_join; but if it isn't, it won't + * contain consts anyway, else we'd have discarded the pathkey as + * redundant.) + * + * XXX if we have a choice, is there any way of figuring out which + * might be cheapest to execute? (For example, int4lt is likely + * much cheaper to execute than numericlt, but both might appear + * in the same equivalence class...) Not clear that we ever will + * have an interesting choice in practice, so it may not matter. + */ + foreach(j, tlist) + { + tle = (TargetEntry *) lfirst(j); + em = find_ec_member_matching_expr(ec, tle->expr, relids); + if (em) + { + /* found expr already in tlist */ + pk_datatype = em->em_datatype; + break; + } + tle = NULL; + } + } + + if (!tle) + { + /* + * No matching tlist item; look for a computable expression. + */ + em = find_computable_ec_member(NULL, ec, tlist, relids, false); + if (!em) + elog(ERROR, "could not find pathkey item to sort"); + pk_datatype = em->em_datatype; + + /* + * Do we need to insert a Result node? + */ + if (!adjust_tlist_in_place && + !is_projection_capable_plan(lefttree)) + { + /* copy needed so we don't modify input's tlist below */ + tlist = copyObject(tlist); + lefttree = inject_projection_plan(lefttree, tlist, + lefttree->parallel_safe); + } + + /* Don't bother testing is_projection_capable_plan again */ + adjust_tlist_in_place = true; + + /* + * Add resjunk entry to input's tlist + */ + tle = makeTargetEntry(copyObject(em->em_expr), + list_length(tlist) + 1, + NULL, + true); + tlist = lappend(tlist, tle); + lefttree->targetlist = tlist; /* just in case NIL before */ + } + + /* + * Look up the correct sort operator from the PathKey's slightly + * abstracted representation. + */ + sortop = get_opfamily_member(pathkey->pk_opfamily, + pk_datatype, + pk_datatype, + pathkey->pk_strategy); + if (!OidIsValid(sortop)) /* should not happen */ + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + pathkey->pk_strategy, pk_datatype, pk_datatype, + pathkey->pk_opfamily); + + /* Add the column to the sort arrays */ + sortColIdx[numsortkeys] = tle->resno; + sortOperators[numsortkeys] = sortop; + collations[numsortkeys] = ec->ec_collation; + nullsFirst[numsortkeys] = pathkey->pk_nulls_first; + numsortkeys++; + } + + /* Return results */ + *p_numsortkeys = numsortkeys; + *p_sortColIdx = sortColIdx; + *p_sortOperators = sortOperators; + *p_collations = collations; + *p_nullsFirst = nullsFirst; + + return lefttree; +} + +/* + * make_sort_from_pathkeys + * Create sort plan to sort according to given pathkeys + * + * 'lefttree' is the node which yields input tuples + * 'pathkeys' is the list of pathkeys by which the result is to be sorted + * 'relids' is the set of relations required by prepare_sort_from_pathkeys() + */ +static Sort * +make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Relids relids) +{ + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* Compute sort column info, and adjust lefttree as needed */ + lefttree = prepare_sort_from_pathkeys(lefttree, pathkeys, + relids, + NULL, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* Now build the Sort node */ + return make_sort(lefttree, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); +} + +/* + * make_incrementalsort_from_pathkeys + * Create sort plan to sort according to given pathkeys + * + * 'lefttree' is the node which yields input tuples + * 'pathkeys' is the list of pathkeys by which the result is to be sorted + * 'relids' is the set of relations required by prepare_sort_from_pathkeys() + * 'nPresortedCols' is the number of presorted columns in input tuples + */ +static IncrementalSort * +make_incrementalsort_from_pathkeys(Plan *lefttree, List *pathkeys, + Relids relids, int nPresortedCols) +{ + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* Compute sort column info, and adjust lefttree as needed */ + lefttree = prepare_sort_from_pathkeys(lefttree, pathkeys, + relids, + NULL, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* Now build the Sort node */ + return make_incrementalsort(lefttree, numsortkeys, nPresortedCols, + sortColIdx, sortOperators, + collations, nullsFirst); +} + +/* + * make_sort_from_sortclauses + * Create sort plan to sort according to given sortclauses + * + * 'sortcls' is a list of SortGroupClauses + * 'lefttree' is the node which yields input tuples + */ +Sort * +make_sort_from_sortclauses(List *sortcls, Plan *lefttree) +{ + List *sub_tlist = lefttree->targetlist; + ListCell *l; + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* Convert list-ish representation to arrays wanted by executor */ + numsortkeys = list_length(sortcls); + sortColIdx = (AttrNumber *) palloc(numsortkeys * sizeof(AttrNumber)); + sortOperators = (Oid *) palloc(numsortkeys * sizeof(Oid)); + collations = (Oid *) palloc(numsortkeys * sizeof(Oid)); + nullsFirst = (bool *) palloc(numsortkeys * sizeof(bool)); + + numsortkeys = 0; + foreach(l, sortcls) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist); + + sortColIdx[numsortkeys] = tle->resno; + sortOperators[numsortkeys] = sortcl->sortop; + collations[numsortkeys] = exprCollation((Node *) tle->expr); + nullsFirst[numsortkeys] = sortcl->nulls_first; + numsortkeys++; + } + + return make_sort(lefttree, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); +} + +/* + * make_sort_from_groupcols + * Create sort plan to sort based on grouping columns + * + * 'groupcls' is the list of SortGroupClauses + * 'grpColIdx' gives the column numbers to use + * + * This might look like it could be merged with make_sort_from_sortclauses, + * but presently we *must* use the grpColIdx[] array to locate sort columns, + * because the child plan's tlist is not marked with ressortgroupref info + * appropriate to the grouping node. So, only the sort ordering info + * is used from the SortGroupClause entries. + */ +static Sort * +make_sort_from_groupcols(List *groupcls, + AttrNumber *grpColIdx, + Plan *lefttree) +{ + List *sub_tlist = lefttree->targetlist; + ListCell *l; + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* Convert list-ish representation to arrays wanted by executor */ + numsortkeys = list_length(groupcls); + sortColIdx = (AttrNumber *) palloc(numsortkeys * sizeof(AttrNumber)); + sortOperators = (Oid *) palloc(numsortkeys * sizeof(Oid)); + collations = (Oid *) palloc(numsortkeys * sizeof(Oid)); + nullsFirst = (bool *) palloc(numsortkeys * sizeof(bool)); + + numsortkeys = 0; + foreach(l, groupcls) + { + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_tle_by_resno(sub_tlist, grpColIdx[numsortkeys]); + + if (!tle) + elog(ERROR, "could not retrieve tle for sort-from-groupcols"); + + sortColIdx[numsortkeys] = tle->resno; + sortOperators[numsortkeys] = grpcl->sortop; + collations[numsortkeys] = exprCollation((Node *) tle->expr); + nullsFirst[numsortkeys] = grpcl->nulls_first; + numsortkeys++; + } + + return make_sort(lefttree, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); +} + +static Material * +make_material(Plan *lefttree) +{ + Material *node = makeNode(Material); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + return node; +} + +/* + * materialize_finished_plan: stick a Material node atop a completed plan + * + * There are a couple of places where we want to attach a Material node + * after completion of create_plan(), without any MaterialPath path. + * Those places should probably be refactored someday to do this on the + * Path representation, but it's not worth the trouble yet. + */ +Plan * +materialize_finished_plan(Plan *subplan) +{ + Plan *matplan; + Path matpath; /* dummy for result of cost_material */ + + matplan = (Plan *) make_material(subplan); + + /* + * XXX horrid kluge: if there are any initPlans attached to the subplan, + * move them up to the Material node, which is now effectively the top + * plan node in its query level. This prevents failure in + * SS_finalize_plan(), which see for comments. We don't bother adjusting + * the subplan's cost estimate for this. + */ + matplan->initPlan = subplan->initPlan; + subplan->initPlan = NIL; + + /* Set cost data */ + cost_material(&matpath, + subplan->startup_cost, + subplan->total_cost, + subplan->plan_rows, + subplan->plan_width); + matplan->startup_cost = matpath.startup_cost; + matplan->total_cost = matpath.total_cost; + matplan->plan_rows = subplan->plan_rows; + matplan->plan_width = subplan->plan_width; + matplan->parallel_aware = false; + matplan->parallel_safe = subplan->parallel_safe; + + return matplan; +} + +static Memoize * +make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, + List *param_exprs, bool singlerow, bool binary_mode, + uint32 est_entries, Bitmapset *keyparamids) +{ + Memoize *node = makeNode(Memoize); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + node->numKeys = list_length(param_exprs); + node->hashOperators = hashoperators; + node->collations = collations; + node->param_exprs = param_exprs; + node->singlerow = singlerow; + node->binary_mode = binary_mode; + node->est_entries = est_entries; + node->keyparamids = keyparamids; + + return node; +} + +Agg * +make_agg(List *tlist, List *qual, + AggStrategy aggstrategy, AggSplit aggsplit, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, + List *groupingSets, List *chain, double dNumGroups, + Size transitionSpace, Plan *lefttree) +{ + Agg *node = makeNode(Agg); + Plan *plan = &node->plan; + long numGroups; + + /* Reduce to long, but 'ware overflow! */ + numGroups = clamp_cardinality_to_long(dNumGroups); + + node->aggstrategy = aggstrategy; + node->aggsplit = aggsplit; + node->numCols = numGroupCols; + node->grpColIdx = grpColIdx; + node->grpOperators = grpOperators; + node->grpCollations = grpCollations; + node->numGroups = numGroups; + node->transitionSpace = transitionSpace; + node->aggParams = NULL; /* SS_finalize_plan() will fill this */ + node->groupingSets = groupingSets; + node->chain = chain; + + plan->qual = qual; + plan->targetlist = tlist; + plan->lefttree = lefttree; + plan->righttree = NULL; + + return node; +} + +static WindowAgg * +make_windowagg(List *tlist, Index winref, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, + int frameOptions, Node *startOffset, Node *endOffset, + Oid startInRangeFunc, Oid endInRangeFunc, + Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, + List *runCondition, List *qual, bool topWindow, Plan *lefttree) +{ + WindowAgg *node = makeNode(WindowAgg); + Plan *plan = &node->plan; + + node->winref = winref; + node->partNumCols = partNumCols; + node->partColIdx = partColIdx; + node->partOperators = partOperators; + node->partCollations = partCollations; + node->ordNumCols = ordNumCols; + node->ordColIdx = ordColIdx; + node->ordOperators = ordOperators; + node->ordCollations = ordCollations; + node->frameOptions = frameOptions; + node->startOffset = startOffset; + node->endOffset = endOffset; + node->runCondition = runCondition; + /* a duplicate of the above for EXPLAIN */ + node->runConditionOrig = runCondition; + node->startInRangeFunc = startInRangeFunc; + node->endInRangeFunc = endInRangeFunc; + node->inRangeColl = inRangeColl; + node->inRangeAsc = inRangeAsc; + node->inRangeNullsFirst = inRangeNullsFirst; + node->topWindow = topWindow; + + plan->targetlist = tlist; + plan->lefttree = lefttree; + plan->righttree = NULL; + plan->qual = qual; + + return node; +} + +static Group * +make_group(List *tlist, + List *qual, + int numGroupCols, + AttrNumber *grpColIdx, + Oid *grpOperators, + Oid *grpCollations, + Plan *lefttree) +{ + Group *node = makeNode(Group); + Plan *plan = &node->plan; + + node->numCols = numGroupCols; + node->grpColIdx = grpColIdx; + node->grpOperators = grpOperators; + node->grpCollations = grpCollations; + + plan->qual = qual; + plan->targetlist = tlist; + plan->lefttree = lefttree; + plan->righttree = NULL; + + return node; +} + +/* + * distinctList is a list of SortGroupClauses, identifying the targetlist items + * that should be considered by the Unique filter. The input path must + * already be sorted accordingly. + */ +static Unique * +make_unique_from_sortclauses(Plan *lefttree, List *distinctList) +{ + Unique *node = makeNode(Unique); + Plan *plan = &node->plan; + int numCols = list_length(distinctList); + int keyno = 0; + AttrNumber *uniqColIdx; + Oid *uniqOperators; + Oid *uniqCollations; + ListCell *slitem; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + /* + * convert SortGroupClause list into arrays of attr indexes and equality + * operators, as wanted by executor + */ + Assert(numCols > 0); + uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(slitem, distinctList) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); + + uniqColIdx[keyno] = tle->resno; + uniqOperators[keyno] = sortcl->eqop; + uniqCollations[keyno] = exprCollation((Node *) tle->expr); + Assert(OidIsValid(uniqOperators[keyno])); + keyno++; + } + + node->numCols = numCols; + node->uniqColIdx = uniqColIdx; + node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; + + return node; +} + +/* + * as above, but use pathkeys to identify the sort columns and semantics + */ +static Unique * +make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) +{ + Unique *node = makeNode(Unique); + Plan *plan = &node->plan; + int keyno = 0; + AttrNumber *uniqColIdx; + Oid *uniqOperators; + Oid *uniqCollations; + ListCell *lc; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + /* + * Convert pathkeys list into arrays of attr indexes and equality + * operators, as wanted by executor. This has a lot in common with + * prepare_sort_from_pathkeys ... maybe unify sometime? + */ + Assert(numCols >= 0 && numCols <= list_length(pathkeys)); + uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *ec = pathkey->pk_eclass; + EquivalenceMember *em; + TargetEntry *tle = NULL; + Oid pk_datatype = InvalidOid; + Oid eqop; + ListCell *j; + + /* Ignore pathkeys beyond the specified number of columns */ + if (keyno >= numCols) + break; + + if (ec->ec_has_volatile) + { + /* + * If the pathkey's EquivalenceClass is volatile, then it must + * have come from an ORDER BY clause, and we have to match it to + * that same targetlist entry. + */ + if (ec->ec_sortref == 0) /* can't happen */ + elog(ERROR, "volatile EquivalenceClass has no sortref"); + tle = get_sortgroupref_tle(ec->ec_sortref, plan->targetlist); + Assert(tle); + Assert(list_length(ec->ec_members) == 1); + pk_datatype = ((EquivalenceMember *) linitial(ec->ec_members))->em_datatype; + } + else + { + /* + * Otherwise, we can use any non-constant expression listed in the + * pathkey's EquivalenceClass. For now, we take the first tlist + * item found in the EC. + */ + foreach(j, plan->targetlist) + { + tle = (TargetEntry *) lfirst(j); + em = find_ec_member_matching_expr(ec, tle->expr, NULL); + if (em) + { + /* found expr already in tlist */ + pk_datatype = em->em_datatype; + break; + } + tle = NULL; + } + } + + if (!tle) + elog(ERROR, "could not find pathkey item to sort"); + + /* + * Look up the correct equality operator from the PathKey's slightly + * abstracted representation. + */ + eqop = get_opfamily_member(pathkey->pk_opfamily, + pk_datatype, + pk_datatype, + BTEqualStrategyNumber); + if (!OidIsValid(eqop)) /* should not happen */ + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, pk_datatype, pk_datatype, + pathkey->pk_opfamily); + + uniqColIdx[keyno] = tle->resno; + uniqOperators[keyno] = eqop; + uniqCollations[keyno] = ec->ec_collation; + + keyno++; + } + + node->numCols = numCols; + node->uniqColIdx = uniqColIdx; + node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; + + return node; +} + +static Gather * +make_gather(List *qptlist, + List *qpqual, + int nworkers, + int rescan_param, + bool single_copy, + Plan *subplan) +{ + Gather *node = makeNode(Gather); + Plan *plan = &node->plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = subplan; + plan->righttree = NULL; + node->num_workers = nworkers; + node->rescan_param = rescan_param; + node->single_copy = single_copy; + node->invisible = false; + node->initParam = NULL; + + return node; +} + +/* + * distinctList is a list of SortGroupClauses, identifying the targetlist + * items that should be considered by the SetOp filter. The input path must + * already be sorted accordingly. + */ +static SetOp * +make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, + List *distinctList, AttrNumber flagColIdx, int firstFlag, + long numGroups) +{ + SetOp *node = makeNode(SetOp); + Plan *plan = &node->plan; + int numCols = list_length(distinctList); + int keyno = 0; + AttrNumber *dupColIdx; + Oid *dupOperators; + Oid *dupCollations; + ListCell *slitem; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + /* + * convert SortGroupClause list into arrays of attr indexes and equality + * operators, as wanted by executor + */ + dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(slitem, distinctList) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); + + dupColIdx[keyno] = tle->resno; + dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); + Assert(OidIsValid(dupOperators[keyno])); + keyno++; + } + + node->cmd = cmd; + node->strategy = strategy; + node->numCols = numCols; + node->dupColIdx = dupColIdx; + node->dupOperators = dupOperators; + node->dupCollations = dupCollations; + node->flagColIdx = flagColIdx; + node->firstFlag = firstFlag; + node->numGroups = numGroups; + + return node; +} + +/* + * make_lockrows + * Build a LockRows plan node + */ +static LockRows * +make_lockrows(Plan *lefttree, List *rowMarks, int epqParam) +{ + LockRows *node = makeNode(LockRows); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + node->rowMarks = rowMarks; + node->epqParam = epqParam; + + return node; +} + +/* + * make_limit + * Build a Limit plan node + */ +Limit * +make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, + LimitOption limitOption, int uniqNumCols, AttrNumber *uniqColIdx, + Oid *uniqOperators, Oid *uniqCollations) +{ + Limit *node = makeNode(Limit); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + node->limitOffset = limitOffset; + node->limitCount = limitCount; + node->limitOption = limitOption; + node->uniqNumCols = uniqNumCols; + node->uniqColIdx = uniqColIdx; + node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; + + return node; +} + +/* + * make_result + * Build a Result plan node + */ +static Result * +make_result(List *tlist, + Node *resconstantqual, + Plan *subplan) +{ + Result *node = makeNode(Result); + Plan *plan = &node->plan; + + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = subplan; + plan->righttree = NULL; + node->resconstantqual = resconstantqual; + + return node; +} + +/* + * make_project_set + * Build a ProjectSet plan node + */ +static ProjectSet * +make_project_set(List *tlist, + Plan *subplan) +{ + ProjectSet *node = makeNode(ProjectSet); + Plan *plan = &node->plan; + + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = subplan; + plan->righttree = NULL; + + return node; +} + +/* + * make_modifytable + * Build a ModifyTable plan node + */ +static ModifyTable * +make_modifytable(PlannerInfo *root, Plan *subplan, + CmdType operation, bool canSetTag, + Index nominalRelation, Index rootRelation, + bool partColsUpdated, + List *resultRelations, + List *updateColnosLists, + List *withCheckOptionLists, List *returningLists, + List *rowMarks, OnConflictExpr *onconflict, + List *mergeActionLists, int epqParam) +{ + ModifyTable *node = makeNode(ModifyTable); + List *fdw_private_list; + Bitmapset *direct_modify_plans; + ListCell *lc; + int i; + + Assert(operation == CMD_MERGE || + (operation == CMD_UPDATE ? + list_length(resultRelations) == list_length(updateColnosLists) : + updateColnosLists == NIL)); + Assert(withCheckOptionLists == NIL || + list_length(resultRelations) == list_length(withCheckOptionLists)); + Assert(returningLists == NIL || + list_length(resultRelations) == list_length(returningLists)); + + node->plan.lefttree = subplan; + node->plan.righttree = NULL; + node->plan.qual = NIL; + /* setrefs.c will fill in the targetlist, if needed */ + node->plan.targetlist = NIL; + + node->operation = operation; + node->canSetTag = canSetTag; + node->nominalRelation = nominalRelation; + node->rootRelation = rootRelation; + node->partColsUpdated = partColsUpdated; + node->resultRelations = resultRelations; + if (!onconflict) + { + node->onConflictAction = ONCONFLICT_NONE; + node->onConflictSet = NIL; + node->onConflictCols = NIL; + node->onConflictWhere = NULL; + node->arbiterIndexes = NIL; + node->exclRelRTI = 0; + node->exclRelTlist = NIL; + } + else + { + node->onConflictAction = onconflict->action; + + /* + * Here we convert the ON CONFLICT UPDATE tlist, if any, to the + * executor's convention of having consecutive resno's. The actual + * target column numbers are saved in node->onConflictCols. (This + * could be done earlier, but there seems no need to.) + */ + node->onConflictSet = onconflict->onConflictSet; + node->onConflictCols = + extract_update_targetlist_colnos(node->onConflictSet); + node->onConflictWhere = onconflict->onConflictWhere; + + /* + * If a set of unique index inference elements was provided (an + * INSERT...ON CONFLICT "inference specification"), then infer + * appropriate unique indexes (or throw an error if none are + * available). + */ + node->arbiterIndexes = infer_arbiter_indexes(root); + + node->exclRelRTI = onconflict->exclRelIndex; + node->exclRelTlist = onconflict->exclRelTlist; + } + node->updateColnosLists = updateColnosLists; + node->withCheckOptionLists = withCheckOptionLists; + node->returningLists = returningLists; + node->rowMarks = rowMarks; + node->mergeActionLists = mergeActionLists; + node->epqParam = epqParam; + + /* + * For each result relation that is a foreign table, allow the FDW to + * construct private plan data, and accumulate it all into a list. + */ + fdw_private_list = NIL; + direct_modify_plans = NULL; + i = 0; + foreach(lc, resultRelations) + { + Index rti = lfirst_int(lc); + FdwRoutine *fdwroutine; + List *fdw_private; + bool direct_modify; + + /* + * If possible, we want to get the FdwRoutine from our RelOptInfo for + * the table. But sometimes we don't have a RelOptInfo and must get + * it the hard way. (In INSERT, the target relation is not scanned, + * so it's not a baserel; and there are also corner cases for + * updatable views where the target rel isn't a baserel.) + */ + if (rti < root->simple_rel_array_size && + root->simple_rel_array[rti] != NULL) + { + RelOptInfo *resultRel = root->simple_rel_array[rti]; + + fdwroutine = resultRel->fdwroutine; + + /* + * MERGE is not currently supported for foreign tables and we + * already checked when the table mentioned in the query is + * foreign; but we can still get here if a partitioned table has a + * foreign table as partition. Disallow that now, to avoid an + * uglier error message later. + */ + if (operation == CMD_MERGE && fdwroutine != NULL) + { + RangeTblEntry *rte = root->simple_rte_array[rti]; + + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot execute MERGE on relation \"%s\"", + get_rel_name(rte->relid)), + errdetail_relkind_not_supported(rte->relkind)); + } + + } + else + { + RangeTblEntry *rte = planner_rt_fetch(rti, root); + + Assert(rte->rtekind == RTE_RELATION); + Assert(operation != CMD_MERGE); + if (rte->relkind == RELKIND_FOREIGN_TABLE) + fdwroutine = GetFdwRoutineByRelId(rte->relid); + else + fdwroutine = NULL; + } + + /* + * Try to modify the foreign table directly if (1) the FDW provides + * callback functions needed for that and (2) there are no local + * structures that need to be run for each modified row: row-level + * triggers on the foreign table, stored generated columns, WITH CHECK + * OPTIONs from parent views. + */ + direct_modify = false; + if (fdwroutine != NULL && + fdwroutine->PlanDirectModify != NULL && + fdwroutine->BeginDirectModify != NULL && + fdwroutine->IterateDirectModify != NULL && + fdwroutine->EndDirectModify != NULL && + withCheckOptionLists == NIL && + !has_row_triggers(root, rti, operation) && + !has_stored_generated_columns(root, rti)) + direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i); + if (direct_modify) + direct_modify_plans = bms_add_member(direct_modify_plans, i); + + if (!direct_modify && + fdwroutine != NULL && + fdwroutine->PlanForeignModify != NULL) + fdw_private = fdwroutine->PlanForeignModify(root, node, rti, i); + else + fdw_private = NIL; + fdw_private_list = lappend(fdw_private_list, fdw_private); + i++; + } + node->fdwPrivLists = fdw_private_list; + node->fdwDirectModifyPlans = direct_modify_plans; + + return node; +} + +/* + * is_projection_capable_path + * Check whether a given Path node is able to do projection. + */ +bool +is_projection_capable_path(Path *path) +{ + /* Most plan types can project, so just list the ones that can't */ + switch (path->pathtype) + { + case T_Hash: + case T_Material: + case T_Memoize: + case T_Sort: + case T_IncrementalSort: + case T_Unique: + case T_SetOp: + case T_LockRows: + case T_Limit: + case T_ModifyTable: + case T_MergeAppend: + case T_RecursiveUnion: + return false; + case T_CustomScan: + if (castNode(CustomPath, path)->flags & CUSTOMPATH_SUPPORT_PROJECTION) + return true; + return false; + case T_Append: + + /* + * Append can't project, but if an AppendPath is being used to + * represent a dummy path, what will actually be generated is a + * Result which can project. + */ + return IS_DUMMY_APPEND(path); + case T_ProjectSet: + + /* + * Although ProjectSet certainly projects, say "no" because we + * don't want the planner to randomly replace its tlist with + * something else; the SRFs have to stay at top level. This might + * get relaxed later. + */ + return false; + default: + break; + } + return true; +} + +/* + * is_projection_capable_plan + * Check whether a given Plan node is able to do projection. + */ +bool +is_projection_capable_plan(Plan *plan) +{ + /* Most plan types can project, so just list the ones that can't */ + switch (nodeTag(plan)) + { + case T_Hash: + case T_Material: + case T_Memoize: + case T_Sort: + case T_Unique: + case T_SetOp: + case T_LockRows: + case T_Limit: + case T_ModifyTable: + case T_Append: + case T_MergeAppend: + case T_RecursiveUnion: + return false; + case T_CustomScan: + if (((CustomScan *) plan)->flags & CUSTOMPATH_SUPPORT_PROJECTION) + return true; + return false; + case T_ProjectSet: + + /* + * Although ProjectSet certainly projects, say "no" because we + * don't want the planner to randomly replace its tlist with + * something else; the SRFs have to stay at top level. This might + * get relaxed later. + */ + return false; + default: + break; + } + return true; +} diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c new file mode 100644 index 0000000..023efba --- /dev/null +++ b/src/backend/optimizer/plan/initsplan.c @@ -0,0 +1,2752 @@ +/*------------------------------------------------------------------------- + * + * initsplan.c + * Target list, qualification, joininfo initialization routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/initsplan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_class.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/inherit.h" +#include "optimizer/joininfo.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/placeholder.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" +#include "parser/analyze.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" +#include "utils/typcache.h" + +/* These parameters are set by GUC */ +int from_collapse_limit; +int join_collapse_limit; + + +/* Elements of the postponed_qual_list used during deconstruct_recurse */ +typedef struct PostponedQual +{ + Node *qual; /* a qual clause waiting to be processed */ + Relids relids; /* the set of baserels it references */ +} PostponedQual; + + +static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, + Index rtindex); +static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode, + bool below_outer_join, + Relids *qualscope, Relids *inner_join_rels, + List **postponed_qual_list); +static void process_security_barrier_quals(PlannerInfo *root, + int rti, Relids qualscope, + bool below_outer_join); +static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root, + Relids left_rels, Relids right_rels, + Relids inner_join_rels, + JoinType jointype, List *clause); +static void compute_semijoin_info(PlannerInfo *root, SpecialJoinInfo *sjinfo, + List *clause); +static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, + bool below_outer_join, + JoinType jointype, + Index security_level, + Relids qualscope, + Relids ojscope, + Relids outerjoin_nonnullable, + List **postponed_qual_list); +static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p, + Relids *nullable_relids_p, bool is_pushed_down); +static bool check_equivalence_delay(PlannerInfo *root, + RestrictInfo *restrictinfo); +static bool check_redundant_nullability_qual(PlannerInfo *root, Node *clause); +static void check_mergejoinable(RestrictInfo *restrictinfo); +static void check_hashjoinable(RestrictInfo *restrictinfo); +static void check_memoizable(RestrictInfo *restrictinfo); + + +/***************************************************************************** + * + * JOIN TREES + * + *****************************************************************************/ + +/* + * add_base_rels_to_query + * + * Scan the query's jointree and create baserel RelOptInfos for all + * the base relations (e.g., table, subquery, and function RTEs) + * appearing in the jointree. + * + * The initial invocation must pass root->parse->jointree as the value of + * jtnode. Internally, the function recurses through the jointree. + * + * At the end of this process, there should be one baserel RelOptInfo for + * every non-join RTE that is used in the query. Some of the baserels + * may be appendrel parents, which will require additional "otherrel" + * RelOptInfos for their member rels, but those are added later. + */ +void +add_base_rels_to_query(PlannerInfo *root, Node *jtnode) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + (void) build_simple_rel(root, varno, NULL); + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + foreach(l, f->fromlist) + add_base_rels_to_query(root, lfirst(l)); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + add_base_rels_to_query(root, j->larg); + add_base_rels_to_query(root, j->rarg); + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} + +/* + * add_other_rels_to_query + * create "otherrel" RelOptInfos for the children of appendrel baserels + * + * At the end of this process, there should be RelOptInfos for all relations + * that will be scanned by the query. + */ +void +add_other_rels_to_query(PlannerInfo *root) +{ + int rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + RangeTblEntry *rte = root->simple_rte_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + /* Ignore any "otherrels" that were already added. */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + /* If it's marked as inheritable, look for children. */ + if (rte->inh) + expand_inherited_rtentry(root, rel, rte, rti); + } +} + + +/***************************************************************************** + * + * TARGET LISTS + * + *****************************************************************************/ + +/* + * build_base_rel_tlists + * Add targetlist entries for each var needed in the query's final tlist + * (and HAVING clause, if any) to the appropriate base relations. + * + * We mark such vars as needed by "relation 0" to ensure that they will + * propagate up through all join plan steps. + */ +void +build_base_rel_tlists(PlannerInfo *root, List *final_tlist) +{ + List *tlist_vars = pull_var_clause((Node *) final_tlist, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + if (tlist_vars != NIL) + { + add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true); + list_free(tlist_vars); + } + + /* + * If there's a HAVING clause, we'll need the Vars it uses, too. Note + * that HAVING can contain Aggrefs but not WindowFuncs. + */ + if (root->parse->havingQual) + { + List *having_vars = pull_var_clause(root->parse->havingQual, + PVC_RECURSE_AGGREGATES | + PVC_INCLUDE_PLACEHOLDERS); + + if (having_vars != NIL) + { + add_vars_to_targetlist(root, having_vars, + bms_make_singleton(0), true); + list_free(having_vars); + } + } +} + +/* + * add_vars_to_targetlist + * For each variable appearing in the list, add it to the owning + * relation's targetlist if not already present, and mark the variable + * as being needed for the indicated join (or for final output if + * where_needed includes "relation 0"). + * + * The list may also contain PlaceHolderVars. These don't necessarily + * have a single owning relation; we keep their attr_needed info in + * root->placeholder_list instead. If create_new_ph is true, it's OK + * to create new PlaceHolderInfos; otherwise, the PlaceHolderInfos must + * already exist, and we should only update their ph_needed. (This should + * be true before deconstruct_jointree begins, and false after that.) + */ +void +add_vars_to_targetlist(PlannerInfo *root, List *vars, + Relids where_needed, bool create_new_ph) +{ + ListCell *temp; + + Assert(!bms_is_empty(where_needed)); + + foreach(temp, vars) + { + Node *node = (Node *) lfirst(temp); + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + RelOptInfo *rel = find_base_rel(root, var->varno); + int attno = var->varattno; + + if (bms_is_subset(where_needed, rel->relids)) + continue; + Assert(attno >= rel->min_attr && attno <= rel->max_attr); + attno -= rel->min_attr; + if (rel->attr_needed[attno] == NULL) + { + /* Variable not yet requested, so add to rel's targetlist */ + /* XXX is copyObject necessary here? */ + rel->reltarget->exprs = lappend(rel->reltarget->exprs, + copyObject(var)); + /* reltarget cost and width will be computed later */ + } + rel->attr_needed[attno] = bms_add_members(rel->attr_needed[attno], + where_needed); + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, + create_new_ph); + + phinfo->ph_needed = bms_add_members(phinfo->ph_needed, + where_needed); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); + } +} + + +/***************************************************************************** + * + * LATERAL REFERENCES + * + *****************************************************************************/ + +/* + * find_lateral_references + * For each LATERAL subquery, extract all its references to Vars and + * PlaceHolderVars of the current query level, and make sure those values + * will be available for evaluation of the subquery. + * + * While later planning steps ensure that the Var/PHV source rels are on the + * outside of nestloops relative to the LATERAL subquery, we also need to + * ensure that the Vars/PHVs propagate up to the nestloop join level; this + * means setting suitable where_needed values for them. + * + * Note that this only deals with lateral references in unflattened LATERAL + * subqueries. When we flatten a LATERAL subquery, its lateral references + * become plain Vars in the parent query, but they may have to be wrapped in + * PlaceHolderVars if they need to be forced NULL by outer joins that don't + * also null the LATERAL subquery. That's all handled elsewhere. + * + * This has to run before deconstruct_jointree, since it might result in + * creation of PlaceHolderInfos. + */ +void +find_lateral_references(PlannerInfo *root) +{ + Index rti; + + /* We need do nothing if the query contains no LATERAL RTEs */ + if (!root->hasLateralRTEs) + return; + + /* + * Examine all baserels (the rel array has been set up by now). + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* + * This bit is less obvious than it might look. We ignore appendrel + * otherrels and consider only their parent baserels. In a case where + * a LATERAL-containing UNION ALL subquery was pulled up, it is the + * otherrel that is actually going to be in the plan. However, we + * want to mark all its lateral references as needed by the parent, + * because it is the parent's relid that will be used for join + * planning purposes. And the parent's RTE will contain all the + * lateral references we need to know, since the pulled-up member is + * nothing but a copy of parts of the original RTE's subquery. We + * could visit the parent's children instead and transform their + * references back to the parent's relid, but it would be much more + * complicated for no real gain. (Important here is that the child + * members have not yet received any processing beyond being pulled + * up.) Similarly, in appendrels created by inheritance expansion, + * it's sufficient to look at the parent relation. + */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + extract_lateral_references(root, brel, rti); + } +} + +static void +extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex) +{ + RangeTblEntry *rte = root->simple_rte_array[rtindex]; + List *vars; + List *newvars; + Relids where_needed; + ListCell *lc; + + /* No cross-references are possible if it's not LATERAL */ + if (!rte->lateral) + return; + + /* Fetch the appropriate variables */ + if (rte->rtekind == RTE_RELATION) + vars = pull_vars_of_level((Node *) rte->tablesample, 0); + else if (rte->rtekind == RTE_SUBQUERY) + vars = pull_vars_of_level((Node *) rte->subquery, 1); + else if (rte->rtekind == RTE_FUNCTION) + vars = pull_vars_of_level((Node *) rte->functions, 0); + else if (rte->rtekind == RTE_TABLEFUNC) + vars = pull_vars_of_level((Node *) rte->tablefunc, 0); + else if (rte->rtekind == RTE_VALUES) + vars = pull_vars_of_level((Node *) rte->values_lists, 0); + else + { + Assert(false); + return; /* keep compiler quiet */ + } + + if (vars == NIL) + return; /* nothing to do */ + + /* Copy each Var (or PlaceHolderVar) and adjust it to match our level */ + newvars = NIL; + foreach(lc, vars) + { + Node *node = (Node *) lfirst(lc); + + node = copyObject(node); + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* Adjustment is easy since it's just one node */ + var->varlevelsup = 0; + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + int levelsup = phv->phlevelsup; + + /* Have to work harder to adjust the contained expression too */ + if (levelsup != 0) + IncrementVarSublevelsUp(node, -levelsup, 0); + + /* + * If we pulled the PHV out of a subquery RTE, its expression + * needs to be preprocessed. subquery_planner() already did this + * for level-zero PHVs in function and values RTEs, though. + */ + if (levelsup > 0) + phv->phexpr = preprocess_phv_expression(root, phv->phexpr); + } + else + Assert(false); + newvars = lappend(newvars, node); + } + + list_free(vars); + + /* + * We mark the Vars as being "needed" at the LATERAL RTE. This is a bit + * of a cheat: a more formal approach would be to mark each one as needed + * at the join of the LATERAL RTE with its source RTE. But it will work, + * and it's much less tedious than computing a separate where_needed for + * each Var. + */ + where_needed = bms_make_singleton(rtindex); + + /* + * Push Vars into their source relations' targetlists, and PHVs into + * root->placeholder_list. + */ + add_vars_to_targetlist(root, newvars, where_needed, true); + + /* Remember the lateral references for create_lateral_join_info */ + brel->lateral_vars = newvars; +} + +/* + * create_lateral_join_info + * Fill in the per-base-relation direct_lateral_relids, lateral_relids + * and lateral_referencers sets. + * + * This has to run after deconstruct_jointree, because we need to know the + * final ph_eval_at values for PlaceHolderVars. + */ +void +create_lateral_join_info(PlannerInfo *root) +{ + bool found_laterals = false; + Index rti; + ListCell *lc; + + /* We need do nothing if the query contains no LATERAL RTEs */ + if (!root->hasLateralRTEs) + return; + + /* + * Examine all baserels (the rel array has been set up by now). + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + Relids lateral_relids; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + lateral_relids = NULL; + + /* consider each laterally-referenced Var or PHV */ + foreach(lc, brel->lateral_vars) + { + Node *node = (Node *) lfirst(lc); + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + found_laterals = true; + lateral_relids = bms_add_member(lateral_relids, + var->varno); + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, + false); + + found_laterals = true; + lateral_relids = bms_add_members(lateral_relids, + phinfo->ph_eval_at); + } + else + Assert(false); + } + + /* We now have all the simple lateral refs from this rel */ + brel->direct_lateral_relids = lateral_relids; + brel->lateral_relids = bms_copy(lateral_relids); + } + + /* + * Now check for lateral references within PlaceHolderVars, and mark their + * eval_at rels as having lateral references to the source rels. + * + * For a PHV that is due to be evaluated at a baserel, mark its source(s) + * as direct lateral dependencies of the baserel (adding onto the ones + * recorded above). If it's due to be evaluated at a join, mark its + * source(s) as indirect lateral dependencies of each baserel in the join, + * ie put them into lateral_relids but not direct_lateral_relids. This is + * appropriate because we can't put any such baserel on the outside of a + * join to one of the PHV's lateral dependencies, but on the other hand we + * also can't yet join it directly to the dependency. + */ + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + Relids eval_at = phinfo->ph_eval_at; + int varno; + + if (phinfo->ph_lateral == NULL) + continue; /* PHV is uninteresting if no lateral refs */ + + found_laterals = true; + + if (bms_get_singleton_member(eval_at, &varno)) + { + /* Evaluation site is a baserel */ + RelOptInfo *brel = find_base_rel(root, varno); + + brel->direct_lateral_relids = + bms_add_members(brel->direct_lateral_relids, + phinfo->ph_lateral); + brel->lateral_relids = + bms_add_members(brel->lateral_relids, + phinfo->ph_lateral); + } + else + { + /* Evaluation site is a join */ + varno = -1; + while ((varno = bms_next_member(eval_at, varno)) >= 0) + { + RelOptInfo *brel = find_base_rel(root, varno); + + brel->lateral_relids = bms_add_members(brel->lateral_relids, + phinfo->ph_lateral); + } + } + } + + /* + * If we found no actual lateral references, we're done; but reset the + * hasLateralRTEs flag to avoid useless work later. + */ + if (!found_laterals) + { + root->hasLateralRTEs = false; + return; + } + + /* + * Calculate the transitive closure of the lateral_relids sets, so that + * they describe both direct and indirect lateral references. If relation + * X references Y laterally, and Y references Z laterally, then we will + * have to scan X on the inside of a nestloop with Z, so for all intents + * and purposes X is laterally dependent on Z too. + * + * This code is essentially Warshall's algorithm for transitive closure. + * The outer loop considers each baserel, and propagates its lateral + * dependencies to those baserels that have a lateral dependency on it. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + Relids outer_lateral_relids; + Index rti2; + + if (brel == NULL || brel->reloptkind != RELOPT_BASEREL) + continue; + + /* need not consider baserel further if it has no lateral refs */ + outer_lateral_relids = brel->lateral_relids; + if (outer_lateral_relids == NULL) + continue; + + /* else scan all baserels */ + for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++) + { + RelOptInfo *brel2 = root->simple_rel_array[rti2]; + + if (brel2 == NULL || brel2->reloptkind != RELOPT_BASEREL) + continue; + + /* if brel2 has lateral ref to brel, propagate brel's refs */ + if (bms_is_member(rti, brel2->lateral_relids)) + brel2->lateral_relids = bms_add_members(brel2->lateral_relids, + outer_lateral_relids); + } + } + + /* + * Now that we've identified all lateral references, mark each baserel + * with the set of relids of rels that reference it laterally (possibly + * indirectly) --- that is, the inverse mapping of lateral_relids. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + Relids lateral_relids; + int rti2; + + if (brel == NULL || brel->reloptkind != RELOPT_BASEREL) + continue; + + /* Nothing to do at rels with no lateral refs */ + lateral_relids = brel->lateral_relids; + if (lateral_relids == NULL) + continue; + + /* + * We should not have broken the invariant that lateral_relids is + * exactly NULL if empty. + */ + Assert(!bms_is_empty(lateral_relids)); + + /* Also, no rel should have a lateral dependency on itself */ + Assert(!bms_is_member(rti, lateral_relids)); + + /* Mark this rel's referencees */ + rti2 = -1; + while ((rti2 = bms_next_member(lateral_relids, rti2)) >= 0) + { + RelOptInfo *brel2 = root->simple_rel_array[rti2]; + + Assert(brel2 != NULL && brel2->reloptkind == RELOPT_BASEREL); + brel2->lateral_referencers = + bms_add_member(brel2->lateral_referencers, rti); + } + } +} + + +/***************************************************************************** + * + * JOIN TREE PROCESSING + * + *****************************************************************************/ + +/* + * deconstruct_jointree + * Recursively scan the query's join tree for WHERE and JOIN/ON qual + * clauses, and add these to the appropriate restrictinfo and joininfo + * lists belonging to base RelOptInfos. Also, add SpecialJoinInfo nodes + * to root->join_info_list for any outer joins appearing in the query tree. + * Return a "joinlist" data structure showing the join order decisions + * that need to be made by make_one_rel(). + * + * The "joinlist" result is a list of items that are either RangeTblRef + * jointree nodes or sub-joinlists. All the items at the same level of + * joinlist must be joined in an order to be determined by make_one_rel() + * (note that legal orders may be constrained by SpecialJoinInfo nodes). + * A sub-joinlist represents a subproblem to be planned separately. Currently + * sub-joinlists arise only from FULL OUTER JOIN or when collapsing of + * subproblems is stopped by join_collapse_limit or from_collapse_limit. + * + * NOTE: when dealing with inner joins, it is appropriate to let a qual clause + * be evaluated at the lowest level where all the variables it mentions are + * available. However, we cannot push a qual down into the nullable side(s) + * of an outer join since the qual might eliminate matching rows and cause a + * NULL row to be incorrectly emitted by the join. Therefore, we artificially + * OR the minimum-relids of such an outer join into the required_relids of + * clauses appearing above it. This forces those clauses to be delayed until + * application of the outer join (or maybe even higher in the join tree). + */ +List * +deconstruct_jointree(PlannerInfo *root) +{ + List *result; + Relids qualscope; + Relids inner_join_rels; + List *postponed_qual_list = NIL; + + /* Start recursion at top of jointree */ + Assert(root->parse->jointree != NULL && + IsA(root->parse->jointree, FromExpr)); + + /* this is filled as we scan the jointree */ + root->nullable_baserels = NULL; + + result = deconstruct_recurse(root, (Node *) root->parse->jointree, false, + &qualscope, &inner_join_rels, + &postponed_qual_list); + + /* Shouldn't be any leftover quals */ + Assert(postponed_qual_list == NIL); + + return result; +} + +/* + * deconstruct_recurse + * One recursion level of deconstruct_jointree processing. + * + * Inputs: + * jtnode is the jointree node to examine + * below_outer_join is true if this node is within the nullable side of a + * higher-level outer join + * Outputs: + * *qualscope gets the set of base Relids syntactically included in this + * jointree node (do not modify or free this, as it may also be pointed + * to by RestrictInfo and SpecialJoinInfo nodes) + * *inner_join_rels gets the set of base Relids syntactically included in + * inner joins appearing at or below this jointree node (do not modify + * or free this, either) + * *postponed_qual_list is a list of PostponedQual structs, which we can + * add quals to if they turn out to belong to a higher join level + * Return value is the appropriate joinlist for this jointree node + * + * In addition, entries will be added to root->join_info_list for outer joins. + */ +static List * +deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, + Relids *qualscope, Relids *inner_join_rels, + List **postponed_qual_list) +{ + List *joinlist; + + if (jtnode == NULL) + { + *qualscope = NULL; + *inner_join_rels = NULL; + return NIL; + } + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + /* qualscope is just the one RTE */ + *qualscope = bms_make_singleton(varno); + /* Deal with any securityQuals attached to the RTE */ + if (root->qual_security_level > 0) + process_security_barrier_quals(root, + varno, + *qualscope, + below_outer_join); + /* A single baserel does not create an inner join */ + *inner_join_rels = NULL; + joinlist = list_make1(jtnode); + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + List *child_postponed_quals = NIL; + int remaining; + ListCell *l; + + /* + * First, recurse to handle child joins. We collapse subproblems into + * a single joinlist whenever the resulting joinlist wouldn't exceed + * from_collapse_limit members. Also, always collapse one-element + * subproblems, since that won't lengthen the joinlist anyway. + */ + *qualscope = NULL; + *inner_join_rels = NULL; + joinlist = NIL; + remaining = list_length(f->fromlist); + foreach(l, f->fromlist) + { + Relids sub_qualscope; + List *sub_joinlist; + int sub_members; + + sub_joinlist = deconstruct_recurse(root, lfirst(l), + below_outer_join, + &sub_qualscope, + inner_join_rels, + &child_postponed_quals); + *qualscope = bms_add_members(*qualscope, sub_qualscope); + sub_members = list_length(sub_joinlist); + remaining--; + if (sub_members <= 1 || + list_length(joinlist) + sub_members + remaining <= from_collapse_limit) + joinlist = list_concat(joinlist, sub_joinlist); + else + joinlist = lappend(joinlist, sub_joinlist); + } + + /* + * A FROM with more than one list element is an inner join subsuming + * all below it, so we should report inner_join_rels = qualscope. If + * there was exactly one element, we should (and already did) report + * whatever its inner_join_rels were. If there were no elements (is + * that still possible?) the initialization before the loop fixed it. + */ + if (list_length(f->fromlist) > 1) + *inner_join_rels = *qualscope; + + /* + * Try to process any quals postponed by children. If they need + * further postponement, add them to my output postponed_qual_list. + */ + foreach(l, child_postponed_quals) + { + PostponedQual *pq = (PostponedQual *) lfirst(l); + + if (bms_is_subset(pq->relids, *qualscope)) + distribute_qual_to_rels(root, pq->qual, + below_outer_join, JOIN_INNER, + root->qual_security_level, + *qualscope, NULL, NULL, + NULL); + else + *postponed_qual_list = lappend(*postponed_qual_list, pq); + } + + /* + * Now process the top-level quals. + */ + foreach(l, (List *) f->quals) + { + Node *qual = (Node *) lfirst(l); + + distribute_qual_to_rels(root, qual, + below_outer_join, JOIN_INNER, + root->qual_security_level, + *qualscope, NULL, NULL, + postponed_qual_list); + } + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + List *child_postponed_quals = NIL; + Relids leftids, + rightids, + left_inners, + right_inners, + nonnullable_rels, + nullable_rels, + ojscope; + List *leftjoinlist, + *rightjoinlist; + List *my_quals; + SpecialJoinInfo *sjinfo; + ListCell *l; + + /* + * Order of operations here is subtle and critical. First we recurse + * to handle sub-JOINs. Their join quals will be placed without + * regard for whether this level is an outer join, which is correct. + * Then we place our own join quals, which are restricted by lower + * outer joins in any case, and are forced to this level if this is an + * outer join and they mention the outer side. Finally, if this is an + * outer join, we create a join_info_list entry for the join. This + * will prevent quals above us in the join tree that use those rels + * from being pushed down below this level. (It's okay for upper + * quals to be pushed down to the outer side, however.) + */ + switch (j->jointype) + { + case JOIN_INNER: + leftjoinlist = deconstruct_recurse(root, j->larg, + below_outer_join, + &leftids, &left_inners, + &child_postponed_quals); + rightjoinlist = deconstruct_recurse(root, j->rarg, + below_outer_join, + &rightids, &right_inners, + &child_postponed_quals); + *qualscope = bms_union(leftids, rightids); + *inner_join_rels = *qualscope; + /* Inner join adds no restrictions for quals */ + nonnullable_rels = NULL; + /* and it doesn't force anything to null, either */ + nullable_rels = NULL; + break; + case JOIN_LEFT: + case JOIN_ANTI: + leftjoinlist = deconstruct_recurse(root, j->larg, + below_outer_join, + &leftids, &left_inners, + &child_postponed_quals); + rightjoinlist = deconstruct_recurse(root, j->rarg, + true, + &rightids, &right_inners, + &child_postponed_quals); + *qualscope = bms_union(leftids, rightids); + *inner_join_rels = bms_union(left_inners, right_inners); + nonnullable_rels = leftids; + nullable_rels = rightids; + break; + case JOIN_SEMI: + leftjoinlist = deconstruct_recurse(root, j->larg, + below_outer_join, + &leftids, &left_inners, + &child_postponed_quals); + rightjoinlist = deconstruct_recurse(root, j->rarg, + below_outer_join, + &rightids, &right_inners, + &child_postponed_quals); + *qualscope = bms_union(leftids, rightids); + *inner_join_rels = bms_union(left_inners, right_inners); + /* Semi join adds no restrictions for quals */ + nonnullable_rels = NULL; + + /* + * Theoretically, a semijoin would null the RHS; but since the + * RHS can't be accessed above the join, this is immaterial + * and we needn't account for it. + */ + nullable_rels = NULL; + break; + case JOIN_FULL: + leftjoinlist = deconstruct_recurse(root, j->larg, + true, + &leftids, &left_inners, + &child_postponed_quals); + rightjoinlist = deconstruct_recurse(root, j->rarg, + true, + &rightids, &right_inners, + &child_postponed_quals); + *qualscope = bms_union(leftids, rightids); + *inner_join_rels = bms_union(left_inners, right_inners); + /* each side is both outer and inner */ + nonnullable_rels = *qualscope; + nullable_rels = *qualscope; + break; + default: + /* JOIN_RIGHT was eliminated during reduce_outer_joins() */ + elog(ERROR, "unrecognized join type: %d", + (int) j->jointype); + nonnullable_rels = NULL; /* keep compiler quiet */ + nullable_rels = NULL; + leftjoinlist = rightjoinlist = NIL; + break; + } + + /* Report all rels that will be nulled anywhere in the jointree */ + root->nullable_baserels = bms_add_members(root->nullable_baserels, + nullable_rels); + + /* + * Try to process any quals postponed by children. If they need + * further postponement, add them to my output postponed_qual_list. + * Quals that can be processed now must be included in my_quals, so + * that they'll be handled properly in make_outerjoininfo. + */ + my_quals = NIL; + foreach(l, child_postponed_quals) + { + PostponedQual *pq = (PostponedQual *) lfirst(l); + + if (bms_is_subset(pq->relids, *qualscope)) + my_quals = lappend(my_quals, pq->qual); + else + { + /* + * We should not be postponing any quals past an outer join. + * If this Assert fires, pull_up_subqueries() messed up. + */ + Assert(j->jointype == JOIN_INNER); + *postponed_qual_list = lappend(*postponed_qual_list, pq); + } + } + my_quals = list_concat(my_quals, (List *) j->quals); + + /* + * For an OJ, form the SpecialJoinInfo now, because we need the OJ's + * semantic scope (ojscope) to pass to distribute_qual_to_rels. But + * we mustn't add it to join_info_list just yet, because we don't want + * distribute_qual_to_rels to think it is an outer join below us. + * + * Semijoins are a bit of a hybrid: we build a SpecialJoinInfo, but we + * want ojscope = NULL for distribute_qual_to_rels. + */ + if (j->jointype != JOIN_INNER) + { + sjinfo = make_outerjoininfo(root, + leftids, rightids, + *inner_join_rels, + j->jointype, + my_quals); + if (j->jointype == JOIN_SEMI) + ojscope = NULL; + else + ojscope = bms_union(sjinfo->min_lefthand, + sjinfo->min_righthand); + } + else + { + sjinfo = NULL; + ojscope = NULL; + } + + /* Process the JOIN's qual clauses */ + foreach(l, my_quals) + { + Node *qual = (Node *) lfirst(l); + + distribute_qual_to_rels(root, qual, + below_outer_join, j->jointype, + root->qual_security_level, + *qualscope, + ojscope, nonnullable_rels, + postponed_qual_list); + } + + /* Now we can add the SpecialJoinInfo to join_info_list */ + if (sjinfo) + { + root->join_info_list = lappend(root->join_info_list, sjinfo); + /* Each time we do that, recheck placeholder eval levels */ + update_placeholder_eval_levels(root, sjinfo); + } + + /* + * Finally, compute the output joinlist. We fold subproblems together + * except at a FULL JOIN or where join_collapse_limit would be + * exceeded. + */ + if (j->jointype == JOIN_FULL) + { + /* force the join order exactly at this node */ + joinlist = list_make1(list_make2(leftjoinlist, rightjoinlist)); + } + else if (list_length(leftjoinlist) + list_length(rightjoinlist) <= + join_collapse_limit) + { + /* OK to combine subproblems */ + joinlist = list_concat(leftjoinlist, rightjoinlist); + } + else + { + /* can't combine, but needn't force join order above here */ + Node *leftpart, + *rightpart; + + /* avoid creating useless 1-element sublists */ + if (list_length(leftjoinlist) == 1) + leftpart = (Node *) linitial(leftjoinlist); + else + leftpart = (Node *) leftjoinlist; + if (list_length(rightjoinlist) == 1) + rightpart = (Node *) linitial(rightjoinlist); + else + rightpart = (Node *) rightjoinlist; + joinlist = list_make2(leftpart, rightpart); + } + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); + joinlist = NIL; /* keep compiler quiet */ + } + return joinlist; +} + +/* + * process_security_barrier_quals + * Transfer security-barrier quals into relation's baserestrictinfo list. + * + * The rewriter put any relevant security-barrier conditions into the RTE's + * securityQuals field, but it's now time to copy them into the rel's + * baserestrictinfo. + * + * In inheritance cases, we only consider quals attached to the parent rel + * here; they will be valid for all children too, so it's okay to consider + * them for purposes like equivalence class creation. Quals attached to + * individual child rels will be dealt with during path creation. + */ +static void +process_security_barrier_quals(PlannerInfo *root, + int rti, Relids qualscope, + bool below_outer_join) +{ + RangeTblEntry *rte = root->simple_rte_array[rti]; + Index security_level = 0; + ListCell *lc; + + /* + * Each element of the securityQuals list has been preprocessed into an + * implicitly-ANDed list of clauses. All the clauses in a given sublist + * should get the same security level, but successive sublists get higher + * levels. + */ + foreach(lc, rte->securityQuals) + { + List *qualset = (List *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, qualset) + { + Node *qual = (Node *) lfirst(lc2); + + /* + * We cheat to the extent of passing ojscope = qualscope rather + * than its more logical value of NULL. The only effect this has + * is to force a Var-free qual to be evaluated at the rel rather + * than being pushed up to top of tree, which we don't want. + */ + distribute_qual_to_rels(root, qual, + below_outer_join, + JOIN_INNER, + security_level, + qualscope, + qualscope, + NULL, + NULL); + } + security_level++; + } + + /* Assert that qual_security_level is higher than anything we just used */ + Assert(security_level <= root->qual_security_level); +} + +/* + * make_outerjoininfo + * Build a SpecialJoinInfo for the current outer join + * + * Inputs: + * left_rels: the base Relids syntactically on outer side of join + * right_rels: the base Relids syntactically on inner side of join + * inner_join_rels: base Relids participating in inner joins below this one + * jointype: what it says (must always be LEFT, FULL, SEMI, or ANTI) + * clause: the outer join's join condition (in implicit-AND format) + * + * The node should eventually be appended to root->join_info_list, but we + * do not do that here. + * + * Note: we assume that this function is invoked bottom-up, so that + * root->join_info_list already contains entries for all outer joins that are + * syntactically below this one. + */ +static SpecialJoinInfo * +make_outerjoininfo(PlannerInfo *root, + Relids left_rels, Relids right_rels, + Relids inner_join_rels, + JoinType jointype, List *clause) +{ + SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo); + Relids clause_relids; + Relids strict_relids; + Relids min_lefthand; + Relids min_righthand; + ListCell *l; + + /* + * We should not see RIGHT JOIN here because left/right were switched + * earlier + */ + Assert(jointype != JOIN_INNER); + Assert(jointype != JOIN_RIGHT); + + /* + * Presently the executor cannot support FOR [KEY] UPDATE/SHARE marking of + * rels appearing on the nullable side of an outer join. (It's somewhat + * unclear what that would mean, anyway: what should we mark when a result + * row is generated from no element of the nullable relation?) So, + * complain if any nullable rel is FOR [KEY] UPDATE/SHARE. + * + * You might be wondering why this test isn't made far upstream in the + * parser. It's because the parser hasn't got enough info --- consider + * FOR UPDATE applied to a view. Only after rewriting and flattening do + * we know whether the view contains an outer join. + * + * We use the original RowMarkClause list here; the PlanRowMark list would + * list everything. + */ + foreach(l, root->parse->rowMarks) + { + RowMarkClause *rc = (RowMarkClause *) lfirst(l); + + if (bms_is_member(rc->rti, right_rels) || + (jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels))) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*------ + translator: %s is a SQL row locking clause such as FOR UPDATE */ + errmsg("%s cannot be applied to the nullable side of an outer join", + LCS_asString(rc->strength)))); + } + + sjinfo->syn_lefthand = left_rels; + sjinfo->syn_righthand = right_rels; + sjinfo->jointype = jointype; + /* this always starts out false */ + sjinfo->delay_upper_joins = false; + + compute_semijoin_info(root, sjinfo, clause); + + /* If it's a full join, no need to be very smart */ + if (jointype == JOIN_FULL) + { + sjinfo->min_lefthand = bms_copy(left_rels); + sjinfo->min_righthand = bms_copy(right_rels); + sjinfo->lhs_strict = false; /* don't care about this */ + return sjinfo; + } + + /* + * Retrieve all relids mentioned within the join clause. + */ + clause_relids = pull_varnos(root, (Node *) clause); + + /* + * For which relids is the clause strict, ie, it cannot succeed if the + * rel's columns are all NULL? + */ + strict_relids = find_nonnullable_rels((Node *) clause); + + /* Remember whether the clause is strict for any LHS relations */ + sjinfo->lhs_strict = bms_overlap(strict_relids, left_rels); + + /* + * Required LHS always includes the LHS rels mentioned in the clause. We + * may have to add more rels based on lower outer joins; see below. + */ + min_lefthand = bms_intersect(clause_relids, left_rels); + + /* + * Similarly for required RHS. But here, we must also include any lower + * inner joins, to ensure we don't try to commute with any of them. + */ + min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels), + right_rels); + + /* + * Now check previous outer joins for ordering restrictions. + */ + foreach(l, root->join_info_list) + { + SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l); + + /* + * A full join is an optimization barrier: we can't associate into or + * out of it. Hence, if it overlaps either LHS or RHS of the current + * rel, expand that side's min relset to cover the whole full join. + */ + if (otherinfo->jointype == JOIN_FULL) + { + if (bms_overlap(left_rels, otherinfo->syn_lefthand) || + bms_overlap(left_rels, otherinfo->syn_righthand)) + { + min_lefthand = bms_add_members(min_lefthand, + otherinfo->syn_lefthand); + min_lefthand = bms_add_members(min_lefthand, + otherinfo->syn_righthand); + } + if (bms_overlap(right_rels, otherinfo->syn_lefthand) || + bms_overlap(right_rels, otherinfo->syn_righthand)) + { + min_righthand = bms_add_members(min_righthand, + otherinfo->syn_lefthand); + min_righthand = bms_add_members(min_righthand, + otherinfo->syn_righthand); + } + /* Needn't do anything else with the full join */ + continue; + } + + /* + * For a lower OJ in our LHS, if our join condition uses the lower + * join's RHS and is not strict for that rel, we must preserve the + * ordering of the two OJs, so add lower OJ's full syntactic relset to + * min_lefthand. (We must use its full syntactic relset, not just its + * min_lefthand + min_righthand. This is because there might be other + * OJs below this one that this one can commute with, but we cannot + * commute with them if we don't with this one.) Also, if the current + * join is a semijoin or antijoin, we must preserve ordering + * regardless of strictness. + * + * Note: I believe we have to insist on being strict for at least one + * rel in the lower OJ's min_righthand, not its whole syn_righthand. + */ + if (bms_overlap(left_rels, otherinfo->syn_righthand)) + { + if (bms_overlap(clause_relids, otherinfo->syn_righthand) && + (jointype == JOIN_SEMI || jointype == JOIN_ANTI || + !bms_overlap(strict_relids, otherinfo->min_righthand))) + { + min_lefthand = bms_add_members(min_lefthand, + otherinfo->syn_lefthand); + min_lefthand = bms_add_members(min_lefthand, + otherinfo->syn_righthand); + } + } + + /* + * For a lower OJ in our RHS, if our join condition does not use the + * lower join's RHS and the lower OJ's join condition is strict, we + * can interchange the ordering of the two OJs; otherwise we must add + * the lower OJ's full syntactic relset to min_righthand. + * + * Also, if our join condition does not use the lower join's LHS + * either, force the ordering to be preserved. Otherwise we can end + * up with SpecialJoinInfos with identical min_righthands, which can + * confuse join_is_legal (see discussion in backend/optimizer/README). + * + * Also, we must preserve ordering anyway if either the current join + * or the lower OJ is either a semijoin or an antijoin. + * + * Here, we have to consider that "our join condition" includes any + * clauses that syntactically appeared above the lower OJ and below + * ours; those are equivalent to degenerate clauses in our OJ and must + * be treated as such. Such clauses obviously can't reference our + * LHS, and they must be non-strict for the lower OJ's RHS (else + * reduce_outer_joins would have reduced the lower OJ to a plain + * join). Hence the other ways in which we handle clauses within our + * join condition are not affected by them. The net effect is + * therefore sufficiently represented by the delay_upper_joins flag + * saved for us by check_outerjoin_delay. + */ + if (bms_overlap(right_rels, otherinfo->syn_righthand)) + { + if (bms_overlap(clause_relids, otherinfo->syn_righthand) || + !bms_overlap(clause_relids, otherinfo->min_lefthand) || + jointype == JOIN_SEMI || + jointype == JOIN_ANTI || + otherinfo->jointype == JOIN_SEMI || + otherinfo->jointype == JOIN_ANTI || + !otherinfo->lhs_strict || otherinfo->delay_upper_joins) + { + min_righthand = bms_add_members(min_righthand, + otherinfo->syn_lefthand); + min_righthand = bms_add_members(min_righthand, + otherinfo->syn_righthand); + } + } + } + + /* + * Examine PlaceHolderVars. If a PHV is supposed to be evaluated within + * this join's nullable side, then ensure that min_righthand contains the + * full eval_at set of the PHV. This ensures that the PHV actually can be + * evaluated within the RHS. Note that this works only because we should + * already have determined the final eval_at level for any PHV + * syntactically within this join. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + Relids ph_syn_level = phinfo->ph_var->phrels; + + /* Ignore placeholder if it didn't syntactically come from RHS */ + if (!bms_is_subset(ph_syn_level, right_rels)) + continue; + + /* Else, prevent join from being formed before we eval the PHV */ + min_righthand = bms_add_members(min_righthand, phinfo->ph_eval_at); + } + + /* + * If we found nothing to put in min_lefthand, punt and make it the full + * LHS, to avoid having an empty min_lefthand which will confuse later + * processing. (We don't try to be smart about such cases, just correct.) + * Likewise for min_righthand. + */ + if (bms_is_empty(min_lefthand)) + min_lefthand = bms_copy(left_rels); + if (bms_is_empty(min_righthand)) + min_righthand = bms_copy(right_rels); + + /* Now they'd better be nonempty */ + Assert(!bms_is_empty(min_lefthand)); + Assert(!bms_is_empty(min_righthand)); + /* Shouldn't overlap either */ + Assert(!bms_overlap(min_lefthand, min_righthand)); + + sjinfo->min_lefthand = min_lefthand; + sjinfo->min_righthand = min_righthand; + + return sjinfo; +} + +/* + * compute_semijoin_info + * Fill semijoin-related fields of a new SpecialJoinInfo + * + * Note: this relies on only the jointype and syn_righthand fields of the + * SpecialJoinInfo; the rest may not be set yet. + */ +static void +compute_semijoin_info(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *clause) +{ + List *semi_operators; + List *semi_rhs_exprs; + bool all_btree; + bool all_hash; + ListCell *lc; + + /* Initialize semijoin-related fields in case we can't unique-ify */ + sjinfo->semi_can_btree = false; + sjinfo->semi_can_hash = false; + sjinfo->semi_operators = NIL; + sjinfo->semi_rhs_exprs = NIL; + + /* Nothing more to do if it's not a semijoin */ + if (sjinfo->jointype != JOIN_SEMI) + return; + + /* + * Look to see whether the semijoin's join quals consist of AND'ed + * equality operators, with (only) RHS variables on only one side of each + * one. If so, we can figure out how to enforce uniqueness for the RHS. + * + * Note that the input clause list is the list of quals that are + * *syntactically* associated with the semijoin, which in practice means + * the synthesized comparison list for an IN or the WHERE of an EXISTS. + * Particularly in the latter case, it might contain clauses that aren't + * *semantically* associated with the join, but refer to just one side or + * the other. We can ignore such clauses here, as they will just drop + * down to be processed within one side or the other. (It is okay to + * consider only the syntactically-associated clauses here because for a + * semijoin, no higher-level quals could refer to the RHS, and so there + * can be no other quals that are semantically associated with this join. + * We do things this way because it is useful to have the set of potential + * unique-ification expressions before we can extract the list of quals + * that are actually semantically associated with the particular join.) + * + * Note that the semi_operators list consists of the joinqual operators + * themselves (but commuted if needed to put the RHS value on the right). + * These could be cross-type operators, in which case the operator + * actually needed for uniqueness is a related single-type operator. We + * assume here that that operator will be available from the btree or hash + * opclass when the time comes ... if not, create_unique_plan() will fail. + */ + semi_operators = NIL; + semi_rhs_exprs = NIL; + all_btree = true; + all_hash = enable_hashagg; /* don't consider hash if not enabled */ + foreach(lc, clause) + { + OpExpr *op = (OpExpr *) lfirst(lc); + Oid opno; + Node *left_expr; + Node *right_expr; + Relids left_varnos; + Relids right_varnos; + Relids all_varnos; + Oid opinputtype; + + /* Is it a binary opclause? */ + if (!IsA(op, OpExpr) || + list_length(op->args) != 2) + { + /* No, but does it reference both sides? */ + all_varnos = pull_varnos(root, (Node *) op); + if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || + bms_is_subset(all_varnos, sjinfo->syn_righthand)) + { + /* + * Clause refers to only one rel, so ignore it --- unless it + * contains volatile functions, in which case we'd better + * punt. + */ + if (contain_volatile_functions((Node *) op)) + return; + continue; + } + /* Non-operator clause referencing both sides, must punt */ + return; + } + + /* Extract data from binary opclause */ + opno = op->opno; + left_expr = linitial(op->args); + right_expr = lsecond(op->args); + left_varnos = pull_varnos(root, left_expr); + right_varnos = pull_varnos(root, right_expr); + all_varnos = bms_union(left_varnos, right_varnos); + opinputtype = exprType(left_expr); + + /* Does it reference both sides? */ + if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || + bms_is_subset(all_varnos, sjinfo->syn_righthand)) + { + /* + * Clause refers to only one rel, so ignore it --- unless it + * contains volatile functions, in which case we'd better punt. + */ + if (contain_volatile_functions((Node *) op)) + return; + continue; + } + + /* check rel membership of arguments */ + if (!bms_is_empty(right_varnos) && + bms_is_subset(right_varnos, sjinfo->syn_righthand) && + !bms_overlap(left_varnos, sjinfo->syn_righthand)) + { + /* typical case, right_expr is RHS variable */ + } + else if (!bms_is_empty(left_varnos) && + bms_is_subset(left_varnos, sjinfo->syn_righthand) && + !bms_overlap(right_varnos, sjinfo->syn_righthand)) + { + /* flipped case, left_expr is RHS variable */ + opno = get_commutator(opno); + if (!OidIsValid(opno)) + return; + right_expr = left_expr; + } + else + { + /* mixed membership of args, punt */ + return; + } + + /* all operators must be btree equality or hash equality */ + if (all_btree) + { + /* oprcanmerge is considered a hint... */ + if (!op_mergejoinable(opno, opinputtype) || + get_mergejoin_opfamilies(opno) == NIL) + all_btree = false; + } + if (all_hash) + { + /* ... but oprcanhash had better be correct */ + if (!op_hashjoinable(opno, opinputtype)) + all_hash = false; + } + if (!(all_btree || all_hash)) + return; + + /* so far so good, keep building lists */ + semi_operators = lappend_oid(semi_operators, opno); + semi_rhs_exprs = lappend(semi_rhs_exprs, copyObject(right_expr)); + } + + /* Punt if we didn't find at least one column to unique-ify */ + if (semi_rhs_exprs == NIL) + return; + + /* + * The expressions we'd need to unique-ify mustn't be volatile. + */ + if (contain_volatile_functions((Node *) semi_rhs_exprs)) + return; + + /* + * If we get here, we can unique-ify the semijoin's RHS using at least one + * of sorting and hashing. Save the information about how to do that. + */ + sjinfo->semi_can_btree = all_btree; + sjinfo->semi_can_hash = all_hash; + sjinfo->semi_operators = semi_operators; + sjinfo->semi_rhs_exprs = semi_rhs_exprs; +} + + +/***************************************************************************** + * + * QUALIFICATIONS + * + *****************************************************************************/ + +/* + * distribute_qual_to_rels + * Add clause information to either the baserestrictinfo or joininfo list + * (depending on whether the clause is a join) of each base relation + * mentioned in the clause. A RestrictInfo node is created and added to + * the appropriate list for each rel. Alternatively, if the clause uses a + * mergejoinable operator and is not delayed by outer-join rules, enter + * the left- and right-side expressions into the query's list of + * EquivalenceClasses. Alternatively, if the clause needs to be treated + * as belonging to a higher join level, just add it to postponed_qual_list. + * + * 'clause': the qual clause to be distributed + * 'below_outer_join': true if the qual is from a JOIN/ON that is below the + * nullable side of a higher-level outer join + * 'jointype': type of join the qual is from (JOIN_INNER for a WHERE clause) + * 'security_level': security_level to assign to the qual + * 'qualscope': set of baserels the qual's syntactic scope covers + * 'ojscope': NULL if not an outer-join qual, else the minimum set of baserels + * needed to form this join + * 'outerjoin_nonnullable': NULL if not an outer-join qual, else the set of + * baserels appearing on the outer (nonnullable) side of the join + * (for FULL JOIN this includes both sides of the join, and must in fact + * equal qualscope) + * 'postponed_qual_list': list of PostponedQual structs, which we can add + * this qual to if it turns out to belong to a higher join level. + * Can be NULL if caller knows postponement is impossible. + * + * 'qualscope' identifies what level of JOIN the qual came from syntactically. + * 'ojscope' is needed if we decide to force the qual up to the outer-join + * level, which will be ojscope not necessarily qualscope. + * + * At the time this is called, root->join_info_list must contain entries for + * all and only those special joins that are syntactically below this qual. + */ +static void +distribute_qual_to_rels(PlannerInfo *root, Node *clause, + bool below_outer_join, + JoinType jointype, + Index security_level, + Relids qualscope, + Relids ojscope, + Relids outerjoin_nonnullable, + List **postponed_qual_list) +{ + Relids relids; + bool is_pushed_down; + bool outerjoin_delayed; + bool pseudoconstant = false; + bool maybe_equivalence; + bool maybe_outer_join; + Relids nullable_relids; + RestrictInfo *restrictinfo; + + /* + * Retrieve all relids mentioned within the clause. + */ + relids = pull_varnos(root, clause); + + /* + * In ordinary SQL, a WHERE or JOIN/ON clause can't reference any rels + * that aren't within its syntactic scope; however, if we pulled up a + * LATERAL subquery then we might find such references in quals that have + * been pulled up. We need to treat such quals as belonging to the join + * level that includes every rel they reference. Although we could make + * pull_up_subqueries() place such quals correctly to begin with, it's + * easier to handle it here. When we find a clause that contains Vars + * outside its syntactic scope, we add it to the postponed-quals list, and + * process it once we've recursed back up to the appropriate join level. + */ + if (!bms_is_subset(relids, qualscope)) + { + PostponedQual *pq = (PostponedQual *) palloc(sizeof(PostponedQual)); + + Assert(root->hasLateralRTEs); /* shouldn't happen otherwise */ + Assert(jointype == JOIN_INNER); /* mustn't postpone past outer join */ + pq->qual = clause; + pq->relids = relids; + *postponed_qual_list = lappend(*postponed_qual_list, pq); + return; + } + + /* + * If it's an outer-join clause, also check that relids is a subset of + * ojscope. (This should not fail if the syntactic scope check passed.) + */ + if (ojscope && !bms_is_subset(relids, ojscope)) + elog(ERROR, "JOIN qualification cannot refer to other relations"); + + /* + * If the clause is variable-free, our normal heuristic for pushing it + * down to just the mentioned rels doesn't work, because there are none. + * + * If the clause is an outer-join clause, we must force it to the OJ's + * semantic level to preserve semantics. + * + * Otherwise, when the clause contains volatile functions, we force it to + * be evaluated at its original syntactic level. This preserves the + * expected semantics. + * + * When the clause contains no volatile functions either, it is actually a + * pseudoconstant clause that will not change value during any one + * execution of the plan, and hence can be used as a one-time qual in a + * gating Result plan node. We put such a clause into the regular + * RestrictInfo lists for the moment, but eventually createplan.c will + * pull it out and make a gating Result node immediately above whatever + * plan node the pseudoconstant clause is assigned to. It's usually best + * to put a gating node as high in the plan tree as possible. If we are + * not below an outer join, we can actually push the pseudoconstant qual + * all the way to the top of the tree. If we are below an outer join, we + * leave the qual at its original syntactic level (we could push it up to + * just below the outer join, but that seems more complex than it's + * worth). + */ + if (bms_is_empty(relids)) + { + if (ojscope) + { + /* clause is attached to outer join, eval it there */ + relids = bms_copy(ojscope); + /* mustn't use as gating qual, so don't mark pseudoconstant */ + } + else + { + /* eval at original syntactic level */ + relids = bms_copy(qualscope); + if (!contain_volatile_functions(clause)) + { + /* mark as gating qual */ + pseudoconstant = true; + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; + /* if not below outer join, push it to top of tree */ + if (!below_outer_join) + { + relids = + get_relids_in_jointree((Node *) root->parse->jointree, + false); + qualscope = bms_copy(relids); + } + } + } + } + + /*---------- + * Check to see if clause application must be delayed by outer-join + * considerations. + * + * A word about is_pushed_down: we mark the qual as "pushed down" if + * it is (potentially) applicable at a level different from its original + * syntactic level. This flag is used to distinguish OUTER JOIN ON quals + * from other quals pushed down to the same joinrel. The rules are: + * WHERE quals and INNER JOIN quals: is_pushed_down = true. + * Non-degenerate OUTER JOIN quals: is_pushed_down = false. + * Degenerate OUTER JOIN quals: is_pushed_down = true. + * A "degenerate" OUTER JOIN qual is one that doesn't mention the + * non-nullable side, and hence can be pushed down into the nullable side + * without changing the join result. It is correct to treat it as a + * regular filter condition at the level where it is evaluated. + * + * Note: it is not immediately obvious that a simple boolean is enough + * for this: if for some reason we were to attach a degenerate qual to + * its original join level, it would need to be treated as an outer join + * qual there. However, this cannot happen, because all the rels the + * clause mentions must be in the outer join's min_righthand, therefore + * the join it needs must be formed before the outer join; and we always + * attach quals to the lowest level where they can be evaluated. But + * if we were ever to re-introduce a mechanism for delaying evaluation + * of "expensive" quals, this area would need work. + * + * Note: generally, use of is_pushed_down has to go through the macro + * RINFO_IS_PUSHED_DOWN, because that flag alone is not always sufficient + * to tell whether a clause must be treated as pushed-down in context. + * This seems like another reason why it should perhaps be rethought. + *---------- + */ + if (bms_overlap(relids, outerjoin_nonnullable)) + { + /* + * The qual is attached to an outer join and mentions (some of the) + * rels on the nonnullable side, so it's not degenerate. + * + * We can't use such a clause to deduce equivalence (the left and + * right sides might be unequal above the join because one of them has + * gone to NULL) ... but we might be able to use it for more limited + * deductions, if it is mergejoinable. So consider adding it to the + * lists of set-aside outer-join clauses. + */ + is_pushed_down = false; + maybe_equivalence = false; + maybe_outer_join = true; + + /* Check to see if must be delayed by lower outer join */ + outerjoin_delayed = check_outerjoin_delay(root, + &relids, + &nullable_relids, + false); + + /* + * Now force the qual to be evaluated exactly at the level of joining + * corresponding to the outer join. We cannot let it get pushed down + * into the nonnullable side, since then we'd produce no output rows, + * rather than the intended single null-extended row, for any + * nonnullable-side rows failing the qual. + * + * (Do this step after calling check_outerjoin_delay, because that + * trashes relids.) + */ + Assert(ojscope); + relids = ojscope; + Assert(!pseudoconstant); + } + else + { + /* + * Normal qual clause or degenerate outer-join clause. Either way, we + * can mark it as pushed-down. + */ + is_pushed_down = true; + + /* Check to see if must be delayed by lower outer join */ + outerjoin_delayed = check_outerjoin_delay(root, + &relids, + &nullable_relids, + true); + + if (outerjoin_delayed) + { + /* Should still be a subset of current scope ... */ + Assert(root->hasLateralRTEs || bms_is_subset(relids, qualscope)); + Assert(ojscope == NULL || bms_is_subset(relids, ojscope)); + + /* + * Because application of the qual will be delayed by outer join, + * we mustn't assume its vars are equal everywhere. + */ + maybe_equivalence = false; + + /* + * It's possible that this is an IS NULL clause that's redundant + * with a lower antijoin; if so we can just discard it. We need + * not test in any of the other cases, because this will only be + * possible for pushed-down, delayed clauses. + */ + if (check_redundant_nullability_qual(root, clause)) + return; + } + else + { + /* + * Qual is not delayed by any lower outer-join restriction, so we + * can consider feeding it to the equivalence machinery. However, + * if it's itself within an outer-join clause, treat it as though + * it appeared below that outer join (note that we can only get + * here when the clause references only nullable-side rels). + */ + maybe_equivalence = true; + if (outerjoin_nonnullable != NULL) + below_outer_join = true; + } + + /* + * Since it doesn't mention the LHS, it's certainly not useful as a + * set-aside OJ clause, even if it's in an OJ. + */ + maybe_outer_join = false; + } + + /* + * Build the RestrictInfo node itself. + */ + restrictinfo = make_restrictinfo(root, + (Expr *) clause, + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + relids, + outerjoin_nonnullable, + nullable_relids); + + /* + * If it's a join clause (either naturally, or because delayed by + * outer-join rules), add vars used in the clause to targetlists of their + * relations, so that they will be emitted by the plan nodes that scan + * those relations (else they won't be available at the join node!). + * + * Note: if the clause gets absorbed into an EquivalenceClass then this + * may be unnecessary, but for now we have to do it to cover the case + * where the EC becomes ec_broken and we end up reinserting the original + * clauses into the plan. + */ + if (bms_membership(relids) == BMS_MULTIPLE) + { + List *vars = pull_var_clause(clause, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_vars_to_targetlist(root, vars, relids, false); + list_free(vars); + } + + /* + * We check "mergejoinability" of every clause, not only join clauses, + * because we want to know about equivalences between vars of the same + * relation, or between vars and consts. + */ + check_mergejoinable(restrictinfo); + + /* + * If it is a true equivalence clause, send it to the EquivalenceClass + * machinery. We do *not* attach it directly to any restriction or join + * lists. The EC code will propagate it to the appropriate places later. + * + * If the clause has a mergejoinable operator and is not + * outerjoin-delayed, yet isn't an equivalence because it is an outer-join + * clause, the EC code may yet be able to do something with it. We add it + * to appropriate lists for further consideration later. Specifically: + * + * If it is a left or right outer-join qualification that relates the two + * sides of the outer join (no funny business like leftvar1 = leftvar2 + + * rightvar), we add it to root->left_join_clauses or + * root->right_join_clauses according to which side the nonnullable + * variable appears on. + * + * If it is a full outer-join qualification, we add it to + * root->full_join_clauses. (Ideally we'd discard cases that aren't + * leftvar = rightvar, as we do for left/right joins, but this routine + * doesn't have the info needed to do that; and the current usage of the + * full_join_clauses list doesn't require that, so it's not currently + * worth complicating this routine's API to make it possible.) + * + * If none of the above hold, pass it off to + * distribute_restrictinfo_to_rels(). + * + * In all cases, it's important to initialize the left_ec and right_ec + * fields of a mergejoinable clause, so that all possibly mergejoinable + * expressions have representations in EquivalenceClasses. If + * process_equivalence is successful, it will take care of that; + * otherwise, we have to call initialize_mergeclause_eclasses to do it. + */ + if (restrictinfo->mergeopfamilies) + { + if (maybe_equivalence) + { + if (check_equivalence_delay(root, restrictinfo) && + process_equivalence(root, &restrictinfo, below_outer_join)) + return; + /* EC rejected it, so set left_ec/right_ec the hard way ... */ + if (restrictinfo->mergeopfamilies) /* EC might have changed this */ + initialize_mergeclause_eclasses(root, restrictinfo); + /* ... and fall through to distribute_restrictinfo_to_rels */ + } + else if (maybe_outer_join && restrictinfo->can_join) + { + /* we need to set up left_ec/right_ec the hard way */ + initialize_mergeclause_eclasses(root, restrictinfo); + /* now see if it should go to any outer-join lists */ + if (bms_is_subset(restrictinfo->left_relids, + outerjoin_nonnullable) && + !bms_overlap(restrictinfo->right_relids, + outerjoin_nonnullable)) + { + /* we have outervar = innervar */ + root->left_join_clauses = lappend(root->left_join_clauses, + restrictinfo); + return; + } + if (bms_is_subset(restrictinfo->right_relids, + outerjoin_nonnullable) && + !bms_overlap(restrictinfo->left_relids, + outerjoin_nonnullable)) + { + /* we have innervar = outervar */ + root->right_join_clauses = lappend(root->right_join_clauses, + restrictinfo); + return; + } + if (jointype == JOIN_FULL) + { + /* FULL JOIN (above tests cannot match in this case) */ + root->full_join_clauses = lappend(root->full_join_clauses, + restrictinfo); + return; + } + /* nope, so fall through to distribute_restrictinfo_to_rels */ + } + else + { + /* we still need to set up left_ec/right_ec */ + initialize_mergeclause_eclasses(root, restrictinfo); + } + } + + /* No EC special case applies, so push it into the clause lists */ + distribute_restrictinfo_to_rels(root, restrictinfo); +} + +/* + * check_outerjoin_delay + * Detect whether a qual referencing the given relids must be delayed + * in application due to the presence of a lower outer join, and/or + * may force extra delay of higher-level outer joins. + * + * If the qual must be delayed, add relids to *relids_p to reflect the lowest + * safe level for evaluating the qual, and return true. Any extra delay for + * higher-level joins is reflected by setting delay_upper_joins to true in + * SpecialJoinInfo structs. We also compute nullable_relids, the set of + * referenced relids that are nullable by lower outer joins (note that this + * can be nonempty even for a non-delayed qual). + * + * For an is_pushed_down qual, we can evaluate the qual as soon as (1) we have + * all the rels it mentions, and (2) we are at or above any outer joins that + * can null any of these rels and are below the syntactic location of the + * given qual. We must enforce (2) because pushing down such a clause below + * the OJ might cause the OJ to emit null-extended rows that should not have + * been formed, or that should have been rejected by the clause. (This is + * only an issue for non-strict quals, since if we can prove a qual mentioning + * only nullable rels is strict, we'd have reduced the outer join to an inner + * join in reduce_outer_joins().) + * + * To enforce (2), scan the join_info_list and merge the required-relid sets of + * any such OJs into the clause's own reference list. At the time we are + * called, the join_info_list contains only outer joins below this qual. We + * have to repeat the scan until no new relids get added; this ensures that + * the qual is suitably delayed regardless of the order in which OJs get + * executed. As an example, if we have one OJ with LHS=A, RHS=B, and one with + * LHS=B, RHS=C, it is implied that these can be done in either order; if the + * B/C join is done first then the join to A can null C, so a qual actually + * mentioning only C cannot be applied below the join to A. + * + * For a non-pushed-down qual, this isn't going to determine where we place the + * qual, but we need to determine outerjoin_delayed and nullable_relids anyway + * for use later in the planning process. + * + * Lastly, a pushed-down qual that references the nullable side of any current + * join_info_list member and has to be evaluated above that OJ (because its + * required relids overlap the LHS too) causes that OJ's delay_upper_joins + * flag to be set true. This will prevent any higher-level OJs from + * being interchanged with that OJ, which would result in not having any + * correct place to evaluate the qual. (The case we care about here is a + * sub-select WHERE clause within the RHS of some outer join. The WHERE + * clause must effectively be treated as a degenerate clause of that outer + * join's condition. Rather than trying to match such clauses with joins + * directly, we set delay_upper_joins here, and when the upper outer join + * is processed by make_outerjoininfo, it will refrain from allowing the + * two OJs to commute.) + */ +static bool +check_outerjoin_delay(PlannerInfo *root, + Relids *relids_p, /* in/out parameter */ + Relids *nullable_relids_p, /* output parameter */ + bool is_pushed_down) +{ + Relids relids; + Relids nullable_relids; + bool outerjoin_delayed; + bool found_some; + + /* fast path if no special joins */ + if (root->join_info_list == NIL) + { + *nullable_relids_p = NULL; + return false; + } + + /* must copy relids because we need the original value at the end */ + relids = bms_copy(*relids_p); + nullable_relids = NULL; + outerjoin_delayed = false; + do + { + ListCell *l; + + found_some = false; + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + /* do we reference any nullable rels of this OJ? */ + if (bms_overlap(relids, sjinfo->min_righthand) || + (sjinfo->jointype == JOIN_FULL && + bms_overlap(relids, sjinfo->min_lefthand))) + { + /* yes; have we included all its rels in relids? */ + if (!bms_is_subset(sjinfo->min_lefthand, relids) || + !bms_is_subset(sjinfo->min_righthand, relids)) + { + /* no, so add them in */ + relids = bms_add_members(relids, sjinfo->min_lefthand); + relids = bms_add_members(relids, sjinfo->min_righthand); + outerjoin_delayed = true; + /* we'll need another iteration */ + found_some = true; + } + /* track all the nullable rels of relevant OJs */ + nullable_relids = bms_add_members(nullable_relids, + sjinfo->min_righthand); + if (sjinfo->jointype == JOIN_FULL) + nullable_relids = bms_add_members(nullable_relids, + sjinfo->min_lefthand); + /* set delay_upper_joins if needed */ + if (is_pushed_down && sjinfo->jointype != JOIN_FULL && + bms_overlap(relids, sjinfo->min_lefthand)) + sjinfo->delay_upper_joins = true; + } + } + } while (found_some); + + /* identify just the actually-referenced nullable rels */ + nullable_relids = bms_int_members(nullable_relids, *relids_p); + + /* replace *relids_p, and return nullable_relids */ + bms_free(*relids_p); + *relids_p = relids; + *nullable_relids_p = nullable_relids; + return outerjoin_delayed; +} + +/* + * check_equivalence_delay + * Detect whether a potential equivalence clause is rendered unsafe + * by outer-join-delay considerations. Return true if it's safe. + * + * The initial tests in distribute_qual_to_rels will consider a mergejoinable + * clause to be a potential equivalence clause if it is not outerjoin_delayed. + * But since the point of equivalence processing is that we will recombine the + * two sides of the clause with others, we have to check that each side + * satisfies the not-outerjoin_delayed condition on its own; otherwise it might + * not be safe to evaluate everywhere we could place a derived equivalence + * condition. + */ +static bool +check_equivalence_delay(PlannerInfo *root, + RestrictInfo *restrictinfo) +{ + Relids relids; + Relids nullable_relids; + + /* fast path if no special joins */ + if (root->join_info_list == NIL) + return true; + + /* must copy restrictinfo's relids to avoid changing it */ + relids = bms_copy(restrictinfo->left_relids); + /* check left side does not need delay */ + if (check_outerjoin_delay(root, &relids, &nullable_relids, true)) + return false; + + /* and similarly for the right side */ + relids = bms_copy(restrictinfo->right_relids); + if (check_outerjoin_delay(root, &relids, &nullable_relids, true)) + return false; + + return true; +} + +/* + * check_redundant_nullability_qual + * Check to see if the qual is an IS NULL qual that is redundant with + * a lower JOIN_ANTI join. + * + * We want to suppress redundant IS NULL quals, not so much to save cycles + * as to avoid generating bogus selectivity estimates for them. So if + * redundancy is detected here, distribute_qual_to_rels() just throws away + * the qual. + */ +static bool +check_redundant_nullability_qual(PlannerInfo *root, Node *clause) +{ + Var *forced_null_var; + Index forced_null_rel; + ListCell *lc; + + /* Check for IS NULL, and identify the Var forced to NULL */ + forced_null_var = find_forced_null_var(clause); + if (forced_null_var == NULL) + return false; + forced_null_rel = forced_null_var->varno; + + /* + * If the Var comes from the nullable side of a lower antijoin, the IS + * NULL condition is necessarily true. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + if (sjinfo->jointype == JOIN_ANTI && + bms_is_member(forced_null_rel, sjinfo->syn_righthand)) + return true; + } + + return false; +} + +/* + * distribute_restrictinfo_to_rels + * Push a completed RestrictInfo into the proper restriction or join + * clause list(s). + * + * This is the last step of distribute_qual_to_rels() for ordinary qual + * clauses. Clauses that are interesting for equivalence-class processing + * are diverted to the EC machinery, but may ultimately get fed back here. + */ +void +distribute_restrictinfo_to_rels(PlannerInfo *root, + RestrictInfo *restrictinfo) +{ + Relids relids = restrictinfo->required_relids; + RelOptInfo *rel; + + switch (bms_membership(relids)) + { + case BMS_SINGLETON: + + /* + * There is only one relation participating in the clause, so it + * is a restriction clause for that relation. + */ + rel = find_base_rel(root, bms_singleton_member(relids)); + + /* Add clause to rel's restriction list */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, + restrictinfo); + /* Update security level info */ + rel->baserestrict_min_security = Min(rel->baserestrict_min_security, + restrictinfo->security_level); + break; + case BMS_MULTIPLE: + + /* + * The clause is a join clause, since there is more than one rel + * in its relid set. + */ + + /* + * Check for hashjoinable operators. (We don't bother setting the + * hashjoin info except in true join clauses.) + */ + check_hashjoinable(restrictinfo); + + /* + * Likewise, check if the clause is suitable to be used with a + * Memoize node to cache inner tuples during a parameterized + * nested loop. + */ + check_memoizable(restrictinfo); + + /* + * Add clause to the join lists of all the relevant relations. + */ + add_join_clause_to_rels(root, restrictinfo, relids); + break; + default: + + /* + * clause references no rels, and therefore we have no place to + * attach it. Shouldn't get here if callers are working properly. + */ + elog(ERROR, "cannot cope with variable-free clause"); + break; + } +} + +/* + * process_implied_equality + * Create a restrictinfo item that says "item1 op item2", and push it + * into the appropriate lists. (In practice opno is always a btree + * equality operator.) + * + * "qualscope" is the nominal syntactic level to impute to the restrictinfo. + * This must contain at least all the rels used in the expressions, but it + * is used only to set the qual application level when both exprs are + * variable-free. Otherwise the qual is applied at the lowest join level + * that provides all its variables. + * + * "nullable_relids" is the set of relids used in the expressions that are + * potentially nullable below the expressions. (This has to be supplied by + * caller because this function is used after deconstruct_jointree, so we + * don't have knowledge of where the clause items came from.) + * + * "security_level" is the security level to assign to the new restrictinfo. + * + * "both_const" indicates whether both items are known pseudo-constant; + * in this case it is worth applying eval_const_expressions() in case we + * can produce constant TRUE or constant FALSE. (Otherwise it's not, + * because the expressions went through eval_const_expressions already.) + * + * Returns the generated RestrictInfo, if any. The result will be NULL + * if both_const is true and we successfully reduced the clause to + * constant TRUE. + * + * Note: this function will copy item1 and item2, but it is caller's + * responsibility to make sure that the Relids parameters are fresh copies + * not shared with other uses. + * + * Note: we do not do initialize_mergeclause_eclasses() here. It is + * caller's responsibility that left_ec/right_ec be set as necessary. + */ +RestrictInfo * +process_implied_equality(PlannerInfo *root, + Oid opno, + Oid collation, + Expr *item1, + Expr *item2, + Relids qualscope, + Relids nullable_relids, + Index security_level, + bool below_outer_join, + bool both_const) +{ + RestrictInfo *restrictinfo; + Node *clause; + Relids relids; + bool pseudoconstant = false; + + /* + * Build the new clause. Copy to ensure it shares no substructure with + * original (this is necessary in case there are subselects in there...) + */ + clause = (Node *) make_opclause(opno, + BOOLOID, /* opresulttype */ + false, /* opretset */ + copyObject(item1), + copyObject(item2), + InvalidOid, + collation); + + /* If both constant, try to reduce to a boolean constant. */ + if (both_const) + { + clause = eval_const_expressions(root, clause); + + /* If we produced const TRUE, just drop the clause */ + if (clause && IsA(clause, Const)) + { + Const *cclause = (Const *) clause; + + Assert(cclause->consttype == BOOLOID); + if (!cclause->constisnull && DatumGetBool(cclause->constvalue)) + return NULL; + } + } + + /* + * The rest of this is a very cut-down version of distribute_qual_to_rels. + * We can skip most of the work therein, but there are a couple of special + * cases we still have to handle. + * + * Retrieve all relids mentioned within the possibly-simplified clause. + */ + relids = pull_varnos(root, clause); + Assert(bms_is_subset(relids, qualscope)); + + /* + * If the clause is variable-free, our normal heuristic for pushing it + * down to just the mentioned rels doesn't work, because there are none. + * Apply at the given qualscope, or at the top of tree if it's nonvolatile + * (which it very likely is, but we'll check, just to be sure). + */ + if (bms_is_empty(relids)) + { + /* eval at original syntactic level */ + relids = bms_copy(qualscope); + if (!contain_volatile_functions(clause)) + { + /* mark as gating qual */ + pseudoconstant = true; + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; + /* if not below outer join, push it to top of tree */ + if (!below_outer_join) + { + relids = + get_relids_in_jointree((Node *) root->parse->jointree, + false); + } + } + } + + /* + * Build the RestrictInfo node itself. + */ + restrictinfo = make_restrictinfo(root, + (Expr *) clause, + true, /* is_pushed_down */ + false, /* outerjoin_delayed */ + pseudoconstant, + security_level, + relids, + NULL, /* outer_relids */ + nullable_relids); + + /* + * If it's a join clause, add vars used in the clause to targetlists of + * their relations, so that they will be emitted by the plan nodes that + * scan those relations (else they won't be available at the join node!). + * + * Typically, we'd have already done this when the component expressions + * were first seen by distribute_qual_to_rels; but it is possible that + * some of the Vars could have missed having that done because they only + * appeared in single-relation clauses originally. So do it here for + * safety. + */ + if (bms_membership(relids) == BMS_MULTIPLE) + { + List *vars = pull_var_clause(clause, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_vars_to_targetlist(root, vars, relids, false); + list_free(vars); + } + + /* + * Check mergejoinability. This will usually succeed, since the op came + * from an EquivalenceClass; but we could have reduced the original clause + * to a constant. + */ + check_mergejoinable(restrictinfo); + + /* + * Note we don't do initialize_mergeclause_eclasses(); the caller can + * handle that much more cheaply than we can. It's okay to call + * distribute_restrictinfo_to_rels() before that happens. + */ + + /* + * Push the new clause into all the appropriate restrictinfo lists. + */ + distribute_restrictinfo_to_rels(root, restrictinfo); + + return restrictinfo; +} + +/* + * build_implied_join_equality --- build a RestrictInfo for a derived equality + * + * This overlaps the functionality of process_implied_equality(), but we + * must not push the RestrictInfo into the joininfo tree. + * + * Note: this function will copy item1 and item2, but it is caller's + * responsibility to make sure that the Relids parameters are fresh copies + * not shared with other uses. + * + * Note: we do not do initialize_mergeclause_eclasses() here. It is + * caller's responsibility that left_ec/right_ec be set as necessary. + */ +RestrictInfo * +build_implied_join_equality(PlannerInfo *root, + Oid opno, + Oid collation, + Expr *item1, + Expr *item2, + Relids qualscope, + Relids nullable_relids, + Index security_level) +{ + RestrictInfo *restrictinfo; + Expr *clause; + + /* + * Build the new clause. Copy to ensure it shares no substructure with + * original (this is necessary in case there are subselects in there...) + */ + clause = make_opclause(opno, + BOOLOID, /* opresulttype */ + false, /* opretset */ + copyObject(item1), + copyObject(item2), + InvalidOid, + collation); + + /* + * Build the RestrictInfo node itself. + */ + restrictinfo = make_restrictinfo(root, + clause, + true, /* is_pushed_down */ + false, /* outerjoin_delayed */ + false, /* pseudoconstant */ + security_level, /* security_level */ + qualscope, /* required_relids */ + NULL, /* outer_relids */ + nullable_relids); /* nullable_relids */ + + /* Set mergejoinability/hashjoinability flags */ + check_mergejoinable(restrictinfo); + check_hashjoinable(restrictinfo); + check_memoizable(restrictinfo); + + return restrictinfo; +} + + +/* + * match_foreign_keys_to_quals + * Match foreign-key constraints to equivalence classes and join quals + * + * The idea here is to see which query join conditions match equality + * constraints of a foreign-key relationship. For such join conditions, + * we can use the FK semantics to make selectivity estimates that are more + * reliable than estimating from statistics, especially for multiple-column + * FKs, where the normal assumption of independent conditions tends to fail. + * + * In this function we annotate the ForeignKeyOptInfos in root->fkey_list + * with info about which eclasses and join qual clauses they match, and + * discard any ForeignKeyOptInfos that are irrelevant for the query. + */ +void +match_foreign_keys_to_quals(PlannerInfo *root) +{ + List *newlist = NIL; + ListCell *lc; + + foreach(lc, root->fkey_list) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + RelOptInfo *con_rel; + RelOptInfo *ref_rel; + int colno; + + /* + * Either relid might identify a rel that is in the query's rtable but + * isn't referenced by the jointree so won't have a RelOptInfo. Hence + * don't use find_base_rel() here. We can ignore such FKs. + */ + if (fkinfo->con_relid >= root->simple_rel_array_size || + fkinfo->ref_relid >= root->simple_rel_array_size) + continue; /* just paranoia */ + con_rel = root->simple_rel_array[fkinfo->con_relid]; + if (con_rel == NULL) + continue; + ref_rel = root->simple_rel_array[fkinfo->ref_relid]; + if (ref_rel == NULL) + continue; + + /* + * Ignore FK unless both rels are baserels. This gets rid of FKs that + * link to inheritance child rels (otherrels) and those that link to + * rels removed by join removal (dead rels). + */ + if (con_rel->reloptkind != RELOPT_BASEREL || + ref_rel->reloptkind != RELOPT_BASEREL) + continue; + + /* + * Scan the columns and try to match them to eclasses and quals. + * + * Note: for simple inner joins, any match should be in an eclass. + * "Loose" quals that syntactically match an FK equality must have + * been rejected for EC status because they are outer-join quals or + * similar. We can still consider them to match the FK if they are + * not outerjoin_delayed. + */ + for (colno = 0; colno < fkinfo->nkeys; colno++) + { + EquivalenceClass *ec; + AttrNumber con_attno, + ref_attno; + Oid fpeqop; + ListCell *lc2; + + ec = match_eclasses_to_foreign_key_col(root, fkinfo, colno); + /* Don't bother looking for loose quals if we got an EC match */ + if (ec != NULL) + { + fkinfo->nmatched_ec++; + if (ec->ec_has_const) + fkinfo->nconst_ec++; + continue; + } + + /* + * Scan joininfo list for relevant clauses. Either rel's joininfo + * list would do equally well; we use con_rel's. + */ + con_attno = fkinfo->conkey[colno]; + ref_attno = fkinfo->confkey[colno]; + fpeqop = InvalidOid; /* we'll look this up only if needed */ + + foreach(lc2, con_rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2); + OpExpr *clause = (OpExpr *) rinfo->clause; + Var *leftvar; + Var *rightvar; + + /* Ignore outerjoin-delayed clauses */ + if (rinfo->outerjoin_delayed) + continue; + + /* Only binary OpExprs are useful for consideration */ + if (!IsA(clause, OpExpr) || + list_length(clause->args) != 2) + continue; + leftvar = (Var *) get_leftop((Expr *) clause); + rightvar = (Var *) get_rightop((Expr *) clause); + + /* Operands must be Vars, possibly with RelabelType */ + while (leftvar && IsA(leftvar, RelabelType)) + leftvar = (Var *) ((RelabelType *) leftvar)->arg; + if (!(leftvar && IsA(leftvar, Var))) + continue; + while (rightvar && IsA(rightvar, RelabelType)) + rightvar = (Var *) ((RelabelType *) rightvar)->arg; + if (!(rightvar && IsA(rightvar, Var))) + continue; + + /* Now try to match the vars to the current foreign key cols */ + if (fkinfo->ref_relid == leftvar->varno && + ref_attno == leftvar->varattno && + fkinfo->con_relid == rightvar->varno && + con_attno == rightvar->varattno) + { + /* Vars match, but is it the right operator? */ + if (clause->opno == fkinfo->conpfeqop[colno]) + { + fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno], + rinfo); + fkinfo->nmatched_ri++; + } + } + else if (fkinfo->ref_relid == rightvar->varno && + ref_attno == rightvar->varattno && + fkinfo->con_relid == leftvar->varno && + con_attno == leftvar->varattno) + { + /* + * Reverse match, must check commutator operator. Look it + * up if we didn't already. (In the worst case we might + * do multiple lookups here, but that would require an FK + * equality operator without commutator, which is + * unlikely.) + */ + if (!OidIsValid(fpeqop)) + fpeqop = get_commutator(fkinfo->conpfeqop[colno]); + if (clause->opno == fpeqop) + { + fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno], + rinfo); + fkinfo->nmatched_ri++; + } + } + } + /* If we found any matching loose quals, count col as matched */ + if (fkinfo->rinfos[colno]) + fkinfo->nmatched_rcols++; + } + + /* + * Currently, we drop multicolumn FKs that aren't fully matched to the + * query. Later we might figure out how to derive some sort of + * estimate from them, in which case this test should be weakened to + * "if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) > 0)". + */ + if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) == fkinfo->nkeys) + newlist = lappend(newlist, fkinfo); + } + /* Replace fkey_list, thereby discarding any useless entries */ + root->fkey_list = newlist; +} + + +/***************************************************************************** + * + * CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES + * + *****************************************************************************/ + +/* + * check_mergejoinable + * If the restrictinfo's clause is mergejoinable, set the mergejoin + * info fields in the restrictinfo. + * + * Currently, we support mergejoin for binary opclauses where + * the operator is a mergejoinable operator. The arguments can be + * anything --- as long as there are no volatile functions in them. + */ +static void +check_mergejoinable(RestrictInfo *restrictinfo) +{ + Expr *clause = restrictinfo->clause; + Oid opno; + Node *leftarg; + + if (restrictinfo->pseudoconstant) + return; + if (!is_opclause(clause)) + return; + if (list_length(((OpExpr *) clause)->args) != 2) + return; + + opno = ((OpExpr *) clause)->opno; + leftarg = linitial(((OpExpr *) clause)->args); + + if (op_mergejoinable(opno, exprType(leftarg)) && + !contain_volatile_functions((Node *) restrictinfo)) + restrictinfo->mergeopfamilies = get_mergejoin_opfamilies(opno); + + /* + * Note: op_mergejoinable is just a hint; if we fail to find the operator + * in any btree opfamilies, mergeopfamilies remains NIL and so the clause + * is not treated as mergejoinable. + */ +} + +/* + * check_hashjoinable + * If the restrictinfo's clause is hashjoinable, set the hashjoin + * info fields in the restrictinfo. + * + * Currently, we support hashjoin for binary opclauses where + * the operator is a hashjoinable operator. The arguments can be + * anything --- as long as there are no volatile functions in them. + */ +static void +check_hashjoinable(RestrictInfo *restrictinfo) +{ + Expr *clause = restrictinfo->clause; + Oid opno; + Node *leftarg; + + if (restrictinfo->pseudoconstant) + return; + if (!is_opclause(clause)) + return; + if (list_length(((OpExpr *) clause)->args) != 2) + return; + + opno = ((OpExpr *) clause)->opno; + leftarg = linitial(((OpExpr *) clause)->args); + + if (op_hashjoinable(opno, exprType(leftarg)) && + !contain_volatile_functions((Node *) restrictinfo)) + restrictinfo->hashjoinoperator = opno; +} + +/* + * check_memoizable + * If the restrictinfo's clause is suitable to be used for a Memoize node, + * set the lefthasheqoperator and righthasheqoperator to the hash equality + * operator that will be needed during caching. + */ +static void +check_memoizable(RestrictInfo *restrictinfo) +{ + TypeCacheEntry *typentry; + Expr *clause = restrictinfo->clause; + Oid lefttype; + Oid righttype; + + if (restrictinfo->pseudoconstant) + return; + if (!is_opclause(clause)) + return; + if (list_length(((OpExpr *) clause)->args) != 2) + return; + + lefttype = exprType(linitial(((OpExpr *) clause)->args)); + + typentry = lookup_type_cache(lefttype, TYPECACHE_HASH_PROC | + TYPECACHE_EQ_OPR); + + if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr)) + restrictinfo->left_hasheqoperator = typentry->eq_opr; + + righttype = exprType(lsecond(((OpExpr *) clause)->args)); + + /* + * Lookup the right type, unless it's the same as the left type, in which + * case typentry is already pointing to the required TypeCacheEntry. + */ + if (lefttype != righttype) + typentry = lookup_type_cache(righttype, TYPECACHE_HASH_PROC | + TYPECACHE_EQ_OPR); + + if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr)) + restrictinfo->right_hasheqoperator = typentry->eq_opr; +} diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c new file mode 100644 index 0000000..9330908 --- /dev/null +++ b/src/backend/optimizer/plan/planagg.c @@ -0,0 +1,513 @@ +/*------------------------------------------------------------------------- + * + * planagg.c + * Special planning for aggregate queries. + * + * This module tries to replace MIN/MAX aggregate functions by subqueries + * of the form + * (SELECT col FROM tab + * WHERE col IS NOT NULL AND existing-quals + * ORDER BY col ASC/DESC + * LIMIT 1) + * Given a suitable index on tab.col, this can be much faster than the + * generic scan-all-the-rows aggregation plan. We can handle multiple + * MIN/MAX aggregates by generating multiple subqueries, and their + * orderings can be different. However, if the query contains any + * non-optimizable aggregates, there's no point since we'll have to + * scan all the rows anyway. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/planagg.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/subselect.h" +#include "optimizer/tlist.h" +#include "parser/parse_clause.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + +static bool can_minmax_aggs(PlannerInfo *root, List **context); +static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, + Oid eqop, Oid sortop, bool nulls_first); +static void minmax_qp_callback(PlannerInfo *root, void *extra); +static Oid fetch_agg_sort_op(Oid aggfnoid); + + +/* + * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates + * + * Check to see whether the query contains MIN/MAX aggregate functions that + * might be optimizable via indexscans. If it does, and all the aggregates + * are potentially optimizable, then create a MinMaxAggPath and add it to + * the (UPPERREL_GROUP_AGG, NULL) upperrel. + * + * This should be called by grouping_planner() just before it's ready to call + * query_planner(), because we generate indexscan paths by cloning the + * planner's state and invoking query_planner() on a modified version of + * the query parsetree. Thus, all preprocessing needed before query_planner() + * must already be done. This relies on the list of aggregates in + * root->agginfos, so preprocess_aggrefs() must have been called already, too. + */ +void +preprocess_minmax_aggregates(PlannerInfo *root) +{ + Query *parse = root->parse; + FromExpr *jtnode; + RangeTblRef *rtr; + RangeTblEntry *rte; + List *aggs_list; + RelOptInfo *grouped_rel; + ListCell *lc; + + /* minmax_aggs list should be empty at this point */ + Assert(root->minmax_aggs == NIL); + + /* Nothing to do if query has no aggregates */ + if (!parse->hasAggs) + return; + + Assert(!parse->setOperations); /* shouldn't get here if a setop */ + Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ + + /* + * Reject unoptimizable cases. + * + * We don't handle GROUP BY or windowing, because our current + * implementations of grouping require looking at all the rows anyway, and + * so there's not much point in optimizing MIN/MAX. + */ + if (parse->groupClause || list_length(parse->groupingSets) > 1 || + parse->hasWindowFuncs) + return; + + /* + * Reject if query contains any CTEs; there's no way to build an indexscan + * on one so we couldn't succeed here. (If the CTEs are unreferenced, + * that's not true, but it doesn't seem worth expending cycles to check.) + */ + if (parse->cteList) + return; + + /* + * We also restrict the query to reference exactly one table, since join + * conditions can't be handled reasonably. (We could perhaps handle a + * query containing cartesian-product joins, but it hardly seems worth the + * trouble.) However, the single table could be buried in several levels + * of FromExpr due to subqueries. Note the "single" table could be an + * inheritance parent, too, including the case of a UNION ALL subquery + * that's been flattened to an appendrel. + */ + jtnode = parse->jointree; + while (IsA(jtnode, FromExpr)) + { + if (list_length(jtnode->fromlist) != 1) + return; + jtnode = linitial(jtnode->fromlist); + } + if (!IsA(jtnode, RangeTblRef)) + return; + rtr = (RangeTblRef *) jtnode; + rte = planner_rt_fetch(rtr->rtindex, root); + if (rte->rtekind == RTE_RELATION) + /* ordinary relation, ok */ ; + else if (rte->rtekind == RTE_SUBQUERY && rte->inh) + /* flattened UNION ALL subquery, ok */ ; + else + return; + + /* + * Scan the tlist and HAVING qual to find all the aggregates and verify + * all are MIN/MAX aggregates. Stop as soon as we find one that isn't. + */ + aggs_list = NIL; + if (!can_minmax_aggs(root, &aggs_list)) + return; + + /* + * OK, there is at least the possibility of performing the optimization. + * Build an access path for each aggregate. If any of the aggregates + * prove to be non-indexable, give up; there is no point in optimizing + * just some of them. + */ + foreach(lc, aggs_list) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + Oid eqop; + bool reverse; + + /* + * We'll need the equality operator that goes with the aggregate's + * ordering operator. + */ + eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse); + if (!OidIsValid(eqop)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + mminfo->aggsortop); + + /* + * We can use either an ordering that gives NULLS FIRST or one that + * gives NULLS LAST; furthermore there's unlikely to be much + * performance difference between them, so it doesn't seem worth + * costing out both ways if we get a hit on the first one. NULLS + * FIRST is more likely to be available if the operator is a + * reverse-sort operator, so try that first if reverse. + */ + if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse)) + continue; + if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse)) + continue; + + /* No indexable path for this aggregate, so fail */ + return; + } + + /* + * OK, we can do the query this way. Prepare to create a MinMaxAggPath + * node. + * + * First, create an output Param node for each agg. (If we end up not + * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg, + * which is not worth worrying about. We can't wait till create_plan time + * to decide whether to make the Param, unfortunately.) + */ + foreach(lc, aggs_list) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + mminfo->param = + SS_make_initplan_output_param(root, + exprType((Node *) mminfo->target), + -1, + exprCollation((Node *) mminfo->target)); + } + + /* + * Create a MinMaxAggPath node with the appropriate estimated costs and + * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where + * it will compete against the standard aggregate implementation. (It + * will likely always win, but we need not assume that here.) + * + * Note: grouping_planner won't have created this upperrel yet, but it's + * fine for us to create it first. We will not have inserted the correct + * consider_parallel value in it, but MinMaxAggPath paths are currently + * never parallel-safe anyway, so that doesn't matter. Likewise, it + * doesn't matter that we haven't filled FDW-related fields in the rel. + * Also, because there are no rowmarks, we know that the processed_tlist + * doesn't need to change anymore, so making the pathtarget now is safe. + */ + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); + add_path(grouped_rel, (Path *) + create_minmaxagg_path(root, grouped_rel, + create_pathtarget(root, + root->processed_tlist), + aggs_list, + (List *) parse->havingQual)); +} + +/* + * can_minmax_aggs + * Walk through all the aggregates in the query, and check + * if they are all MIN/MAX aggregates. If so, build a list of the + * distinct aggregate calls in the tree. + * + * Returns false if a non-MIN/MAX aggregate is found, true otherwise. + * + * This does not descend into subqueries, and so should be used only after + * reduction of sublinks to subplans. There mustn't be outer-aggregate + * references either. + */ +static bool +can_minmax_aggs(PlannerInfo *root, List **context) +{ + ListCell *lc; + + foreach(lc, root->agginfos) + { + AggInfo *agginfo = (AggInfo *) lfirst(lc); + Aggref *aggref = agginfo->representative_aggref; + Oid aggsortop; + TargetEntry *curTarget; + MinMaxAggInfo *mminfo; + + Assert(aggref->agglevelsup == 0); + if (list_length(aggref->args) != 1) + return false; /* it couldn't be MIN/MAX */ + + /* + * ORDER BY is usually irrelevant for MIN/MAX, but it can change the + * outcome if the aggsortop's operator class recognizes non-identical + * values as equal. For example, 4.0 and 4.00 are equal according to + * numeric_ops, yet distinguishable. If MIN() receives more than one + * value equal to 4.0 and no value less than 4.0, it is unspecified + * which of those equal values MIN() returns. An ORDER BY expression + * that differs for each of those equal values of the argument + * expression makes the result predictable once again. This is a + * niche requirement, and we do not implement it with subquery paths. + * In any case, this test lets us reject ordered-set aggregates + * quickly. + */ + if (aggref->aggorder != NIL) + return false; + /* note: we do not care if DISTINCT is mentioned ... */ + + /* + * We might implement the optimization when a FILTER clause is present + * by adding the filter to the quals of the generated subquery. For + * now, just punt. + */ + if (aggref->aggfilter != NULL) + return false; + + aggsortop = fetch_agg_sort_op(aggref->aggfnoid); + if (!OidIsValid(aggsortop)) + return false; /* not a MIN/MAX aggregate */ + + curTarget = (TargetEntry *) linitial(aggref->args); + + if (contain_mutable_functions((Node *) curTarget->expr)) + return false; /* not potentially indexable */ + + if (type_is_rowtype(exprType((Node *) curTarget->expr))) + return false; /* IS NOT NULL would have weird semantics */ + + mminfo = makeNode(MinMaxAggInfo); + mminfo->aggfnoid = aggref->aggfnoid; + mminfo->aggsortop = aggsortop; + mminfo->target = curTarget->expr; + mminfo->subroot = NULL; /* don't compute path yet */ + mminfo->path = NULL; + mminfo->pathcost = 0; + mminfo->param = NULL; + + *context = lappend(*context, mminfo); + } + return true; +} + +/* + * build_minmax_path + * Given a MIN/MAX aggregate, try to build an indexscan Path it can be + * optimized with. + * + * If successful, stash the best path in *mminfo and return true. + * Otherwise, return false. + */ +static bool +build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, + Oid eqop, Oid sortop, bool nulls_first) +{ + PlannerInfo *subroot; + Query *parse; + TargetEntry *tle; + List *tlist; + NullTest *ntest; + SortGroupClause *sortcl; + RelOptInfo *final_rel; + Path *sorted_path; + Cost path_cost; + double path_fraction; + + /* + * We are going to construct what is effectively a sub-SELECT query, so + * clone the current query level's state and adjust it to make it look + * like a subquery. Any outer references will now be one level higher + * than before. (This means that when we are done, there will be no Vars + * of level 1, which is why the subquery can become an initplan.) + */ + subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); + memcpy(subroot, root, sizeof(PlannerInfo)); + subroot->query_level++; + subroot->parent_root = root; + /* reset subplan-related stuff */ + subroot->plan_params = NIL; + subroot->outer_params = NULL; + subroot->init_plans = NIL; + subroot->agginfos = NIL; + subroot->aggtransinfos = NIL; + + subroot->parse = parse = copyObject(root->parse); + IncrementVarSublevelsUp((Node *) parse, 1, 1); + + /* append_rel_list might contain outer Vars? */ + subroot->append_rel_list = copyObject(root->append_rel_list); + IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1); + /* There shouldn't be any OJ info to translate, as yet */ + Assert(subroot->join_info_list == NIL); + /* and we haven't made equivalence classes, either */ + Assert(subroot->eq_classes == NIL); + /* and we haven't created PlaceHolderInfos, either */ + Assert(subroot->placeholder_list == NIL); + + /*---------- + * Generate modified query of the form + * (SELECT col FROM tab + * WHERE col IS NOT NULL AND existing-quals + * ORDER BY col ASC/DESC + * LIMIT 1) + *---------- + */ + /* single tlist entry that is the aggregate target */ + tle = makeTargetEntry(copyObject(mminfo->target), + (AttrNumber) 1, + pstrdup("agg_target"), + false); + tlist = list_make1(tle); + subroot->processed_tlist = parse->targetList = tlist; + + /* No HAVING, no DISTINCT, no aggregates anymore */ + parse->havingQual = NULL; + subroot->hasHavingQual = false; + parse->distinctClause = NIL; + parse->hasDistinctOn = false; + parse->hasAggs = false; + + /* Build "target IS NOT NULL" expression */ + ntest = makeNode(NullTest); + ntest->nulltesttype = IS_NOT_NULL; + ntest->arg = copyObject(mminfo->target); + /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ + ntest->argisrow = false; + ntest->location = -1; + + /* User might have had that in WHERE already */ + if (!list_member((List *) parse->jointree->quals, ntest)) + parse->jointree->quals = (Node *) + lcons(ntest, (List *) parse->jointree->quals); + + /* Build suitable ORDER BY clause */ + sortcl = makeNode(SortGroupClause); + sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist); + sortcl->eqop = eqop; + sortcl->sortop = sortop; + sortcl->nulls_first = nulls_first; + sortcl->hashable = false; /* no need to make this accurate */ + parse->sortClause = list_make1(sortcl); + + /* set up expressions for LIMIT 1 */ + parse->limitOffset = NULL; + parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + /* + * Generate the best paths for this query, telling query_planner that we + * have LIMIT 1. + */ + subroot->tuple_fraction = 1.0; + subroot->limit_tuples = 1.0; + + final_rel = query_planner(subroot, minmax_qp_callback, NULL); + + /* + * Since we didn't go through subquery_planner() to handle the subquery, + * we have to do some of the same cleanup it would do, in particular cope + * with params and initplans used within this subquery. (This won't + * matter if we end up not using the subplan.) + */ + SS_identify_outer_params(subroot); + SS_charge_for_initplans(subroot, final_rel); + + /* + * Get the best presorted path, that being the one that's cheapest for + * fetching just one row. If there's no such path, fail. + */ + if (final_rel->rows > 1.0) + path_fraction = 1.0 / final_rel->rows; + else + path_fraction = 1.0; + + sorted_path = + get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, + subroot->query_pathkeys, + NULL, + path_fraction); + if (!sorted_path) + return false; + + /* + * The path might not return exactly what we want, so fix that. (We + * assume that this won't change any conclusions about which was the + * cheapest path.) + */ + sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, + create_pathtarget(subroot, + subroot->processed_tlist)); + + /* + * Determine cost to get just the first row of the presorted path. + * + * Note: cost calculation here should match + * compare_fractional_path_costs(). + */ + path_cost = sorted_path->startup_cost + + path_fraction * (sorted_path->total_cost - sorted_path->startup_cost); + + /* Save state for further processing */ + mminfo->subroot = subroot; + mminfo->path = sorted_path; + mminfo->pathcost = path_cost; + + return true; +} + +/* + * Compute query_pathkeys and other pathkeys during query_planner() + */ +static void +minmax_qp_callback(PlannerInfo *root, void *extra) +{ + root->group_pathkeys = NIL; + root->window_pathkeys = NIL; + root->distinct_pathkeys = NIL; + + root->sort_pathkeys = + make_pathkeys_for_sortclauses(root, + root->parse->sortClause, + root->parse->targetList); + + root->query_pathkeys = root->sort_pathkeys; +} + +/* + * Get the OID of the sort operator, if any, associated with an aggregate. + * Returns InvalidOid if there is no such operator. + */ +static Oid +fetch_agg_sort_op(Oid aggfnoid) +{ + HeapTuple aggTuple; + Form_pg_aggregate aggform; + Oid aggsortop; + + /* fetch aggregate entry from pg_aggregate */ + aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid)); + if (!HeapTupleIsValid(aggTuple)) + return InvalidOid; + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + aggsortop = aggform->aggsortop; + ReleaseSysCache(aggTuple); + + return aggsortop; +} diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c new file mode 100644 index 0000000..5a1d006 --- /dev/null +++ b/src/backend/optimizer/plan/planmain.c @@ -0,0 +1,284 @@ +/*------------------------------------------------------------------------- + * + * planmain.c + * Routines to plan a single query + * + * What's in a name, anyway? The top-level entry point of the planner/ + * optimizer is over in planner.c, not here as you might think from the + * file name. But this is the main code for planning a basic join operation, + * shorn of features like subselects, inheritance, aggregates, grouping, + * and so on. (Those are the things planner.c deals with.) + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/planmain.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/inherit.h" +#include "optimizer/optimizer.h" +#include "optimizer/orclauses.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/placeholder.h" +#include "optimizer/planmain.h" + + +/* + * query_planner + * Generate a path (that is, a simplified plan) for a basic query, + * which may involve joins but not any fancier features. + * + * Since query_planner does not handle the toplevel processing (grouping, + * sorting, etc) it cannot select the best path by itself. Instead, it + * returns the RelOptInfo for the top level of joining, and the caller + * (grouping_planner) can choose among the surviving paths for the rel. + * + * root describes the query to plan + * qp_callback is a function to compute query_pathkeys once it's safe to do so + * qp_extra is optional extra data to pass to qp_callback + * + * Note: the PlannerInfo node also includes a query_pathkeys field, which + * tells query_planner the sort order that is desired in the final output + * plan. This value is *not* available at call time, but is computed by + * qp_callback once we have completed merging the query's equivalence classes. + * (We cannot construct canonical pathkeys until that's done.) + */ +RelOptInfo * +query_planner(PlannerInfo *root, + query_pathkeys_callback qp_callback, void *qp_extra) +{ + Query *parse = root->parse; + List *joinlist; + RelOptInfo *final_rel; + + /* + * Init planner lists to empty. + * + * NOTE: append_rel_list was set up by subquery_planner, so do not touch + * here. + */ + root->join_rel_list = NIL; + root->join_rel_hash = NULL; + root->join_rel_level = NULL; + root->join_cur_level = 0; + root->canon_pathkeys = NIL; + root->left_join_clauses = NIL; + root->right_join_clauses = NIL; + root->full_join_clauses = NIL; + root->join_info_list = NIL; + root->placeholder_list = NIL; + root->fkey_list = NIL; + root->initial_rels = NIL; + + /* + * Set up arrays for accessing base relations and AppendRelInfos. + */ + setup_simple_rel_arrays(root); + + /* + * In the trivial case where the jointree is a single RTE_RESULT relation, + * bypass all the rest of this function and just make a RelOptInfo and its + * one access path. This is worth optimizing because it applies for + * common cases like "SELECT expression" and "INSERT ... VALUES()". + */ + Assert(parse->jointree->fromlist != NIL); + if (list_length(parse->jointree->fromlist) == 1) + { + Node *jtnode = (Node *) linitial(parse->jointree->fromlist); + + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = root->simple_rte_array[varno]; + + Assert(rte != NULL); + if (rte->rtekind == RTE_RESULT) + { + /* Make the RelOptInfo for it directly */ + final_rel = build_simple_rel(root, varno, NULL); + + /* + * If query allows parallelism in general, check whether the + * quals are parallel-restricted. (We need not check + * final_rel->reltarget because it's empty at this point. + * Anything parallel-restricted in the query tlist will be + * dealt with later.) This is normally pretty silly, because + * a Result-only plan would never be interesting to + * parallelize. However, if force_parallel_mode is on, then + * we want to execute the Result in a parallel worker if + * possible, so we must do this. + */ + if (root->glob->parallelModeOK && + force_parallel_mode != FORCE_PARALLEL_OFF) + final_rel->consider_parallel = + is_parallel_safe(root, parse->jointree->quals); + + /* + * The only path for it is a trivial Result path. We cheat a + * bit here by using a GroupResultPath, because that way we + * can just jam the quals into it without preprocessing them. + * (But, if you hold your head at the right angle, a FROM-less + * SELECT is a kind of degenerate-grouping case, so it's not + * that much of a cheat.) + */ + add_path(final_rel, (Path *) + create_group_result_path(root, final_rel, + final_rel->reltarget, + (List *) parse->jointree->quals)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(final_rel); + + /* + * We don't need to run generate_base_implied_equalities, but + * we do need to pretend that EC merging is complete. + */ + root->ec_merging_done = true; + + /* + * We still are required to call qp_callback, in case it's + * something like "SELECT 2+2 ORDER BY 1". + */ + (*qp_callback) (root, qp_extra); + + return final_rel; + } + } + } + + /* + * Construct RelOptInfo nodes for all base relations used in the query. + * Appendrel member relations ("other rels") will be added later. + * + * Note: the reason we find the baserels by searching the jointree, rather + * than scanning the rangetable, is that the rangetable may contain RTEs + * for rels not actively part of the query, for example views. We don't + * want to make RelOptInfos for them. + */ + add_base_rels_to_query(root, (Node *) parse->jointree); + + /* + * Examine the targetlist and join tree, adding entries to baserel + * targetlists for all referenced Vars, and generating PlaceHolderInfo + * entries for all referenced PlaceHolderVars. Restrict and join clauses + * are added to appropriate lists belonging to the mentioned relations. We + * also build EquivalenceClasses for provably equivalent expressions. The + * SpecialJoinInfo list is also built to hold information about join order + * restrictions. Finally, we form a target joinlist for make_one_rel() to + * work from. + */ + build_base_rel_tlists(root, root->processed_tlist); + + find_placeholders_in_jointree(root); + + find_lateral_references(root); + + joinlist = deconstruct_jointree(root); + + /* + * Reconsider any postponed outer-join quals now that we have built up + * equivalence classes. (This could result in further additions or + * mergings of classes.) + */ + reconsider_outer_join_clauses(root); + + /* + * If we formed any equivalence classes, generate additional restriction + * clauses as appropriate. (Implied join clauses are formed on-the-fly + * later.) + */ + generate_base_implied_equalities(root); + + /* + * We have completed merging equivalence sets, so it's now possible to + * generate pathkeys in canonical form; so compute query_pathkeys and + * other pathkeys fields in PlannerInfo. + */ + (*qp_callback) (root, qp_extra); + + /* + * Examine any "placeholder" expressions generated during subquery pullup. + * Make sure that the Vars they need are marked as needed at the relevant + * join level. This must be done before join removal because it might + * cause Vars or placeholders to be needed above a join when they weren't + * so marked before. + */ + fix_placeholder_input_needed_levels(root); + + /* + * Remove any useless outer joins. Ideally this would be done during + * jointree preprocessing, but the necessary information isn't available + * until we've built baserel data structures and classified qual clauses. + */ + joinlist = remove_useless_joins(root, joinlist); + + /* + * Also, reduce any semijoins with unique inner rels to plain inner joins. + * Likewise, this can't be done until now for lack of needed info. + */ + reduce_unique_semijoins(root); + + /* + * Now distribute "placeholders" to base rels as needed. This has to be + * done after join removal because removal could change whether a + * placeholder is evaluable at a base rel. + */ + add_placeholders_to_base_rels(root); + + /* + * Construct the lateral reference sets now that we have finalized + * PlaceHolderVar eval levels. + */ + create_lateral_join_info(root); + + /* + * Match foreign keys to equivalence classes and join quals. This must be + * done after finalizing equivalence classes, and it's useful to wait till + * after join removal so that we can skip processing foreign keys + * involving removed relations. + */ + match_foreign_keys_to_quals(root); + + /* + * Look for join OR clauses that we can extract single-relation + * restriction OR clauses from. + */ + extract_restriction_or_clauses(root); + + /* + * Now expand appendrels by adding "otherrels" for their children. We + * delay this to the end so that we have as much information as possible + * available for each baserel, including all restriction clauses. That + * let us prune away partitions that don't satisfy a restriction clause. + * Also note that some information such as lateral_relids is propagated + * from baserels to otherrels here, so we must have computed it already. + */ + add_other_rels_to_query(root); + + /* + * Distribute any UPDATE/DELETE/MERGE row identity variables to the target + * relations. This can't be done till we've finished expansion of + * appendrels. + */ + distribute_row_identity_vars(root); + + /* + * Ready to do the primary planning. + */ + final_rel = make_one_rel(root, joinlist); + + /* Check that we got at least one usable path */ + if (!final_rel || !final_rel->cheapest_total_path || + final_rel->cheapest_total_path->param_info != NULL) + elog(ERROR, "failed to construct the join relation"); + + return final_rel; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c new file mode 100644 index 0000000..bd4e4ce --- /dev/null +++ b/src/backend/optimizer/plan/planner.c @@ -0,0 +1,7492 @@ +/*------------------------------------------------------------------------- + * + * planner.c + * The query optimizer external interface. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/planner.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> +#include <math.h> + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/parallel.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "executor/nodeAgg.h" +#include "foreign/fdwapi.h" +#include "jit/jit.h" +#include "lib/bipartite_match.h" +#include "lib/knapsack.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#ifdef OPTIMIZER_DEBUG +#include "nodes/print.h" +#endif +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/inherit.h" +#include "optimizer/optimizer.h" +#include "optimizer/paramassign.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/prep.h" +#include "optimizer/subselect.h" +#include "optimizer/tlist.h" +#include "parser/analyze.h" +#include "parser/parse_agg.h" +#include "parser/parsetree.h" +#include "partitioning/partdesc.h" +#include "rewrite/rewriteManip.h" +#include "storage/dsm_impl.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/selfuncs.h" +#include "utils/syscache.h" + +/* GUC parameters */ +double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION; +int force_parallel_mode = FORCE_PARALLEL_OFF; +bool parallel_leader_participation = true; + +/* Hook for plugins to get control in planner() */ +planner_hook_type planner_hook = NULL; + +/* Hook for plugins to get control when grouping_planner() plans upper rels */ +create_upper_paths_hook_type create_upper_paths_hook = NULL; + + +/* Expression kind codes for preprocess_expression */ +#define EXPRKIND_QUAL 0 +#define EXPRKIND_TARGET 1 +#define EXPRKIND_RTFUNC 2 +#define EXPRKIND_RTFUNC_LATERAL 3 +#define EXPRKIND_VALUES 4 +#define EXPRKIND_VALUES_LATERAL 5 +#define EXPRKIND_LIMIT 6 +#define EXPRKIND_APPINFO 7 +#define EXPRKIND_PHV 8 +#define EXPRKIND_TABLESAMPLE 9 +#define EXPRKIND_ARBITER_ELEM 10 +#define EXPRKIND_TABLEFUNC 11 +#define EXPRKIND_TABLEFUNC_LATERAL 12 + +/* Passthrough data for standard_qp_callback */ +typedef struct +{ + List *activeWindows; /* active windows, if any */ + List *groupClause; /* overrides parse->groupClause */ +} standard_qp_extra; + +/* + * Data specific to grouping sets + */ + +typedef struct +{ + List *rollups; + List *hash_sets_idx; + double dNumHashGroups; + bool any_hashable; + Bitmapset *unsortable_refs; + Bitmapset *unhashable_refs; + List *unsortable_sets; + int *tleref_to_colnum_map; +} grouping_sets_data; + +/* + * Temporary structure for use during WindowClause reordering in order to be + * able to sort WindowClauses on partitioning/ordering prefix. + */ +typedef struct +{ + WindowClause *wc; + List *uniqueOrder; /* A List of unique ordering/partitioning + * clauses per Window */ +} WindowClauseSortData; + +/* Local functions */ +static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); +static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); +static void grouping_planner(PlannerInfo *root, double tuple_fraction); +static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); +static List *remap_to_groupclause_idx(List *groupClause, List *gsets, + int *tleref_to_colnum_map); +static void preprocess_rowmarks(PlannerInfo *root); +static double preprocess_limit(PlannerInfo *root, + double tuple_fraction, + int64 *offset_est, int64 *count_est); +static void remove_useless_groupby_columns(PlannerInfo *root); +static List *preprocess_groupclause(PlannerInfo *root, List *force); +static List *extract_rollup_sets(List *groupingSets); +static List *reorder_grouping_sets(List *groupingSets, List *sortclause); +static void standard_qp_callback(PlannerInfo *root, void *extra); +static double get_number_of_groups(PlannerInfo *root, + double path_rows, + grouping_sets_data *gd, + List *target_list); +static RelOptInfo *create_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + bool target_parallel_safe, + grouping_sets_data *gd); +static bool is_degenerate_grouping(PlannerInfo *root); +static void create_degenerate_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel); +static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + PathTarget *target, bool target_parallel_safe, + Node *havingQual); +static void create_ordinary_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + GroupPathExtraData *extra, + RelOptInfo **partially_grouped_rel_p); +static void consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups); +static RelOptInfo *create_window_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *input_target, + PathTarget *output_target, + bool output_target_parallel_safe, + WindowFuncLists *wflists, + List *activeWindows); +static void create_one_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + PathTarget *input_target, + PathTarget *output_target, + WindowFuncLists *wflists, + List *activeWindows); +static RelOptInfo *create_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel); +static void create_partial_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *final_distinct_rel); +static RelOptInfo *create_final_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *distinct_rel); +static RelOptInfo *create_ordered_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + bool target_parallel_safe, + double limit_tuples); +static PathTarget *make_group_input_target(PlannerInfo *root, + PathTarget *final_target); +static PathTarget *make_partial_grouping_target(PlannerInfo *root, + PathTarget *grouping_target, + Node *havingQual); +static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); +static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); +static PathTarget *make_window_input_target(PlannerInfo *root, + PathTarget *final_target, + List *activeWindows); +static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist); +static PathTarget *make_sort_input_target(PlannerInfo *root, + PathTarget *final_target, + bool *have_postponed_srfs); +static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs); +static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + double dNumGroups, + GroupPathExtraData *extra); +static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + RelOptInfo *input_rel, + grouping_sets_data *gd, + GroupPathExtraData *extra, + bool force_rel_creation); +static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); +static bool can_partial_agg(PlannerInfo *root); +static void apply_scanjoin_target_to_paths(PlannerInfo *root, + RelOptInfo *rel, + List *scanjoin_targets, + List *scanjoin_targets_contain_srfs, + bool scanjoin_target_parallel_safe, + bool tlist_same_exprs); +static void create_partitionwise_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + PartitionwiseAggregateType patype, + GroupPathExtraData *extra); +static bool group_by_has_partkey(RelOptInfo *input_rel, + List *targetList, + List *groupClause); +static int common_prefix_cmp(const void *a, const void *b); + + +/***************************************************************************** + * + * Query optimizer entry point + * + * To support loadable plugins that monitor or modify planner behavior, + * we provide a hook variable that lets a plugin get control before and + * after the standard planning process. The plugin would normally call + * standard_planner(). + * + * Note to plugin authors: standard_planner() scribbles on its Query input, + * so you'd better copy that data structure if you want to plan more than once. + * + *****************************************************************************/ +PlannedStmt * +planner(Query *parse, const char *query_string, int cursorOptions, + ParamListInfo boundParams) +{ + PlannedStmt *result; + + if (planner_hook) + result = (*planner_hook) (parse, query_string, cursorOptions, boundParams); + else + result = standard_planner(parse, query_string, cursorOptions, boundParams); + return result; +} + +PlannedStmt * +standard_planner(Query *parse, const char *query_string, int cursorOptions, + ParamListInfo boundParams) +{ + PlannedStmt *result; + PlannerGlobal *glob; + double tuple_fraction; + PlannerInfo *root; + RelOptInfo *final_rel; + Path *best_path; + Plan *top_plan; + ListCell *lp, + *lr; + + /* + * Set up global state for this planner invocation. This data is needed + * across all levels of sub-Query that might exist in the given command, + * so we keep it in a separate struct that's linked to by each per-Query + * PlannerInfo. + */ + glob = makeNode(PlannerGlobal); + + glob->boundParams = boundParams; + glob->subplans = NIL; + glob->subroots = NIL; + glob->rewindPlanIDs = NULL; + glob->finalrtable = NIL; + glob->finalrowmarks = NIL; + glob->resultRelations = NIL; + glob->appendRelations = NIL; + glob->relationOids = NIL; + glob->invalItems = NIL; + glob->paramExecTypes = NIL; + glob->lastPHId = 0; + glob->lastRowMarkId = 0; + glob->lastPlanNodeId = 0; + glob->transientPlan = false; + glob->dependsOnRole = false; + + /* + * Assess whether it's feasible to use parallel mode for this query. We + * can't do this in a standalone backend, or if the command will try to + * modify any data, or if this is a cursor operation, or if GUCs are set + * to values that don't permit parallelism, or if parallel-unsafe + * functions are present in the query tree. + * + * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE + * MATERIALIZED VIEW to use parallel plans, but this is safe only because + * the command is writing into a completely new table which workers won't + * be able to see. If the workers could see the table, the fact that + * group locking would cause them to ignore the leader's heavyweight + * GIN page locks would make this unsafe. We'll have to fix that somehow + * if we want to allow parallel inserts in general; updates and deletes + * have additional problems especially around combo CIDs.) + * + * For now, we don't try to use parallel mode if we're running inside a + * parallel worker. We might eventually be able to relax this + * restriction, but for now it seems best not to have parallel workers + * trying to create their own parallel workers. + */ + if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && + IsUnderPostmaster && + parse->commandType == CMD_SELECT && + !parse->hasModifyingCTE && + max_parallel_workers_per_gather > 0 && + !IsParallelWorker()) + { + /* all the cheap tests pass, so scan the query tree */ + glob->maxParallelHazard = max_parallel_hazard(parse); + glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE); + } + else + { + /* skip the query tree scan, just assume it's unsafe */ + glob->maxParallelHazard = PROPARALLEL_UNSAFE; + glob->parallelModeOK = false; + } + + /* + * glob->parallelModeNeeded is normally set to false here and changed to + * true during plan creation if a Gather or Gather Merge plan is actually + * created (cf. create_gather_plan, create_gather_merge_plan). + * + * However, if force_parallel_mode = on or force_parallel_mode = regress, + * then we impose parallel mode whenever it's safe to do so, even if the + * final plan doesn't use parallelism. It's not safe to do so if the + * query contains anything parallel-unsafe; parallelModeOK will be false + * in that case. Note that parallelModeOK can't change after this point. + * Otherwise, everything in the query is either parallel-safe or + * parallel-restricted, and in either case it should be OK to impose + * parallel-mode restrictions. If that ends up breaking something, then + * either some function the user included in the query is incorrectly + * labeled as parallel-safe or parallel-restricted when in reality it's + * parallel-unsafe, or else the query planner itself has a bug. + */ + glob->parallelModeNeeded = glob->parallelModeOK && + (force_parallel_mode != FORCE_PARALLEL_OFF); + + /* Determine what fraction of the plan is likely to be scanned */ + if (cursorOptions & CURSOR_OPT_FAST_PLAN) + { + /* + * We have no real idea how many tuples the user will ultimately FETCH + * from a cursor, but it is often the case that he doesn't want 'em + * all, or would prefer a fast-start plan anyway so that he can + * process some of the tuples sooner. Use a GUC parameter to decide + * what fraction to optimize for. + */ + tuple_fraction = cursor_tuple_fraction; + + /* + * We document cursor_tuple_fraction as simply being a fraction, which + * means the edge cases 0 and 1 have to be treated specially here. We + * convert 1 to 0 ("all the tuples") and 0 to a very small fraction. + */ + if (tuple_fraction >= 1.0) + tuple_fraction = 0.0; + else if (tuple_fraction <= 0.0) + tuple_fraction = 1e-10; + } + else + { + /* Default assumption is we need all the tuples */ + tuple_fraction = 0.0; + } + + /* primary planning entry point (may recurse for subqueries) */ + root = subquery_planner(glob, parse, NULL, + false, tuple_fraction); + + /* Select best Path and turn it into a Plan */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + best_path = get_cheapest_fractional_path(final_rel, tuple_fraction); + + top_plan = create_plan(root, best_path); + + /* + * If creating a plan for a scrollable cursor, make sure it can run + * backwards on demand. Add a Material node at the top at need. + */ + if (cursorOptions & CURSOR_OPT_SCROLL) + { + if (!ExecSupportsBackwardScan(top_plan)) + top_plan = materialize_finished_plan(top_plan); + } + + /* + * Optionally add a Gather node for testing purposes, provided this is + * actually a safe thing to do. + */ + if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe) + { + Gather *gather = makeNode(Gather); + + /* + * Top plan must not have any initPlans, else it shouldn't have been + * marked parallel-safe. + */ + Assert(top_plan->initPlan == NIL); + + gather->plan.targetlist = top_plan->targetlist; + gather->plan.qual = NIL; + gather->plan.lefttree = top_plan; + gather->plan.righttree = NULL; + gather->num_workers = 1; + gather->single_copy = true; + gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS); + + /* + * Since this Gather has no parallel-aware descendants to signal to, + * we don't need a rescan Param. + */ + gather->rescan_param = -1; + + /* + * Ideally we'd use cost_gather here, but setting up dummy path data + * to satisfy it doesn't seem much cleaner than knowing what it does. + */ + gather->plan.startup_cost = top_plan->startup_cost + + parallel_setup_cost; + gather->plan.total_cost = top_plan->total_cost + + parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows; + gather->plan.plan_rows = top_plan->plan_rows; + gather->plan.plan_width = top_plan->plan_width; + gather->plan.parallel_aware = false; + gather->plan.parallel_safe = false; + + /* use parallel mode for parallel plans. */ + root->glob->parallelModeNeeded = true; + + top_plan = &gather->plan; + } + + /* + * If any Params were generated, run through the plan tree and compute + * each plan node's extParam/allParam sets. Ideally we'd merge this into + * set_plan_references' tree traversal, but for now it has to be separate + * because we need to visit subplans before not after main plan. + */ + if (glob->paramExecTypes != NIL) + { + Assert(list_length(glob->subplans) == list_length(glob->subroots)); + forboth(lp, glob->subplans, lr, glob->subroots) + { + Plan *subplan = (Plan *) lfirst(lp); + PlannerInfo *subroot = lfirst_node(PlannerInfo, lr); + + SS_finalize_plan(subroot, subplan); + } + SS_finalize_plan(root, top_plan); + } + + /* final cleanup of the plan */ + Assert(glob->finalrtable == NIL); + Assert(glob->finalrowmarks == NIL); + Assert(glob->resultRelations == NIL); + Assert(glob->appendRelations == NIL); + top_plan = set_plan_references(root, top_plan); + /* ... and the subplans (both regular subplans and initplans) */ + Assert(list_length(glob->subplans) == list_length(glob->subroots)); + forboth(lp, glob->subplans, lr, glob->subroots) + { + Plan *subplan = (Plan *) lfirst(lp); + PlannerInfo *subroot = lfirst_node(PlannerInfo, lr); + + lfirst(lp) = set_plan_references(subroot, subplan); + } + + /* build the PlannedStmt result */ + result = makeNode(PlannedStmt); + + result->commandType = parse->commandType; + result->queryId = parse->queryId; + result->hasReturning = (parse->returningList != NIL); + result->hasModifyingCTE = parse->hasModifyingCTE; + result->canSetTag = parse->canSetTag; + result->transientPlan = glob->transientPlan; + result->dependsOnRole = glob->dependsOnRole; + result->parallelModeNeeded = glob->parallelModeNeeded; + result->planTree = top_plan; + result->rtable = glob->finalrtable; + result->resultRelations = glob->resultRelations; + result->appendRelations = glob->appendRelations; + result->subplans = glob->subplans; + result->rewindPlanIDs = glob->rewindPlanIDs; + result->rowMarks = glob->finalrowmarks; + result->relationOids = glob->relationOids; + result->invalItems = glob->invalItems; + result->paramExecTypes = glob->paramExecTypes; + /* utilityStmt should be null, but we might as well copy it */ + result->utilityStmt = parse->utilityStmt; + result->stmt_location = parse->stmt_location; + result->stmt_len = parse->stmt_len; + + result->jitFlags = PGJIT_NONE; + if (jit_enabled && jit_above_cost >= 0 && + top_plan->total_cost > jit_above_cost) + { + result->jitFlags |= PGJIT_PERFORM; + + /* + * Decide how much effort should be put into generating better code. + */ + if (jit_optimize_above_cost >= 0 && + top_plan->total_cost > jit_optimize_above_cost) + result->jitFlags |= PGJIT_OPT3; + if (jit_inline_above_cost >= 0 && + top_plan->total_cost > jit_inline_above_cost) + result->jitFlags |= PGJIT_INLINE; + + /* + * Decide which operations should be JITed. + */ + if (jit_expressions) + result->jitFlags |= PGJIT_EXPR; + if (jit_tuple_deforming) + result->jitFlags |= PGJIT_DEFORM; + } + + if (glob->partition_directory != NULL) + DestroyPartitionDirectory(glob->partition_directory); + + return result; +} + + +/*-------------------- + * subquery_planner + * Invokes the planner on a subquery. We recurse to here for each + * sub-SELECT found in the query tree. + * + * glob is the global state for the current planner run. + * parse is the querytree produced by the parser & rewriter. + * parent_root is the immediate parent Query's info (NULL at the top level). + * hasRecursion is true if this is a recursive WITH query. + * tuple_fraction is the fraction of tuples we expect will be retrieved. + * tuple_fraction is interpreted as explained for grouping_planner, below. + * + * Basically, this routine does the stuff that should only be done once + * per Query object. It then calls grouping_planner. At one time, + * grouping_planner could be invoked recursively on the same Query object; + * that's not currently true, but we keep the separation between the two + * routines anyway, in case we need it again someday. + * + * subquery_planner will be called recursively to handle sub-Query nodes + * found within the query's expressions and rangetable. + * + * Returns the PlannerInfo struct ("root") that contains all data generated + * while planning the subquery. In particular, the Path(s) attached to + * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the + * cheapest way(s) to implement the query. The top level will select the + * best Path and pass it through createplan.c to produce a finished Plan. + *-------------------- + */ +PlannerInfo * +subquery_planner(PlannerGlobal *glob, Query *parse, + PlannerInfo *parent_root, + bool hasRecursion, double tuple_fraction) +{ + PlannerInfo *root; + List *newWithCheckOptions; + List *newHaving; + bool hasOuterJoins; + bool hasResultRTEs; + RelOptInfo *final_rel; + ListCell *l; + + /* Create a PlannerInfo data structure for this subquery */ + root = makeNode(PlannerInfo); + root->parse = parse; + root->glob = glob; + root->query_level = parent_root ? parent_root->query_level + 1 : 1; + root->parent_root = parent_root; + root->plan_params = NIL; + root->outer_params = NULL; + root->planner_cxt = CurrentMemoryContext; + root->init_plans = NIL; + root->cte_plan_ids = NIL; + root->multiexpr_params = NIL; + root->eq_classes = NIL; + root->ec_merging_done = false; + root->all_result_relids = + parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL; + root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */ + root->append_rel_list = NIL; + root->row_identity_vars = NIL; + root->rowMarks = NIL; + memset(root->upper_rels, 0, sizeof(root->upper_rels)); + memset(root->upper_targets, 0, sizeof(root->upper_targets)); + root->processed_tlist = NIL; + root->update_colnos = NIL; + root->grouping_map = NULL; + root->minmax_aggs = NIL; + root->qual_security_level = 0; + root->hasPseudoConstantQuals = false; + root->hasAlternativeSubPlans = false; + root->hasRecursion = hasRecursion; + if (hasRecursion) + root->wt_param_id = assign_special_exec_param(root); + else + root->wt_param_id = -1; + root->non_recursive_path = NULL; + root->partColsUpdated = false; + + /* + * If there is a WITH list, process each WITH query and either convert it + * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it. + */ + if (parse->cteList) + SS_process_ctes(root); + + /* + * If it's a MERGE command, transform the joinlist as appropriate. + */ + transform_MERGE_to_join(parse); + + /* + * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so + * that we don't need so many special cases to deal with that situation. + */ + replace_empty_jointree(parse); + + /* + * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try + * to transform them into joins. Note that this step does not descend + * into subqueries; if we pull up any subqueries below, their SubLinks are + * processed just before pulling them up. + */ + if (parse->hasSubLinks) + pull_up_sublinks(root); + + /* + * Scan the rangetable for function RTEs, do const-simplification on them, + * and then inline them if possible (producing subqueries that might get + * pulled up next). Recursion issues here are handled in the same way as + * for SubLinks. + */ + preprocess_function_rtes(root); + + /* + * Check to see if any subqueries in the jointree can be merged into this + * query. + */ + pull_up_subqueries(root); + + /* + * If this is a simple UNION ALL query, flatten it into an appendrel. We + * do this now because it requires applying pull_up_subqueries to the leaf + * queries of the UNION ALL, which weren't touched above because they + * weren't referenced by the jointree (they will be after we do this). + */ + if (parse->setOperations) + flatten_simple_union_all(root); + + /* + * Survey the rangetable to see what kinds of entries are present. We can + * skip some later processing if relevant SQL features are not used; for + * example if there are no JOIN RTEs we can avoid the expense of doing + * flatten_join_alias_vars(). This must be done after we have finished + * adding rangetable entries, of course. (Note: actually, processing of + * inherited or partitioned rels can cause RTEs for their child tables to + * get added later; but those must all be RTE_RELATION entries, so they + * don't invalidate the conclusions drawn here.) + */ + root->hasJoinRTEs = false; + root->hasLateralRTEs = false; + hasOuterJoins = false; + hasResultRTEs = false; + foreach(l, parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, l); + + switch (rte->rtekind) + { + case RTE_RELATION: + if (rte->inh) + { + /* + * Check to see if the relation actually has any children; + * if not, clear the inh flag so we can treat it as a + * plain base relation. + * + * Note: this could give a false-positive result, if the + * rel once had children but no longer does. We used to + * be able to clear rte->inh later on when we discovered + * that, but no more; we have to handle such cases as + * full-fledged inheritance. + */ + rte->inh = has_subclass(rte->relid); + } + break; + case RTE_JOIN: + root->hasJoinRTEs = true; + if (IS_OUTER_JOIN(rte->jointype)) + hasOuterJoins = true; + break; + case RTE_RESULT: + hasResultRTEs = true; + break; + default: + /* No work here for other RTE types */ + break; + } + + if (rte->lateral) + root->hasLateralRTEs = true; + + /* + * We can also determine the maximum security level required for any + * securityQuals now. Addition of inheritance-child RTEs won't affect + * this, because child tables don't have their own securityQuals; see + * expand_single_inheritance_child(). + */ + if (rte->securityQuals) + root->qual_security_level = Max(root->qual_security_level, + list_length(rte->securityQuals)); + } + + /* + * If we have now verified that the query target relation is + * non-inheriting, mark it as a leaf target. + */ + if (parse->resultRelation) + { + RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable); + + if (!rte->inh) + root->leaf_result_relids = + bms_make_singleton(parse->resultRelation); + } + + /* + * Preprocess RowMark information. We need to do this after subquery + * pullup, so that all base relations are present. + */ + preprocess_rowmarks(root); + + /* + * Set hasHavingQual to remember if HAVING clause is present. Needed + * because preprocess_expression will reduce a constant-true condition to + * an empty qual list ... but "HAVING TRUE" is not a semantic no-op. + */ + root->hasHavingQual = (parse->havingQual != NULL); + + /* + * Do expression preprocessing on targetlist and quals, as well as other + * random expressions in the querytree. Note that we do not need to + * handle sort/group expressions explicitly, because they are actually + * part of the targetlist. + */ + parse->targetList = (List *) + preprocess_expression(root, (Node *) parse->targetList, + EXPRKIND_TARGET); + + /* Constant-folding might have removed all set-returning functions */ + if (parse->hasTargetSRFs) + parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList); + + newWithCheckOptions = NIL; + foreach(l, parse->withCheckOptions) + { + WithCheckOption *wco = lfirst_node(WithCheckOption, l); + + wco->qual = preprocess_expression(root, wco->qual, + EXPRKIND_QUAL); + if (wco->qual != NULL) + newWithCheckOptions = lappend(newWithCheckOptions, wco); + } + parse->withCheckOptions = newWithCheckOptions; + + parse->returningList = (List *) + preprocess_expression(root, (Node *) parse->returningList, + EXPRKIND_TARGET); + + preprocess_qual_conditions(root, (Node *) parse->jointree); + + parse->havingQual = preprocess_expression(root, parse->havingQual, + EXPRKIND_QUAL); + + foreach(l, parse->windowClause) + { + WindowClause *wc = lfirst_node(WindowClause, l); + + /* partitionClause/orderClause are sort/group expressions */ + wc->startOffset = preprocess_expression(root, wc->startOffset, + EXPRKIND_LIMIT); + wc->endOffset = preprocess_expression(root, wc->endOffset, + EXPRKIND_LIMIT); + wc->runCondition = (List *) preprocess_expression(root, + (Node *) wc->runCondition, + EXPRKIND_TARGET); + } + + parse->limitOffset = preprocess_expression(root, parse->limitOffset, + EXPRKIND_LIMIT); + parse->limitCount = preprocess_expression(root, parse->limitCount, + EXPRKIND_LIMIT); + + if (parse->onConflict) + { + parse->onConflict->arbiterElems = (List *) + preprocess_expression(root, + (Node *) parse->onConflict->arbiterElems, + EXPRKIND_ARBITER_ELEM); + parse->onConflict->arbiterWhere = + preprocess_expression(root, + parse->onConflict->arbiterWhere, + EXPRKIND_QUAL); + parse->onConflict->onConflictSet = (List *) + preprocess_expression(root, + (Node *) parse->onConflict->onConflictSet, + EXPRKIND_TARGET); + parse->onConflict->onConflictWhere = + preprocess_expression(root, + parse->onConflict->onConflictWhere, + EXPRKIND_QUAL); + /* exclRelTlist contains only Vars, so no preprocessing needed */ + } + + foreach(l, parse->mergeActionList) + { + MergeAction *action = (MergeAction *) lfirst(l); + + action->targetList = (List *) + preprocess_expression(root, + (Node *) action->targetList, + EXPRKIND_TARGET); + action->qual = + preprocess_expression(root, + (Node *) action->qual, + EXPRKIND_QUAL); + } + + root->append_rel_list = (List *) + preprocess_expression(root, (Node *) root->append_rel_list, + EXPRKIND_APPINFO); + + /* Also need to preprocess expressions within RTEs */ + foreach(l, parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, l); + int kind; + ListCell *lcsq; + + if (rte->rtekind == RTE_RELATION) + { + if (rte->tablesample) + rte->tablesample = (TableSampleClause *) + preprocess_expression(root, + (Node *) rte->tablesample, + EXPRKIND_TABLESAMPLE); + } + else if (rte->rtekind == RTE_SUBQUERY) + { + /* + * We don't want to do all preprocessing yet on the subquery's + * expressions, since that will happen when we plan it. But if it + * contains any join aliases of our level, those have to get + * expanded now, because planning of the subquery won't do it. + * That's only possible if the subquery is LATERAL. + */ + if (rte->lateral && root->hasJoinRTEs) + rte->subquery = (Query *) + flatten_join_alias_vars(root->parse, + (Node *) rte->subquery); + } + else if (rte->rtekind == RTE_FUNCTION) + { + /* Preprocess the function expression(s) fully */ + kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC; + rte->functions = (List *) + preprocess_expression(root, (Node *) rte->functions, kind); + } + else if (rte->rtekind == RTE_TABLEFUNC) + { + /* Preprocess the function expression(s) fully */ + kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC; + rte->tablefunc = (TableFunc *) + preprocess_expression(root, (Node *) rte->tablefunc, kind); + } + else if (rte->rtekind == RTE_VALUES) + { + /* Preprocess the values lists fully */ + kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES; + rte->values_lists = (List *) + preprocess_expression(root, (Node *) rte->values_lists, kind); + } + + /* + * Process each element of the securityQuals list as if it were a + * separate qual expression (as indeed it is). We need to do it this + * way to get proper canonicalization of AND/OR structure. Note that + * this converts each element into an implicit-AND sublist. + */ + foreach(lcsq, rte->securityQuals) + { + lfirst(lcsq) = preprocess_expression(root, + (Node *) lfirst(lcsq), + EXPRKIND_QUAL); + } + } + + /* + * Now that we are done preprocessing expressions, and in particular done + * flattening join alias variables, get rid of the joinaliasvars lists. + * They no longer match what expressions in the rest of the tree look + * like, because we have not preprocessed expressions in those lists (and + * do not want to; for example, expanding a SubLink there would result in + * a useless unreferenced subplan). Leaving them in place simply creates + * a hazard for later scans of the tree. We could try to prevent that by + * using QTW_IGNORE_JOINALIASES in every tree scan done after this point, + * but that doesn't sound very reliable. + */ + if (root->hasJoinRTEs) + { + foreach(l, parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, l); + + rte->joinaliasvars = NIL; + } + } + + /* + * In some cases we may want to transfer a HAVING clause into WHERE. We + * cannot do so if the HAVING clause contains aggregates (obviously) or + * volatile functions (since a HAVING clause is supposed to be executed + * only once per group). We also can't do this if there are any nonempty + * grouping sets; moving such a clause into WHERE would potentially change + * the results, if any referenced column isn't present in all the grouping + * sets. (If there are only empty grouping sets, then the HAVING clause + * must be degenerate as discussed below.) + * + * Also, it may be that the clause is so expensive to execute that we're + * better off doing it only once per group, despite the loss of + * selectivity. This is hard to estimate short of doing the entire + * planning process twice, so we use a heuristic: clauses containing + * subplans are left in HAVING. Otherwise, we move or copy the HAVING + * clause into WHERE, in hopes of eliminating tuples before aggregation + * instead of after. + * + * If the query has explicit grouping then we can simply move such a + * clause into WHERE; any group that fails the clause will not be in the + * output because none of its tuples will reach the grouping or + * aggregation stage. Otherwise we must have a degenerate (variable-free) + * HAVING clause, which we put in WHERE so that query_planner() can use it + * in a gating Result node, but also keep in HAVING to ensure that we + * don't emit a bogus aggregated row. (This could be done better, but it + * seems not worth optimizing.) + * + * Note that both havingQual and parse->jointree->quals are in + * implicitly-ANDed-list form at this point, even though they are declared + * as Node *. + */ + newHaving = NIL; + foreach(l, (List *) parse->havingQual) + { + Node *havingclause = (Node *) lfirst(l); + + if ((parse->groupClause && parse->groupingSets) || + contain_agg_clause(havingclause) || + contain_volatile_functions(havingclause) || + contain_subplans(havingclause)) + { + /* keep it in HAVING */ + newHaving = lappend(newHaving, havingclause); + } + else if (parse->groupClause && !parse->groupingSets) + { + /* move it to WHERE */ + parse->jointree->quals = (Node *) + lappend((List *) parse->jointree->quals, havingclause); + } + else + { + /* put a copy in WHERE, keep it in HAVING */ + parse->jointree->quals = (Node *) + lappend((List *) parse->jointree->quals, + copyObject(havingclause)); + newHaving = lappend(newHaving, havingclause); + } + } + parse->havingQual = (Node *) newHaving; + + /* Remove any redundant GROUP BY columns */ + remove_useless_groupby_columns(root); + + /* + * If we have any outer joins, try to reduce them to plain inner joins. + * This step is most easily done after we've done expression + * preprocessing. + */ + if (hasOuterJoins) + reduce_outer_joins(root); + + /* + * If we have any RTE_RESULT relations, see if they can be deleted from + * the jointree. This step is most effectively done after we've done + * expression preprocessing and outer join reduction. + */ + if (hasResultRTEs) + remove_useless_result_rtes(root); + + /* + * Do the main planning. + */ + grouping_planner(root, tuple_fraction); + + /* + * Capture the set of outer-level param IDs we have access to, for use in + * extParam/allParam calculations later. + */ + SS_identify_outer_params(root); + + /* + * If any initPlans were created in this query level, adjust the surviving + * Paths' costs and parallel-safety flags to account for them. The + * initPlans won't actually get attached to the plan tree till + * create_plan() runs, but we must include their effects now. + */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + SS_charge_for_initplans(root, final_rel); + + /* + * Make sure we've identified the cheapest Path for the final rel. (By + * doing this here not in grouping_planner, we include initPlan costs in + * the decision, though it's unlikely that will change anything.) + */ + set_cheapest(final_rel); + + return root; +} + +/* + * preprocess_expression + * Do subquery_planner's preprocessing work for an expression, + * which can be a targetlist, a WHERE clause (including JOIN/ON + * conditions), a HAVING clause, or a few other things. + */ +static Node * +preprocess_expression(PlannerInfo *root, Node *expr, int kind) +{ + /* + * Fall out quickly if expression is empty. This occurs often enough to + * be worth checking. Note that null->null is the correct conversion for + * implicit-AND result format, too. + */ + if (expr == NULL) + return NULL; + + /* + * If the query has any join RTEs, replace join alias variables with + * base-relation variables. We must do this first, since any expressions + * we may extract from the joinaliasvars lists have not been preprocessed. + * For example, if we did this after sublink processing, sublinks expanded + * out from join aliases would not get processed. But we can skip this in + * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since + * they can't contain any Vars of the current query level. + */ + if (root->hasJoinRTEs && + !(kind == EXPRKIND_RTFUNC || + kind == EXPRKIND_VALUES || + kind == EXPRKIND_TABLESAMPLE || + kind == EXPRKIND_TABLEFUNC)) + expr = flatten_join_alias_vars(root->parse, expr); + + /* + * Simplify constant expressions. For function RTEs, this was already + * done by preprocess_function_rtes. (But note we must do it again for + * EXPRKIND_RTFUNC_LATERAL, because those might by now contain + * un-simplified subexpressions inserted by flattening of subqueries or + * join alias variables.) + * + * Note: an essential effect of this is to convert named-argument function + * calls to positional notation and insert the current actual values of + * any default arguments for functions. To ensure that happens, we *must* + * process all expressions here. Previous PG versions sometimes skipped + * const-simplification if it didn't seem worth the trouble, but we can't + * do that anymore. + * + * Note: this also flattens nested AND and OR expressions into N-argument + * form. All processing of a qual expression after this point must be + * careful to maintain AND/OR flatness --- that is, do not generate a tree + * with AND directly under AND, nor OR directly under OR. + */ + if (kind != EXPRKIND_RTFUNC) + expr = eval_const_expressions(root, expr); + + /* + * If it's a qual or havingQual, canonicalize it. + */ + if (kind == EXPRKIND_QUAL) + { + expr = (Node *) canonicalize_qual((Expr *) expr, false); + +#ifdef OPTIMIZER_DEBUG + printf("After canonicalize_qual()\n"); + pprint(expr); +#endif + } + + /* + * Check for ANY ScalarArrayOpExpr with Const arrays and set the + * hashfuncid of any that might execute more quickly by using hash lookups + * instead of a linear search. + */ + if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET) + { + convert_saop_to_hashed_saop(expr); + } + + /* Expand SubLinks to SubPlans */ + if (root->parse->hasSubLinks) + expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL)); + + /* + * XXX do not insert anything here unless you have grokked the comments in + * SS_replace_correlation_vars ... + */ + + /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */ + if (root->query_level > 1) + expr = SS_replace_correlation_vars(root, expr); + + /* + * If it's a qual or havingQual, convert it to implicit-AND format. (We + * don't want to do this before eval_const_expressions, since the latter + * would be unable to simplify a top-level AND correctly. Also, + * SS_process_sublinks expects explicit-AND format.) + */ + if (kind == EXPRKIND_QUAL) + expr = (Node *) make_ands_implicit((Expr *) expr); + + return expr; +} + +/* + * preprocess_qual_conditions + * Recursively scan the query's jointree and do subquery_planner's + * preprocessing work on each qual condition found therein. + */ +static void +preprocess_qual_conditions(PlannerInfo *root, Node *jtnode) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, RangeTblRef)) + { + /* nothing to do here */ + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + foreach(l, f->fromlist) + preprocess_qual_conditions(root, lfirst(l)); + + f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + preprocess_qual_conditions(root, j->larg); + preprocess_qual_conditions(root, j->rarg); + + j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL); + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} + +/* + * preprocess_phv_expression + * Do preprocessing on a PlaceHolderVar expression that's been pulled up. + * + * If a LATERAL subquery references an output of another subquery, and that + * output must be wrapped in a PlaceHolderVar because of an intermediate outer + * join, then we'll push the PlaceHolderVar expression down into the subquery + * and later pull it back up during find_lateral_references, which runs after + * subquery_planner has preprocessed all the expressions that were in the + * current query level to start with. So we need to preprocess it then. + */ +Expr * +preprocess_phv_expression(PlannerInfo *root, Expr *expr) +{ + return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV); +} + +/*-------------------- + * grouping_planner + * Perform planning steps related to grouping, aggregation, etc. + * + * This function adds all required top-level processing to the scan/join + * Path(s) produced by query_planner. + * + * tuple_fraction is the fraction of tuples we expect will be retrieved. + * tuple_fraction is interpreted as follows: + * 0: expect all tuples to be retrieved (normal case) + * 0 < tuple_fraction < 1: expect the given fraction of tuples available + * from the plan to be retrieved + * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples + * expected to be retrieved (ie, a LIMIT specification) + * + * Returns nothing; the useful output is in the Paths we attach to the + * (UPPERREL_FINAL, NULL) upperrel in *root. In addition, + * root->processed_tlist contains the final processed targetlist. + * + * Note that we have not done set_cheapest() on the final rel; it's convenient + * to leave this to the caller. + *-------------------- + */ +static void +grouping_planner(PlannerInfo *root, double tuple_fraction) +{ + Query *parse = root->parse; + int64 offset_est = 0; + int64 count_est = 0; + double limit_tuples = -1.0; + bool have_postponed_srfs = false; + PathTarget *final_target; + List *final_targets; + List *final_targets_contain_srfs; + bool final_target_parallel_safe; + RelOptInfo *current_rel; + RelOptInfo *final_rel; + FinalPathExtraData extra; + ListCell *lc; + + /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ + if (parse->limitCount || parse->limitOffset) + { + tuple_fraction = preprocess_limit(root, tuple_fraction, + &offset_est, &count_est); + + /* + * If we have a known LIMIT, and don't have an unknown OFFSET, we can + * estimate the effects of using a bounded sort. + */ + if (count_est > 0 && offset_est >= 0) + limit_tuples = (double) count_est + (double) offset_est; + } + + /* Make tuple_fraction accessible to lower-level routines */ + root->tuple_fraction = tuple_fraction; + + if (parse->setOperations) + { + /* + * If there's a top-level ORDER BY, assume we have to fetch all the + * tuples. This might be too simplistic given all the hackery below + * to possibly avoid the sort; but the odds of accurate estimates here + * are pretty low anyway. XXX try to get rid of this in favor of + * letting plan_set_operations generate both fast-start and + * cheapest-total paths. + */ + if (parse->sortClause) + root->tuple_fraction = 0.0; + + /* + * Construct Paths for set operations. The results will not need any + * work except perhaps a top-level sort and/or LIMIT. Note that any + * special work for recursive unions is the responsibility of + * plan_set_operations. + */ + current_rel = plan_set_operations(root); + + /* + * We should not need to call preprocess_targetlist, since we must be + * in a SELECT query node. Instead, use the processed_tlist returned + * by plan_set_operations (since this tells whether it returned any + * resjunk columns!), and transfer any sort key information from the + * original tlist. + */ + Assert(parse->commandType == CMD_SELECT); + + /* for safety, copy processed_tlist instead of modifying in-place */ + root->processed_tlist = + postprocess_setop_tlist(copyObject(root->processed_tlist), + parse->targetList); + + /* Also extract the PathTarget form of the setop result tlist */ + final_target = current_rel->cheapest_total_path->pathtarget; + + /* And check whether it's parallel safe */ + final_target_parallel_safe = + is_parallel_safe(root, (Node *) final_target->exprs); + + /* The setop result tlist couldn't contain any SRFs */ + Assert(!parse->hasTargetSRFs); + final_targets = final_targets_contain_srfs = NIL; + + /* + * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have + * checked already, but let's make sure). + */ + if (parse->rowMarks) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*------ + translator: %s is a SQL row locking clause such as FOR UPDATE */ + errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT", + LCS_asString(linitial_node(RowMarkClause, + parse->rowMarks)->strength)))); + + /* + * Calculate pathkeys that represent result ordering requirements + */ + Assert(parse->distinctClause == NIL); + root->sort_pathkeys = make_pathkeys_for_sortclauses(root, + parse->sortClause, + root->processed_tlist); + } + else + { + /* No set operations, do regular planning */ + PathTarget *sort_input_target; + List *sort_input_targets; + List *sort_input_targets_contain_srfs; + bool sort_input_target_parallel_safe; + PathTarget *grouping_target; + List *grouping_targets; + List *grouping_targets_contain_srfs; + bool grouping_target_parallel_safe; + PathTarget *scanjoin_target; + List *scanjoin_targets; + List *scanjoin_targets_contain_srfs; + bool scanjoin_target_parallel_safe; + bool scanjoin_target_same_exprs; + bool have_grouping; + WindowFuncLists *wflists = NULL; + List *activeWindows = NIL; + grouping_sets_data *gset_data = NULL; + standard_qp_extra qp_extra; + + /* A recursive query should always have setOperations */ + Assert(!root->hasRecursion); + + /* Preprocess grouping sets and GROUP BY clause, if any */ + if (parse->groupingSets) + { + gset_data = preprocess_grouping_sets(root); + } + else + { + /* Preprocess regular GROUP BY clause, if any */ + if (parse->groupClause) + parse->groupClause = preprocess_groupclause(root, NIL); + } + + /* + * Preprocess targetlist. Note that much of the remaining planning + * work will be done with the PathTarget representation of tlists, but + * we must also maintain the full representation of the final tlist so + * that we can transfer its decoration (resnames etc) to the topmost + * tlist of the finished Plan. This is kept in processed_tlist. + */ + preprocess_targetlist(root); + + /* + * Mark all the aggregates with resolved aggtranstypes, and detect + * aggregates that are duplicates or can share transition state. We + * must do this before slicing and dicing the tlist into various + * pathtargets, else some copies of the Aggref nodes might escape + * being marked. + */ + if (parse->hasAggs) + { + preprocess_aggrefs(root, (Node *) root->processed_tlist); + preprocess_aggrefs(root, (Node *) parse->havingQual); + } + + /* + * Locate any window functions in the tlist. (We don't need to look + * anywhere else, since expressions used in ORDER BY will be in there + * too.) Note that they could all have been eliminated by constant + * folding, in which case we don't need to do any more work. + */ + if (parse->hasWindowFuncs) + { + wflists = find_window_functions((Node *) root->processed_tlist, + list_length(parse->windowClause)); + if (wflists->numWindowFuncs > 0) + activeWindows = select_active_windows(root, wflists); + else + parse->hasWindowFuncs = false; + } + + /* + * Preprocess MIN/MAX aggregates, if any. Note: be careful about + * adding logic between here and the query_planner() call. Anything + * that is needed in MIN/MAX-optimizable cases will have to be + * duplicated in planagg.c. + */ + if (parse->hasAggs) + preprocess_minmax_aggregates(root); + + /* + * Figure out whether there's a hard limit on the number of rows that + * query_planner's result subplan needs to return. Even if we know a + * hard limit overall, it doesn't apply if the query has any + * grouping/aggregation operations, or SRFs in the tlist. + */ + if (parse->groupClause || + parse->groupingSets || + parse->distinctClause || + parse->hasAggs || + parse->hasWindowFuncs || + parse->hasTargetSRFs || + root->hasHavingQual) + root->limit_tuples = -1.0; + else + root->limit_tuples = limit_tuples; + + /* Set up data needed by standard_qp_callback */ + qp_extra.activeWindows = activeWindows; + qp_extra.groupClause = (gset_data + ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL) + : parse->groupClause); + + /* + * Generate the best unsorted and presorted paths for the scan/join + * portion of this Query, ie the processing represented by the + * FROM/WHERE clauses. (Note there may not be any presorted paths.) + * We also generate (in standard_qp_callback) pathkey representations + * of the query's sort clause, distinct clause, etc. + */ + current_rel = query_planner(root, standard_qp_callback, &qp_extra); + + /* + * Convert the query's result tlist into PathTarget format. + * + * Note: this cannot be done before query_planner() has performed + * appendrel expansion, because that might add resjunk entries to + * root->processed_tlist. Waiting till afterwards is also helpful + * because the target width estimates can use per-Var width numbers + * that were obtained within query_planner(). + */ + final_target = create_pathtarget(root, root->processed_tlist); + final_target_parallel_safe = + is_parallel_safe(root, (Node *) final_target->exprs); + + /* + * If ORDER BY was given, consider whether we should use a post-sort + * projection, and compute the adjusted target for preceding steps if + * so. + */ + if (parse->sortClause) + { + sort_input_target = make_sort_input_target(root, + final_target, + &have_postponed_srfs); + sort_input_target_parallel_safe = + is_parallel_safe(root, (Node *) sort_input_target->exprs); + } + else + { + sort_input_target = final_target; + sort_input_target_parallel_safe = final_target_parallel_safe; + } + + /* + * If we have window functions to deal with, the output from any + * grouping step needs to be what the window functions want; + * otherwise, it should be sort_input_target. + */ + if (activeWindows) + { + grouping_target = make_window_input_target(root, + final_target, + activeWindows); + grouping_target_parallel_safe = + is_parallel_safe(root, (Node *) grouping_target->exprs); + } + else + { + grouping_target = sort_input_target; + grouping_target_parallel_safe = sort_input_target_parallel_safe; + } + + /* + * If we have grouping or aggregation to do, the topmost scan/join + * plan node must emit what the grouping step wants; otherwise, it + * should emit grouping_target. + */ + have_grouping = (parse->groupClause || parse->groupingSets || + parse->hasAggs || root->hasHavingQual); + if (have_grouping) + { + scanjoin_target = make_group_input_target(root, final_target); + scanjoin_target_parallel_safe = + is_parallel_safe(root, (Node *) scanjoin_target->exprs); + } + else + { + scanjoin_target = grouping_target; + scanjoin_target_parallel_safe = grouping_target_parallel_safe; + } + + /* + * If there are any SRFs in the targetlist, we must separate each of + * these PathTargets into SRF-computing and SRF-free targets. Replace + * each of the named targets with a SRF-free version, and remember the + * list of additional projection steps we need to add afterwards. + */ + if (parse->hasTargetSRFs) + { + /* final_target doesn't recompute any SRFs in sort_input_target */ + split_pathtarget_at_srfs(root, final_target, sort_input_target, + &final_targets, + &final_targets_contain_srfs); + final_target = linitial_node(PathTarget, final_targets); + Assert(!linitial_int(final_targets_contain_srfs)); + /* likewise for sort_input_target vs. grouping_target */ + split_pathtarget_at_srfs(root, sort_input_target, grouping_target, + &sort_input_targets, + &sort_input_targets_contain_srfs); + sort_input_target = linitial_node(PathTarget, sort_input_targets); + Assert(!linitial_int(sort_input_targets_contain_srfs)); + /* likewise for grouping_target vs. scanjoin_target */ + split_pathtarget_at_srfs(root, grouping_target, scanjoin_target, + &grouping_targets, + &grouping_targets_contain_srfs); + grouping_target = linitial_node(PathTarget, grouping_targets); + Assert(!linitial_int(grouping_targets_contain_srfs)); + /* scanjoin_target will not have any SRFs precomputed for it */ + split_pathtarget_at_srfs(root, scanjoin_target, NULL, + &scanjoin_targets, + &scanjoin_targets_contain_srfs); + scanjoin_target = linitial_node(PathTarget, scanjoin_targets); + Assert(!linitial_int(scanjoin_targets_contain_srfs)); + } + else + { + /* initialize lists; for most of these, dummy values are OK */ + final_targets = final_targets_contain_srfs = NIL; + sort_input_targets = sort_input_targets_contain_srfs = NIL; + grouping_targets = grouping_targets_contain_srfs = NIL; + scanjoin_targets = list_make1(scanjoin_target); + scanjoin_targets_contain_srfs = NIL; + } + + /* Apply scan/join target. */ + scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1 + && equal(scanjoin_target->exprs, current_rel->reltarget->exprs); + apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets, + scanjoin_targets_contain_srfs, + scanjoin_target_parallel_safe, + scanjoin_target_same_exprs); + + /* + * Save the various upper-rel PathTargets we just computed into + * root->upper_targets[]. The core code doesn't use this, but it + * provides a convenient place for extensions to get at the info. For + * consistency, we save all the intermediate targets, even though some + * of the corresponding upperrels might not be needed for this query. + */ + root->upper_targets[UPPERREL_FINAL] = final_target; + root->upper_targets[UPPERREL_ORDERED] = final_target; + root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target; + root->upper_targets[UPPERREL_DISTINCT] = sort_input_target; + root->upper_targets[UPPERREL_WINDOW] = sort_input_target; + root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target; + + /* + * If we have grouping and/or aggregation, consider ways to implement + * that. We build a new upperrel representing the output of this + * phase. + */ + if (have_grouping) + { + current_rel = create_grouping_paths(root, + current_rel, + grouping_target, + grouping_target_parallel_safe, + gset_data); + /* Fix things up if grouping_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + grouping_targets, + grouping_targets_contain_srfs); + } + + /* + * If we have window functions, consider ways to implement those. We + * build a new upperrel representing the output of this phase. + */ + if (activeWindows) + { + current_rel = create_window_paths(root, + current_rel, + grouping_target, + sort_input_target, + sort_input_target_parallel_safe, + wflists, + activeWindows); + /* Fix things up if sort_input_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + sort_input_targets, + sort_input_targets_contain_srfs); + } + + /* + * If there is a DISTINCT clause, consider ways to implement that. We + * build a new upperrel representing the output of this phase. + */ + if (parse->distinctClause) + { + current_rel = create_distinct_paths(root, + current_rel); + } + } /* end of if (setOperations) */ + + /* + * If ORDER BY was given, consider ways to implement that, and generate a + * new upperrel containing only paths that emit the correct ordering and + * project the correct final_target. We can apply the original + * limit_tuples limit in sort costing here, but only if there are no + * postponed SRFs. + */ + if (parse->sortClause) + { + current_rel = create_ordered_paths(root, + current_rel, + final_target, + final_target_parallel_safe, + have_postponed_srfs ? -1.0 : + limit_tuples); + /* Fix things up if final_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + final_targets, + final_targets_contain_srfs); + } + + /* + * Now we are prepared to build the final-output upperrel. + */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + + /* + * If the input rel is marked consider_parallel and there's nothing that's + * not parallel-safe in the LIMIT clause, then the final_rel can be marked + * consider_parallel as well. Note that if the query has rowMarks or is + * not a SELECT, consider_parallel will be false for every relation in the + * query. + */ + if (current_rel->consider_parallel && + is_parallel_safe(root, parse->limitOffset) && + is_parallel_safe(root, parse->limitCount)) + final_rel->consider_parallel = true; + + /* + * If the current_rel belongs to a single FDW, so does the final_rel. + */ + final_rel->serverid = current_rel->serverid; + final_rel->userid = current_rel->userid; + final_rel->useridiscurrent = current_rel->useridiscurrent; + final_rel->fdwroutine = current_rel->fdwroutine; + + /* + * Generate paths for the final_rel. Insert all surviving paths, with + * LockRows, Limit, and/or ModifyTable steps added if needed. + */ + foreach(lc, current_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* + * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. + * (Note: we intentionally test parse->rowMarks not root->rowMarks + * here. If there are only non-locking rowmarks, they should be + * handled by the ModifyTable node instead. However, root->rowMarks + * is what goes into the LockRows node.) + */ + if (parse->rowMarks) + { + path = (Path *) create_lockrows_path(root, final_rel, path, + root->rowMarks, + assign_special_exec_param(root)); + } + + /* + * If there is a LIMIT/OFFSET clause, add the LIMIT node. + */ + if (limit_needed(parse)) + { + path = (Path *) create_limit_path(root, final_rel, path, + parse->limitOffset, + parse->limitCount, + parse->limitOption, + offset_est, count_est); + } + + /* + * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node. + */ + if (parse->commandType != CMD_SELECT) + { + Index rootRelation; + List *resultRelations = NIL; + List *updateColnosLists = NIL; + List *withCheckOptionLists = NIL; + List *returningLists = NIL; + List *mergeActionLists = NIL; + List *rowMarks; + + if (bms_membership(root->all_result_relids) == BMS_MULTIPLE) + { + /* Inherited UPDATE/DELETE/MERGE */ + RelOptInfo *top_result_rel = find_base_rel(root, + parse->resultRelation); + int resultRelation = -1; + + /* Pass the root result rel forward to the executor. */ + rootRelation = parse->resultRelation; + + /* Add only leaf children to ModifyTable. */ + while ((resultRelation = bms_next_member(root->leaf_result_relids, + resultRelation)) >= 0) + { + RelOptInfo *this_result_rel = find_base_rel(root, + resultRelation); + + /* + * Also exclude any leaf rels that have turned dummy since + * being added to the list, for example, by being excluded + * by constraint exclusion. + */ + if (IS_DUMMY_REL(this_result_rel)) + continue; + + /* Build per-target-rel lists needed by ModifyTable */ + resultRelations = lappend_int(resultRelations, + resultRelation); + if (parse->commandType == CMD_UPDATE) + { + List *update_colnos = root->update_colnos; + + if (this_result_rel != top_result_rel) + update_colnos = + adjust_inherited_attnums_multilevel(root, + update_colnos, + this_result_rel->relid, + top_result_rel->relid); + updateColnosLists = lappend(updateColnosLists, + update_colnos); + } + if (parse->withCheckOptions) + { + List *withCheckOptions = parse->withCheckOptions; + + if (this_result_rel != top_result_rel) + withCheckOptions = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) withCheckOptions, + this_result_rel->relids, + top_result_rel->relids); + withCheckOptionLists = lappend(withCheckOptionLists, + withCheckOptions); + } + if (parse->returningList) + { + List *returningList = parse->returningList; + + if (this_result_rel != top_result_rel) + returningList = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) returningList, + this_result_rel->relids, + top_result_rel->relids); + returningLists = lappend(returningLists, + returningList); + } + if (parse->mergeActionList) + { + ListCell *l; + List *mergeActionList = NIL; + + /* + * Copy MergeActions and translate stuff that + * references attribute numbers. + */ + foreach(l, parse->mergeActionList) + { + MergeAction *action = lfirst(l), + *leaf_action = copyObject(action); + + leaf_action->qual = + adjust_appendrel_attrs_multilevel(root, + (Node *) action->qual, + this_result_rel->relids, + top_result_rel->relids); + leaf_action->targetList = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) action->targetList, + this_result_rel->relids, + top_result_rel->relids); + if (leaf_action->commandType == CMD_UPDATE) + leaf_action->updateColnos = + adjust_inherited_attnums_multilevel(root, + action->updateColnos, + this_result_rel->relid, + top_result_rel->relid); + mergeActionList = lappend(mergeActionList, + leaf_action); + } + + mergeActionLists = lappend(mergeActionLists, + mergeActionList); + } + } + + if (resultRelations == NIL) + { + /* + * We managed to exclude every child rel, so generate a + * dummy one-relation plan using info for the top target + * rel (even though that may not be a leaf target). + * Although it's clear that no data will be updated or + * deleted, we still need to have a ModifyTable node so + * that any statement triggers will be executed. (This + * could be cleaner if we fixed nodeModifyTable.c to allow + * zero target relations, but that probably wouldn't be a + * net win.) + */ + resultRelations = list_make1_int(parse->resultRelation); + if (parse->commandType == CMD_UPDATE) + updateColnosLists = list_make1(root->update_colnos); + if (parse->withCheckOptions) + withCheckOptionLists = list_make1(parse->withCheckOptions); + if (parse->returningList) + returningLists = list_make1(parse->returningList); + if (parse->mergeActionList) + mergeActionLists = list_make1(parse->mergeActionList); + } + } + else + { + /* Single-relation INSERT/UPDATE/DELETE/MERGE. */ + rootRelation = 0; /* there's no separate root rel */ + resultRelations = list_make1_int(parse->resultRelation); + if (parse->commandType == CMD_UPDATE) + updateColnosLists = list_make1(root->update_colnos); + if (parse->withCheckOptions) + withCheckOptionLists = list_make1(parse->withCheckOptions); + if (parse->returningList) + returningLists = list_make1(parse->returningList); + if (parse->mergeActionList) + mergeActionLists = list_make1(parse->mergeActionList); + } + + /* + * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node + * will have dealt with fetching non-locked marked rows, else we + * need to have ModifyTable do that. + */ + if (parse->rowMarks) + rowMarks = NIL; + else + rowMarks = root->rowMarks; + + path = (Path *) + create_modifytable_path(root, final_rel, + path, + parse->commandType, + parse->canSetTag, + parse->resultRelation, + rootRelation, + root->partColsUpdated, + resultRelations, + updateColnosLists, + withCheckOptionLists, + returningLists, + rowMarks, + parse->onConflict, + mergeActionLists, + assign_special_exec_param(root)); + } + + /* And shove it into final_rel */ + add_path(final_rel, path); + } + + /* + * Generate partial paths for final_rel, too, if outer query levels might + * be able to make use of them. + */ + if (final_rel->consider_parallel && root->query_level > 1 && + !limit_needed(parse)) + { + Assert(!parse->rowMarks && parse->commandType == CMD_SELECT); + foreach(lc, current_rel->partial_pathlist) + { + Path *partial_path = (Path *) lfirst(lc); + + add_partial_path(final_rel, partial_path); + } + } + + extra.limit_needed = limit_needed(parse); + extra.limit_tuples = limit_tuples; + extra.count_est = count_est; + extra.offset_est = offset_est; + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding ForeignPaths. + */ + if (final_rel->fdwroutine && + final_rel->fdwroutine->GetForeignUpperPaths) + final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL, + current_rel, final_rel, + &extra); + + /* Let extensions possibly add some more paths */ + if (create_upper_paths_hook) + (*create_upper_paths_hook) (root, UPPERREL_FINAL, + current_rel, final_rel, &extra); + + /* Note: currently, we leave it to callers to do set_cheapest() */ +} + +/* + * Do preprocessing for groupingSets clause and related data. This handles the + * preliminary steps of expanding the grouping sets, organizing them into lists + * of rollups, and preparing annotations which will later be filled in with + * size estimates. + */ +static grouping_sets_data * +preprocess_grouping_sets(PlannerInfo *root) +{ + Query *parse = root->parse; + List *sets; + int maxref = 0; + ListCell *lc; + ListCell *lc_set; + grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data)); + + parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1); + + gd->any_hashable = false; + gd->unhashable_refs = NULL; + gd->unsortable_refs = NULL; + gd->unsortable_sets = NIL; + + if (parse->groupClause) + { + ListCell *lc; + + foreach(lc, parse->groupClause) + { + SortGroupClause *gc = lfirst_node(SortGroupClause, lc); + Index ref = gc->tleSortGroupRef; + + if (ref > maxref) + maxref = ref; + + if (!gc->hashable) + gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref); + + if (!OidIsValid(gc->sortop)) + gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref); + } + } + + /* Allocate workspace array for remapping */ + gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); + + /* + * If we have any unsortable sets, we must extract them before trying to + * prepare rollups. Unsortable sets don't go through + * reorder_grouping_sets, so we must apply the GroupingSetData annotation + * here. + */ + if (!bms_is_empty(gd->unsortable_refs)) + { + List *sortable_sets = NIL; + + foreach(lc, parse->groupingSets) + { + List *gset = (List *) lfirst(lc); + + if (bms_overlap_list(gd->unsortable_refs, gset)) + { + GroupingSetData *gs = makeNode(GroupingSetData); + + gs->set = gset; + gd->unsortable_sets = lappend(gd->unsortable_sets, gs); + + /* + * We must enforce here that an unsortable set is hashable; + * later code assumes this. Parse analysis only checks that + * every individual column is either hashable or sortable. + * + * Note that passing this test doesn't guarantee we can + * generate a plan; there might be other showstoppers. + */ + if (bms_overlap_list(gd->unhashable_refs, gset)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + } + else + sortable_sets = lappend(sortable_sets, gset); + } + + if (sortable_sets) + sets = extract_rollup_sets(sortable_sets); + else + sets = NIL; + } + else + sets = extract_rollup_sets(parse->groupingSets); + + foreach(lc_set, sets) + { + List *current_sets = (List *) lfirst(lc_set); + RollupData *rollup = makeNode(RollupData); + GroupingSetData *gs; + + /* + * Reorder the current list of grouping sets into correct prefix + * order. If only one aggregation pass is needed, try to make the + * list match the ORDER BY clause; if more than one pass is needed, we + * don't bother with that. + * + * Note that this reorders the sets from smallest-member-first to + * largest-member-first, and applies the GroupingSetData annotations, + * though the data will be filled in later. + */ + current_sets = reorder_grouping_sets(current_sets, + (list_length(sets) == 1 + ? parse->sortClause + : NIL)); + + /* + * Get the initial (and therefore largest) grouping set. + */ + gs = linitial_node(GroupingSetData, current_sets); + + /* + * Order the groupClause appropriately. If the first grouping set is + * empty, then the groupClause must also be empty; otherwise we have + * to force the groupClause to match that grouping set's order. + * + * (The first grouping set can be empty even though parse->groupClause + * is not empty only if all non-empty grouping sets are unsortable. + * The groupClauses for hashed grouping sets are built later on.) + */ + if (gs->set) + rollup->groupClause = preprocess_groupclause(root, gs->set); + else + rollup->groupClause = NIL; + + /* + * Is it hashable? We pretend empty sets are hashable even though we + * actually force them not to be hashed later. But don't bother if + * there's nothing but empty sets (since in that case we can't hash + * anything). + */ + if (gs->set && + !bms_overlap_list(gd->unhashable_refs, gs->set)) + { + rollup->hashable = true; + gd->any_hashable = true; + } + + /* + * Now that we've pinned down an order for the groupClause for this + * list of grouping sets, we need to remap the entries in the grouping + * sets from sortgrouprefs to plain indices (0-based) into the + * groupClause for this collection of grouping sets. We keep the + * original form for later use, though. + */ + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + current_sets, + gd->tleref_to_colnum_map); + rollup->gsets_data = current_sets; + + gd->rollups = lappend(gd->rollups, rollup); + } + + if (gd->unsortable_sets) + { + /* + * We have not yet pinned down a groupclause for this, but we will + * need index-based lists for estimation purposes. Construct + * hash_sets_idx based on the entire original groupclause for now. + */ + gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause, + gd->unsortable_sets, + gd->tleref_to_colnum_map); + gd->any_hashable = true; + } + + return gd; +} + +/* + * Given a groupclause and a list of GroupingSetData, return equivalent sets + * (without annotation) mapped to indexes into the given groupclause. + */ +static List * +remap_to_groupclause_idx(List *groupClause, + List *gsets, + int *tleref_to_colnum_map) +{ + int ref = 0; + List *result = NIL; + ListCell *lc; + + foreach(lc, groupClause) + { + SortGroupClause *gc = lfirst_node(SortGroupClause, lc); + + tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; + } + + foreach(lc, gsets) + { + List *set = NIL; + ListCell *lc2; + GroupingSetData *gs = lfirst_node(GroupingSetData, lc); + + foreach(lc2, gs->set) + { + set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]); + } + + result = lappend(result, set); + } + + return result; +} + + +/* + * preprocess_rowmarks - set up PlanRowMarks if needed + */ +static void +preprocess_rowmarks(PlannerInfo *root) +{ + Query *parse = root->parse; + Bitmapset *rels; + List *prowmarks; + ListCell *l; + int i; + + if (parse->rowMarks) + { + /* + * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside + * grouping, since grouping renders a reference to individual tuple + * CTIDs invalid. This is also checked at parse time, but that's + * insufficient because of rule substitution, query pullup, etc. + */ + CheckSelectLocking(parse, linitial_node(RowMarkClause, + parse->rowMarks)->strength); + } + else + { + /* + * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY] + * UPDATE/SHARE. + */ + if (parse->commandType != CMD_UPDATE && + parse->commandType != CMD_DELETE && + parse->commandType != CMD_MERGE) + return; + } + + /* + * We need to have rowmarks for all base relations except the target. We + * make a bitmapset of all base rels and then remove the items we don't + * need or have FOR [KEY] UPDATE/SHARE marks for. + */ + rels = get_relids_in_jointree((Node *) parse->jointree, false); + if (parse->resultRelation) + rels = bms_del_member(rels, parse->resultRelation); + + /* + * Convert RowMarkClauses to PlanRowMark representation. + */ + prowmarks = NIL; + foreach(l, parse->rowMarks) + { + RowMarkClause *rc = lfirst_node(RowMarkClause, l); + RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable); + PlanRowMark *newrc; + + /* + * Currently, it is syntactically impossible to have FOR UPDATE et al + * applied to an update/delete target rel. If that ever becomes + * possible, we should drop the target from the PlanRowMark list. + */ + Assert(rc->rti != parse->resultRelation); + + /* + * Ignore RowMarkClauses for subqueries; they aren't real tables and + * can't support true locking. Subqueries that got flattened into the + * main query should be ignored completely. Any that didn't will get + * ROW_MARK_COPY items in the next loop. + */ + if (rte->rtekind != RTE_RELATION) + continue; + + rels = bms_del_member(rels, rc->rti); + + newrc = makeNode(PlanRowMark); + newrc->rti = newrc->prti = rc->rti; + newrc->rowmarkId = ++(root->glob->lastRowMarkId); + newrc->markType = select_rowmark_type(rte, rc->strength); + newrc->allMarkTypes = (1 << newrc->markType); + newrc->strength = rc->strength; + newrc->waitPolicy = rc->waitPolicy; + newrc->isParent = false; + + prowmarks = lappend(prowmarks, newrc); + } + + /* + * Now, add rowmarks for any non-target, non-locked base relations. + */ + i = 0; + foreach(l, parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, l); + PlanRowMark *newrc; + + i++; + if (!bms_is_member(i, rels)) + continue; + + newrc = makeNode(PlanRowMark); + newrc->rti = newrc->prti = i; + newrc->rowmarkId = ++(root->glob->lastRowMarkId); + newrc->markType = select_rowmark_type(rte, LCS_NONE); + newrc->allMarkTypes = (1 << newrc->markType); + newrc->strength = LCS_NONE; + newrc->waitPolicy = LockWaitBlock; /* doesn't matter */ + newrc->isParent = false; + + prowmarks = lappend(prowmarks, newrc); + } + + root->rowMarks = prowmarks; +} + +/* + * Select RowMarkType to use for a given table + */ +RowMarkType +select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) +{ + if (rte->rtekind != RTE_RELATION) + { + /* If it's not a table at all, use ROW_MARK_COPY */ + return ROW_MARK_COPY; + } + else if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Let the FDW select the rowmark type, if it wants to */ + FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid); + + if (fdwroutine->GetForeignRowMarkType != NULL) + return fdwroutine->GetForeignRowMarkType(rte, strength); + /* Otherwise, use ROW_MARK_COPY by default */ + return ROW_MARK_COPY; + } + else + { + /* Regular table, apply the appropriate lock type */ + switch (strength) + { + case LCS_NONE: + + /* + * We don't need a tuple lock, only the ability to re-fetch + * the row. + */ + return ROW_MARK_REFERENCE; + break; + case LCS_FORKEYSHARE: + return ROW_MARK_KEYSHARE; + break; + case LCS_FORSHARE: + return ROW_MARK_SHARE; + break; + case LCS_FORNOKEYUPDATE: + return ROW_MARK_NOKEYEXCLUSIVE; + break; + case LCS_FORUPDATE: + return ROW_MARK_EXCLUSIVE; + break; + } + elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength); + return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */ + } +} + +/* + * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses + * + * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the + * results back in *count_est and *offset_est. These variables are set to + * 0 if the corresponding clause is not present, and -1 if it's present + * but we couldn't estimate the value for it. (The "0" convention is OK + * for OFFSET but a little bit bogus for LIMIT: effectively we estimate + * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's + * usual practice of never estimating less than one row.) These values will + * be passed to create_limit_path, which see if you change this code. + * + * The return value is the suitably adjusted tuple_fraction to use for + * planning the query. This adjustment is not overridable, since it reflects + * plan actions that grouping_planner() will certainly take, not assumptions + * about context. + */ +static double +preprocess_limit(PlannerInfo *root, double tuple_fraction, + int64 *offset_est, int64 *count_est) +{ + Query *parse = root->parse; + Node *est; + double limit_fraction; + + /* Should not be called unless LIMIT or OFFSET */ + Assert(parse->limitCount || parse->limitOffset); + + /* + * Try to obtain the clause values. We use estimate_expression_value + * primarily because it can sometimes do something useful with Params. + */ + if (parse->limitCount) + { + est = estimate_expression_value(root, parse->limitCount); + if (est && IsA(est, Const)) + { + if (((Const *) est)->constisnull) + { + /* NULL indicates LIMIT ALL, ie, no limit */ + *count_est = 0; /* treat as not present */ + } + else + { + *count_est = DatumGetInt64(((Const *) est)->constvalue); + if (*count_est <= 0) + *count_est = 1; /* force to at least 1 */ + } + } + else + *count_est = -1; /* can't estimate */ + } + else + *count_est = 0; /* not present */ + + if (parse->limitOffset) + { + est = estimate_expression_value(root, parse->limitOffset); + if (est && IsA(est, Const)) + { + if (((Const *) est)->constisnull) + { + /* Treat NULL as no offset; the executor will too */ + *offset_est = 0; /* treat as not present */ + } + else + { + *offset_est = DatumGetInt64(((Const *) est)->constvalue); + if (*offset_est < 0) + *offset_est = 0; /* treat as not present */ + } + } + else + *offset_est = -1; /* can't estimate */ + } + else + *offset_est = 0; /* not present */ + + if (*count_est != 0) + { + /* + * A LIMIT clause limits the absolute number of tuples returned. + * However, if it's not a constant LIMIT then we have to guess; for + * lack of a better idea, assume 10% of the plan's result is wanted. + */ + if (*count_est < 0 || *offset_est < 0) + { + /* LIMIT or OFFSET is an expression ... punt ... */ + limit_fraction = 0.10; + } + else + { + /* LIMIT (plus OFFSET, if any) is max number of tuples needed */ + limit_fraction = (double) *count_est + (double) *offset_est; + } + + /* + * If we have absolute limits from both caller and LIMIT, use the + * smaller value; likewise if they are both fractional. If one is + * fractional and the other absolute, we can't easily determine which + * is smaller, but we use the heuristic that the absolute will usually + * be smaller. + */ + if (tuple_fraction >= 1.0) + { + if (limit_fraction >= 1.0) + { + /* both absolute */ + tuple_fraction = Min(tuple_fraction, limit_fraction); + } + else + { + /* caller absolute, limit fractional; use caller's value */ + } + } + else if (tuple_fraction > 0.0) + { + if (limit_fraction >= 1.0) + { + /* caller fractional, limit absolute; use limit */ + tuple_fraction = limit_fraction; + } + else + { + /* both fractional */ + tuple_fraction = Min(tuple_fraction, limit_fraction); + } + } + else + { + /* no info from caller, just use limit */ + tuple_fraction = limit_fraction; + } + } + else if (*offset_est != 0 && tuple_fraction > 0.0) + { + /* + * We have an OFFSET but no LIMIT. This acts entirely differently + * from the LIMIT case: here, we need to increase rather than decrease + * the caller's tuple_fraction, because the OFFSET acts to cause more + * tuples to be fetched instead of fewer. This only matters if we got + * a tuple_fraction > 0, however. + * + * As above, use 10% if OFFSET is present but unestimatable. + */ + if (*offset_est < 0) + limit_fraction = 0.10; + else + limit_fraction = (double) *offset_est; + + /* + * If we have absolute counts from both caller and OFFSET, add them + * together; likewise if they are both fractional. If one is + * fractional and the other absolute, we want to take the larger, and + * we heuristically assume that's the fractional one. + */ + if (tuple_fraction >= 1.0) + { + if (limit_fraction >= 1.0) + { + /* both absolute, so add them together */ + tuple_fraction += limit_fraction; + } + else + { + /* caller absolute, limit fractional; use limit */ + tuple_fraction = limit_fraction; + } + } + else + { + if (limit_fraction >= 1.0) + { + /* caller fractional, limit absolute; use caller's value */ + } + else + { + /* both fractional, so add them together */ + tuple_fraction += limit_fraction; + if (tuple_fraction >= 1.0) + tuple_fraction = 0.0; /* assume fetch all */ + } + } + } + + return tuple_fraction; +} + +/* + * limit_needed - do we actually need a Limit plan node? + * + * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding + * a Limit node. This is worth checking for because "OFFSET 0" is a common + * locution for an optimization fence. (Because other places in the planner + * merely check whether parse->limitOffset isn't NULL, it will still work as + * an optimization fence --- we're just suppressing unnecessary run-time + * overhead.) + * + * This might look like it could be merged into preprocess_limit, but there's + * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas + * in preprocess_limit it's good enough to consider estimated values. + */ +bool +limit_needed(Query *parse) +{ + Node *node; + + node = parse->limitCount; + if (node) + { + if (IsA(node, Const)) + { + /* NULL indicates LIMIT ALL, ie, no limit */ + if (!((Const *) node)->constisnull) + return true; /* LIMIT with a constant value */ + } + else + return true; /* non-constant LIMIT */ + } + + node = parse->limitOffset; + if (node) + { + if (IsA(node, Const)) + { + /* Treat NULL as no offset; the executor would too */ + if (!((Const *) node)->constisnull) + { + int64 offset = DatumGetInt64(((Const *) node)->constvalue); + + if (offset != 0) + return true; /* OFFSET with a nonzero value */ + } + } + else + return true; /* non-constant OFFSET */ + } + + return false; /* don't need a Limit plan node */ +} + + +/* + * remove_useless_groupby_columns + * Remove any columns in the GROUP BY clause that are redundant due to + * being functionally dependent on other GROUP BY columns. + * + * Since some other DBMSes do not allow references to ungrouped columns, it's + * not unusual to find all columns listed in GROUP BY even though listing the + * primary-key columns would be sufficient. Deleting such excess columns + * avoids redundant sorting work, so it's worth doing. + * + * Relcache invalidations will ensure that cached plans become invalidated + * when the underlying index of the pkey constraint is dropped. + * + * Currently, we only make use of pkey constraints for this, however, we may + * wish to take this further in the future and also use unique constraints + * which have NOT NULL columns. In that case, plan invalidation will still + * work since relations will receive a relcache invalidation when a NOT NULL + * constraint is dropped. + */ +static void +remove_useless_groupby_columns(PlannerInfo *root) +{ + Query *parse = root->parse; + Bitmapset **groupbyattnos; + Bitmapset **surplusvars; + ListCell *lc; + int relid; + + /* No chance to do anything if there are less than two GROUP BY items */ + if (list_length(parse->groupClause) < 2) + return; + + /* Don't fiddle with the GROUP BY clause if the query has grouping sets */ + if (parse->groupingSets) + return; + + /* + * Scan the GROUP BY clause to find GROUP BY items that are simple Vars. + * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k + * that are GROUP BY items. + */ + groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) * + (list_length(parse->rtable) + 1)); + foreach(lc, parse->groupClause) + { + SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); + Var *var = (Var *) tle->expr; + + /* + * Ignore non-Vars and Vars from other query levels. + * + * XXX in principle, stable expressions containing Vars could also be + * removed, if all the Vars are functionally dependent on other GROUP + * BY items. But it's not clear that such cases occur often enough to + * be worth troubling over. + */ + if (!IsA(var, Var) || + var->varlevelsup > 0) + continue; + + /* OK, remember we have this Var */ + relid = var->varno; + Assert(relid <= list_length(parse->rtable)); + groupbyattnos[relid] = bms_add_member(groupbyattnos[relid], + var->varattno - FirstLowInvalidHeapAttributeNumber); + } + + /* + * Consider each relation and see if it is possible to remove some of its + * Vars from GROUP BY. For simplicity and speed, we do the actual removal + * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset + * of the column attnos of RTE k that are removable GROUP BY items. + */ + surplusvars = NULL; /* don't allocate array unless required */ + relid = 0; + foreach(lc, parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc); + Bitmapset *relattnos; + Bitmapset *pkattnos; + Oid constraintOid; + + relid++; + + /* Only plain relations could have primary-key constraints */ + if (rte->rtekind != RTE_RELATION) + continue; + + /* + * We must skip inheritance parent tables as some of the child rels + * may cause duplicate rows. This cannot happen with partitioned + * tables, however. + */ + if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) + continue; + + /* Nothing to do unless this rel has multiple Vars in GROUP BY */ + relattnos = groupbyattnos[relid]; + if (bms_membership(relattnos) != BMS_MULTIPLE) + continue; + + /* + * Can't remove any columns for this rel if there is no suitable + * (i.e., nondeferrable) primary key constraint. + */ + pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid); + if (pkattnos == NULL) + continue; + + /* + * If the primary key is a proper subset of relattnos then we have + * some items in the GROUP BY that can be removed. + */ + if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1) + { + /* + * To easily remember whether we've found anything to do, we don't + * allocate the surplusvars[] array until we find something. + */ + if (surplusvars == NULL) + surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) * + (list_length(parse->rtable) + 1)); + + /* Remember the attnos of the removable columns */ + surplusvars[relid] = bms_difference(relattnos, pkattnos); + } + } + + /* + * If we found any surplus Vars, build a new GROUP BY clause without them. + * (Note: this may leave some TLEs with unreferenced ressortgroupref + * markings, but that's harmless.) + */ + if (surplusvars != NULL) + { + List *new_groupby = NIL; + + foreach(lc, parse->groupClause) + { + SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); + Var *var = (Var *) tle->expr; + + /* + * New list must include non-Vars, outer Vars, and anything not + * marked as surplus. + */ + if (!IsA(var, Var) || + var->varlevelsup > 0 || + !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, + surplusvars[var->varno])) + new_groupby = lappend(new_groupby, sgc); + } + + parse->groupClause = new_groupby; + } +} + +/* + * preprocess_groupclause - do preparatory work on GROUP BY clause + * + * The idea here is to adjust the ordering of the GROUP BY elements + * (which in itself is semantically insignificant) to match ORDER BY, + * thereby allowing a single sort operation to both implement the ORDER BY + * requirement and set up for a Unique step that implements GROUP BY. + * + * In principle it might be interesting to consider other orderings of the + * GROUP BY elements, which could match the sort ordering of other + * possible plans (eg an indexscan) and thereby reduce cost. We don't + * bother with that, though. Hashed grouping will frequently win anyway. + * + * Note: we need no comparable processing of the distinctClause because + * the parser already enforced that that matches ORDER BY. + * + * For grouping sets, the order of items is instead forced to agree with that + * of the grouping set (and items not in the grouping set are skipped). The + * work of sorting the order of grouping set elements to match the ORDER BY if + * possible is done elsewhere. + */ +static List * +preprocess_groupclause(PlannerInfo *root, List *force) +{ + Query *parse = root->parse; + List *new_groupclause = NIL; + bool partial_match; + ListCell *sl; + ListCell *gl; + + /* For grouping sets, we need to force the ordering */ + if (force) + { + foreach(sl, force) + { + Index ref = lfirst_int(sl); + SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause); + + new_groupclause = lappend(new_groupclause, cl); + } + + return new_groupclause; + } + + /* If no ORDER BY, nothing useful to do here */ + if (parse->sortClause == NIL) + return parse->groupClause; + + /* + * Scan the ORDER BY clause and construct a list of matching GROUP BY + * items, but only as far as we can make a matching prefix. + * + * This code assumes that the sortClause contains no duplicate items. + */ + foreach(sl, parse->sortClause) + { + SortGroupClause *sc = lfirst_node(SortGroupClause, sl); + + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = lfirst_node(SortGroupClause, gl); + + if (equal(gc, sc)) + { + new_groupclause = lappend(new_groupclause, gc); + break; + } + } + if (gl == NULL) + break; /* no match, so stop scanning */ + } + + /* Did we match all of the ORDER BY list, or just some of it? */ + partial_match = (sl != NULL); + + /* If no match at all, no point in reordering GROUP BY */ + if (new_groupclause == NIL) + return parse->groupClause; + + /* + * Add any remaining GROUP BY items to the new list, but only if we were + * able to make a complete match. In other words, we only rearrange the + * GROUP BY list if the result is that one list is a prefix of the other + * --- otherwise there's no possibility of a common sort. Also, give up + * if there are any non-sortable GROUP BY items, since then there's no + * hope anyway. + */ + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = lfirst_node(SortGroupClause, gl); + + if (list_member_ptr(new_groupclause, gc)) + continue; /* it matched an ORDER BY item */ + if (partial_match) + return parse->groupClause; /* give up, no common sort possible */ + if (!OidIsValid(gc->sortop)) + return parse->groupClause; /* give up, GROUP BY can't be sorted */ + new_groupclause = lappend(new_groupclause, gc); + } + + /* Success --- install the rearranged GROUP BY list */ + Assert(list_length(parse->groupClause) == list_length(new_groupclause)); + return new_groupclause; +} + +/* + * Extract lists of grouping sets that can be implemented using a single + * rollup-type aggregate pass each. Returns a list of lists of grouping sets. + * + * Input must be sorted with smallest sets first. Result has each sublist + * sorted with smallest sets first. + * + * We want to produce the absolute minimum possible number of lists here to + * avoid excess sorts. Fortunately, there is an algorithm for this; the problem + * of finding the minimal partition of a partially-ordered set into chains + * (which is what we need, taking the list of grouping sets as a poset ordered + * by set inclusion) can be mapped to the problem of finding the maximum + * cardinality matching on a bipartite graph, which is solvable in polynomial + * time with a worst case of no worse than O(n^2.5) and usually much + * better. Since our N is at most 4096, we don't need to consider fallbacks to + * heuristic or approximate methods. (Planning time for a 12-d cube is under + * half a second on my modest system even with optimization off and assertions + * on.) + */ +static List * +extract_rollup_sets(List *groupingSets) +{ + int num_sets_raw = list_length(groupingSets); + int num_empty = 0; + int num_sets = 0; /* distinct sets */ + int num_chains = 0; + List *result = NIL; + List **results; + List **orig_sets; + Bitmapset **set_masks; + int *chains; + short **adjacency; + short *adjacency_buf; + BipartiteMatchState *state; + int i; + int j; + int j_size; + ListCell *lc1 = list_head(groupingSets); + ListCell *lc; + + /* + * Start by stripping out empty sets. The algorithm doesn't require this, + * but the planner currently needs all empty sets to be returned in the + * first list, so we strip them here and add them back after. + */ + while (lc1 && lfirst(lc1) == NIL) + { + ++num_empty; + lc1 = lnext(groupingSets, lc1); + } + + /* bail out now if it turns out that all we had were empty sets. */ + if (!lc1) + return list_make1(groupingSets); + + /*---------- + * We don't strictly need to remove duplicate sets here, but if we don't, + * they tend to become scattered through the result, which is a bit + * confusing (and irritating if we ever decide to optimize them out). + * So we remove them here and add them back after. + * + * For each non-duplicate set, we fill in the following: + * + * orig_sets[i] = list of the original set lists + * set_masks[i] = bitmapset for testing inclusion + * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices + * + * chains[i] will be the result group this set is assigned to. + * + * We index all of these from 1 rather than 0 because it is convenient + * to leave 0 free for the NIL node in the graph algorithm. + *---------- + */ + orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *)); + set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *)); + adjacency = palloc0((num_sets_raw + 1) * sizeof(short *)); + adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short)); + + j_size = 0; + j = 0; + i = 1; + + for_each_cell(lc, groupingSets, lc1) + { + List *candidate = (List *) lfirst(lc); + Bitmapset *candidate_set = NULL; + ListCell *lc2; + int dup_of = 0; + + foreach(lc2, candidate) + { + candidate_set = bms_add_member(candidate_set, lfirst_int(lc2)); + } + + /* we can only be a dup if we're the same length as a previous set */ + if (j_size == list_length(candidate)) + { + int k; + + for (k = j; k < i; ++k) + { + if (bms_equal(set_masks[k], candidate_set)) + { + dup_of = k; + break; + } + } + } + else if (j_size < list_length(candidate)) + { + j_size = list_length(candidate); + j = i; + } + + if (dup_of > 0) + { + orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate); + bms_free(candidate_set); + } + else + { + int k; + int n_adj = 0; + + orig_sets[i] = list_make1(candidate); + set_masks[i] = candidate_set; + + /* fill in adjacency list; no need to compare equal-size sets */ + + for (k = j - 1; k > 0; --k) + { + if (bms_is_subset(set_masks[k], candidate_set)) + adjacency_buf[++n_adj] = k; + } + + if (n_adj > 0) + { + adjacency_buf[0] = n_adj; + adjacency[i] = palloc((n_adj + 1) * sizeof(short)); + memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short)); + } + else + adjacency[i] = NULL; + + ++i; + } + } + + num_sets = i - 1; + + /* + * Apply the graph matching algorithm to do the work. + */ + state = BipartiteMatch(num_sets, num_sets, adjacency); + + /* + * Now, the state->pair* fields have the info we need to assign sets to + * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or + * pair_vu[v] = u (both will be true, but we check both so that we can do + * it in one pass) + */ + chains = palloc0((num_sets + 1) * sizeof(int)); + + for (i = 1; i <= num_sets; ++i) + { + int u = state->pair_vu[i]; + int v = state->pair_uv[i]; + + if (u > 0 && u < i) + chains[i] = chains[u]; + else if (v > 0 && v < i) + chains[i] = chains[v]; + else + chains[i] = ++num_chains; + } + + /* build result lists. */ + results = palloc0((num_chains + 1) * sizeof(List *)); + + for (i = 1; i <= num_sets; ++i) + { + int c = chains[i]; + + Assert(c > 0); + + results[c] = list_concat(results[c], orig_sets[i]); + } + + /* push any empty sets back on the first list. */ + while (num_empty-- > 0) + results[1] = lcons(NIL, results[1]); + + /* make result list */ + for (i = 1; i <= num_chains; ++i) + result = lappend(result, results[i]); + + /* + * Free all the things. + * + * (This is over-fussy for small sets but for large sets we could have + * tied up a nontrivial amount of memory.) + */ + BipartiteMatchFree(state); + pfree(results); + pfree(chains); + for (i = 1; i <= num_sets; ++i) + if (adjacency[i]) + pfree(adjacency[i]); + pfree(adjacency); + pfree(adjacency_buf); + pfree(orig_sets); + for (i = 1; i <= num_sets; ++i) + bms_free(set_masks[i]); + pfree(set_masks); + + return result; +} + +/* + * Reorder the elements of a list of grouping sets such that they have correct + * prefix relationships. Also inserts the GroupingSetData annotations. + * + * The input must be ordered with smallest sets first; the result is returned + * with largest sets first. Note that the result shares no list substructure + * with the input, so it's safe for the caller to modify it later. + * + * If we're passed in a sortclause, we follow its order of columns to the + * extent possible, to minimize the chance that we add unnecessary sorts. + * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a + * gets implemented in one pass.) + */ +static List * +reorder_grouping_sets(List *groupingsets, List *sortclause) +{ + ListCell *lc; + List *previous = NIL; + List *result = NIL; + + foreach(lc, groupingsets) + { + List *candidate = (List *) lfirst(lc); + List *new_elems = list_difference_int(candidate, previous); + GroupingSetData *gs = makeNode(GroupingSetData); + + while (list_length(sortclause) > list_length(previous) && + list_length(new_elems) > 0) + { + SortGroupClause *sc = list_nth(sortclause, list_length(previous)); + int ref = sc->tleSortGroupRef; + + if (list_member_int(new_elems, ref)) + { + previous = lappend_int(previous, ref); + new_elems = list_delete_int(new_elems, ref); + } + else + { + /* diverged from the sortclause; give up on it */ + sortclause = NIL; + break; + } + } + + previous = list_concat(previous, new_elems); + + gs->set = list_copy(previous); + result = lcons(gs, result); + } + + list_free(previous); + + return result; +} + +/* + * Compute query_pathkeys and other pathkeys during plan generation + */ +static void +standard_qp_callback(PlannerInfo *root, void *extra) +{ + Query *parse = root->parse; + standard_qp_extra *qp_extra = (standard_qp_extra *) extra; + List *tlist = root->processed_tlist; + List *activeWindows = qp_extra->activeWindows; + + /* + * Calculate pathkeys that represent grouping/ordering requirements. The + * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not + * be, in which case we just leave their pathkeys empty. + */ + if (qp_extra->groupClause && + grouping_is_sortable(qp_extra->groupClause)) + root->group_pathkeys = + make_pathkeys_for_sortclauses(root, + qp_extra->groupClause, + tlist); + else + root->group_pathkeys = NIL; + + /* We consider only the first (bottom) window in pathkeys logic */ + if (activeWindows != NIL) + { + WindowClause *wc = linitial_node(WindowClause, activeWindows); + + root->window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist); + } + else + root->window_pathkeys = NIL; + + if (parse->distinctClause && + grouping_is_sortable(parse->distinctClause)) + root->distinct_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->distinctClause, + tlist); + else + root->distinct_pathkeys = NIL; + + root->sort_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist); + + /* + * Figure out whether we want a sorted result from query_planner. + * + * If we have a sortable GROUP BY clause, then we want a result sorted + * properly for grouping. Otherwise, if we have window functions to + * evaluate, we try to sort for the first window. Otherwise, if there's a + * sortable DISTINCT clause that's more rigorous than the ORDER BY clause, + * we try to produce output that's sufficiently well sorted for the + * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort + * by the ORDER BY clause. + * + * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset + * of GROUP BY, it would be tempting to request sort by ORDER BY --- but + * that might just leave us failing to exploit an available sort order at + * all. Needs more thought. The choice for DISTINCT versus ORDER BY is + * much easier, since we know that the parser ensured that one is a + * superset of the other. + */ + if (root->group_pathkeys) + root->query_pathkeys = root->group_pathkeys; + else if (root->window_pathkeys) + root->query_pathkeys = root->window_pathkeys; + else if (list_length(root->distinct_pathkeys) > + list_length(root->sort_pathkeys)) + root->query_pathkeys = root->distinct_pathkeys; + else if (root->sort_pathkeys) + root->query_pathkeys = root->sort_pathkeys; + else + root->query_pathkeys = NIL; +} + +/* + * Estimate number of groups produced by grouping clauses (1 if not grouping) + * + * path_rows: number of output rows from scan/join step + * gd: grouping sets data including list of grouping sets and their clauses + * target_list: target list containing group clause references + * + * If doing grouping sets, we also annotate the gsets data with the estimates + * for each set and each individual rollup list, with a view to later + * determining whether some combination of them could be hashed instead. + */ +static double +get_number_of_groups(PlannerInfo *root, + double path_rows, + grouping_sets_data *gd, + List *target_list) +{ + Query *parse = root->parse; + double dNumGroups; + + if (parse->groupClause) + { + List *groupExprs; + + if (parse->groupingSets) + { + /* Add up the estimates for each grouping set */ + ListCell *lc; + ListCell *lc2; + + Assert(gd); /* keep Coverity happy */ + + dNumGroups = 0; + + foreach(lc, gd->rollups) + { + RollupData *rollup = lfirst_node(RollupData, lc); + ListCell *lc; + + groupExprs = get_sortgrouplist_exprs(rollup->groupClause, + target_list); + + rollup->numGroups = 0.0; + + forboth(lc, rollup->gsets, lc2, rollup->gsets_data) + { + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset, + NULL); + + gs->numGroups = numGroups; + rollup->numGroups += numGroups; + } + + dNumGroups += rollup->numGroups; + } + + if (gd->hash_sets_idx) + { + ListCell *lc; + + gd->dNumHashGroups = 0; + + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + target_list); + + forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets) + { + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset, + NULL); + + gs->numGroups = numGroups; + gd->dNumHashGroups += numGroups; + } + + dNumGroups += gd->dNumHashGroups; + } + } + else + { + /* Plain GROUP BY */ + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + target_list); + + dNumGroups = estimate_num_groups(root, groupExprs, path_rows, + NULL, NULL); + } + } + else if (parse->groupingSets) + { + /* Empty grouping sets ... one result row for each one */ + dNumGroups = list_length(parse->groupingSets); + } + else if (parse->hasAggs || root->hasHavingQual) + { + /* Plain aggregation, one result row */ + dNumGroups = 1; + } + else + { + /* Not grouping */ + dNumGroups = 1; + } + + return dNumGroups; +} + +/* + * create_grouping_paths + * + * Build a new upperrel containing Paths for grouping and/or aggregation. + * Along the way, we also build an upperrel for Paths which are partially + * grouped and/or aggregated. A partially grouped and/or aggregated path + * needs a FinalizeAggregate node to complete the aggregation. Currently, + * the only partially grouped paths we build are also partial paths; that + * is, they need a Gather and then a FinalizeAggregate. + * + * input_rel: contains the source-data Paths + * target: the pathtarget for the result Paths to compute + * gd: grouping sets data including list of grouping sets and their clauses + * + * Note: all Paths in input_rel are expected to return the target computed + * by make_group_input_target. + */ +static RelOptInfo * +create_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + bool target_parallel_safe, + grouping_sets_data *gd) +{ + Query *parse = root->parse; + RelOptInfo *grouped_rel; + RelOptInfo *partially_grouped_rel; + AggClauseCosts agg_costs; + + MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); + get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs); + + /* + * Create grouping relation to hold fully aggregated grouping and/or + * aggregation paths. + */ + grouped_rel = make_grouping_rel(root, input_rel, target, + target_parallel_safe, parse->havingQual); + + /* + * Create either paths for a degenerate grouping or paths for ordinary + * grouping, as appropriate. + */ + if (is_degenerate_grouping(root)) + create_degenerate_grouping_paths(root, input_rel, grouped_rel); + else + { + int flags = 0; + GroupPathExtraData extra; + + /* + * Determine whether it's possible to perform sort-based + * implementations of grouping. (Note that if groupClause is empty, + * grouping_is_sortable() is trivially true, and all the + * pathkeys_contained_in() tests will succeed too, so that we'll + * consider every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all + * of them; in this case, we need can_sort to be true as long as we + * must consider any sorted-input plan. + */ + if ((gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause)) + flags |= GROUPING_CAN_USE_SORT; + + /* + * Determine whether we should consider hash-based implementations of + * grouping. + * + * Hashed aggregation only applies if we're grouping. If we have + * grouping sets, some groups might be hashable but others not; in + * this case we set can_hash true as long as there is nothing globally + * preventing us from hashing (and we should therefore consider plans + * with hashes). + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER + * BY aggregates. (Doing so would imply storing *all* the input + * values in the hash table, and/or running many sorts in parallel, + * either of which seems like a certain loser.) We similarly don't + * support ordered-set aggregates in hashed aggregation, but that case + * is also included in the numOrderedAggs count. + * + * Note: grouping_is_hashable() is much more expensive to check than + * the other gating conditions, so we want to do it last. + */ + if ((parse->groupClause != NIL && + root->numOrderedAggs == 0 && + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)))) + flags |= GROUPING_CAN_USE_HASH; + + /* + * Determine whether partial aggregation is possible. + */ + if (can_partial_agg(root)) + flags |= GROUPING_CAN_PARTIAL_AGG; + + extra.flags = flags; + extra.target_parallel_safe = target_parallel_safe; + extra.havingQual = parse->havingQual; + extra.targetList = parse->targetList; + extra.partial_costs_set = false; + + /* + * Determine whether partitionwise aggregation is in theory possible. + * It can be disabled by the user, and for now, we don't try to + * support grouping sets. create_ordinary_grouping_paths() will check + * additional conditions, such as whether input_rel is partitioned. + */ + if (enable_partitionwise_aggregate && !parse->groupingSets) + extra.patype = PARTITIONWISE_AGGREGATE_FULL; + else + extra.patype = PARTITIONWISE_AGGREGATE_NONE; + + create_ordinary_grouping_paths(root, input_rel, grouped_rel, + &agg_costs, gd, &extra, + &partially_grouped_rel); + } + + set_cheapest(grouped_rel); + return grouped_rel; +} + +/* + * make_grouping_rel + * + * Create a new grouping rel and set basic properties. + * + * input_rel represents the underlying scan/join relation. + * target is the output expected from the grouping relation. + */ +static RelOptInfo * +make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + PathTarget *target, bool target_parallel_safe, + Node *havingQual) +{ + RelOptInfo *grouped_rel; + + if (IS_OTHER_REL(input_rel)) + { + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, + input_rel->relids); + grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL; + } + else + { + /* + * By tradition, the relids set for the main grouping relation is + * NULL. (This could be changed, but might require adjustments + * elsewhere.) + */ + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); + } + + /* Set target. */ + grouped_rel->reltarget = target; + + /* + * If the input relation is not parallel-safe, then the grouped relation + * can't be parallel-safe, either. Otherwise, it's parallel-safe if the + * target list and HAVING quals are parallel-safe. + */ + if (input_rel->consider_parallel && target_parallel_safe && + is_parallel_safe(root, (Node *) havingQual)) + grouped_rel->consider_parallel = true; + + /* + * If the input rel belongs to a single FDW, so does the grouped rel. + */ + grouped_rel->serverid = input_rel->serverid; + grouped_rel->userid = input_rel->userid; + grouped_rel->useridiscurrent = input_rel->useridiscurrent; + grouped_rel->fdwroutine = input_rel->fdwroutine; + + return grouped_rel; +} + +/* + * is_degenerate_grouping + * + * A degenerate grouping is one in which the query has a HAVING qual and/or + * grouping sets, but no aggregates and no GROUP BY (which implies that the + * grouping sets are all empty). + */ +static bool +is_degenerate_grouping(PlannerInfo *root) +{ + Query *parse = root->parse; + + return (root->hasHavingQual || parse->groupingSets) && + !parse->hasAggs && parse->groupClause == NIL; +} + +/* + * create_degenerate_grouping_paths + * + * When the grouping is degenerate (see is_degenerate_grouping), we are + * supposed to emit either zero or one row for each grouping set depending on + * whether HAVING succeeds. Furthermore, there cannot be any variables in + * either HAVING or the targetlist, so we actually do not need the FROM table + * at all! We can just throw away the plan-so-far and generate a Result node. + * This is a sufficiently unusual corner case that it's not worth contorting + * the structure of this module to avoid having to generate the earlier paths + * in the first place. + */ +static void +create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *grouped_rel) +{ + Query *parse = root->parse; + int nrows; + Path *path; + + nrows = list_length(parse->groupingSets); + if (nrows > 1) + { + /* + * Doesn't seem worthwhile writing code to cons up a generate_series + * or a values scan to emit multiple rows. Instead just make N clones + * and append them. (With a volatile HAVING clause, this means you + * might get between 0 and N output rows. Offhand I think that's + * desired.) + */ + List *paths = NIL; + + while (--nrows >= 0) + { + path = (Path *) + create_group_result_path(root, grouped_rel, + grouped_rel->reltarget, + (List *) parse->havingQual); + paths = lappend(paths, path); + } + path = (Path *) + create_append_path(root, + grouped_rel, + paths, + NIL, + NIL, + NULL, + 0, + false, + -1); + } + else + { + /* No grouping sets, or just one, so one output row */ + path = (Path *) + create_group_result_path(root, grouped_rel, + grouped_rel->reltarget, + (List *) parse->havingQual); + } + + add_path(grouped_rel, path); +} + +/* + * create_ordinary_grouping_paths + * + * Create grouping paths for the ordinary (that is, non-degenerate) case. + * + * We need to consider sorted and hashed aggregation in the same function, + * because otherwise (1) it would be harder to throw an appropriate error + * message if neither way works, and (2) we should not allow hashtable size + * considerations to dissuade us from using hashing if sorting is not possible. + * + * *partially_grouped_rel_p will be set to the partially grouped rel which this + * function creates, or to NULL if it doesn't create one. + */ +static void +create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + GroupPathExtraData *extra, + RelOptInfo **partially_grouped_rel_p) +{ + Path *cheapest_path = input_rel->cheapest_total_path; + RelOptInfo *partially_grouped_rel = NULL; + double dNumGroups; + PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE; + + /* + * If this is the topmost grouping relation or if the parent relation is + * doing some form of partitionwise aggregation, then we may be able to do + * it at this level also. However, if the input relation is not + * partitioned, partitionwise aggregate is impossible. + */ + if (extra->patype != PARTITIONWISE_AGGREGATE_NONE && + IS_PARTITIONED_REL(input_rel)) + { + /* + * If this is the topmost relation or if the parent relation is doing + * full partitionwise aggregation, then we can do full partitionwise + * aggregation provided that the GROUP BY clause contains all of the + * partitioning columns at this level. Otherwise, we can do at most + * partial partitionwise aggregation. But if partial aggregation is + * not supported in general then we can't use it for partitionwise + * aggregation either. + */ + if (extra->patype == PARTITIONWISE_AGGREGATE_FULL && + group_by_has_partkey(input_rel, extra->targetList, + root->parse->groupClause)) + patype = PARTITIONWISE_AGGREGATE_FULL; + else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0) + patype = PARTITIONWISE_AGGREGATE_PARTIAL; + else + patype = PARTITIONWISE_AGGREGATE_NONE; + } + + /* + * Before generating paths for grouped_rel, we first generate any possible + * partially grouped paths; that way, later code can easily consider both + * parallel and non-parallel approaches to grouping. + */ + if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0) + { + bool force_rel_creation; + + /* + * If we're doing partitionwise aggregation at this level, force + * creation of a partially_grouped_rel so we can add partitionwise + * paths to it. + */ + force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL); + + partially_grouped_rel = + create_partial_grouping_paths(root, + grouped_rel, + input_rel, + gd, + extra, + force_rel_creation); + } + + /* Set out parameter. */ + *partially_grouped_rel_p = partially_grouped_rel; + + /* Apply partitionwise aggregation technique, if possible. */ + if (patype != PARTITIONWISE_AGGREGATE_NONE) + create_partitionwise_grouping_paths(root, input_rel, grouped_rel, + partially_grouped_rel, agg_costs, + gd, patype, extra); + + /* If we are doing partial aggregation only, return. */ + if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL) + { + Assert(partially_grouped_rel); + + if (partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); + + return; + } + + /* Gather any partially grouped partial paths. */ + if (partially_grouped_rel && partially_grouped_rel->partial_pathlist) + { + gather_grouping_paths(root, partially_grouped_rel); + set_cheapest(partially_grouped_rel); + } + + /* + * Estimate number of groups. + */ + dNumGroups = get_number_of_groups(root, + cheapest_path->rows, + gd, + extra->targetList); + + /* Build final grouping paths */ + add_paths_to_grouping_rel(root, input_rel, grouped_rel, + partially_grouped_rel, agg_costs, gd, + dNumGroups, extra); + + /* Give a helpful error if we failed to find any implementation */ + if (grouped_rel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding ForeignPaths. + */ + if (grouped_rel->fdwroutine && + grouped_rel->fdwroutine->GetForeignUpperPaths) + grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG, + input_rel, grouped_rel, + extra); + + /* Let extensions possibly add some more paths */ + if (create_upper_paths_hook) + (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG, + input_rel, grouped_rel, + extra); +} + +/* + * For a given input path, consider the possible ways of doing grouping sets on + * it, by combinations of hashing and sorting. This can be called multiple + * times, so it's important that it not scribble on input. No result is + * returned, but any generated paths are added to grouped_rel. + */ +static void +consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups) +{ + Query *parse = root->parse; + Size hash_mem_limit = get_hash_memory_limit(); + + /* + * If we're not being offered sorted input, then only consider plans that + * can be done entirely by hashing. + * + * We can hash everything if it looks like it'll fit in hash_mem. But if + * the input is actually sorted despite not being advertised as such, we + * prefer to make use of that in order to use less memory. + * + * If none of the grouping sets are sortable, then ignore the hash_mem + * limit and generate a path anyway, since otherwise we'll just fail. + */ + if (!is_sorted) + { + List *new_rollups = NIL; + RollupData *unhashed_rollup = NULL; + List *sets_data; + List *empty_sets_data = NIL; + List *empty_sets = NIL; + ListCell *lc; + ListCell *l_start = list_head(gd->rollups); + AggStrategy strat = AGG_HASHED; + double hashsize; + double exclude_groups = 0.0; + + Assert(can_hash); + + /* + * If the input is coincidentally sorted usefully (which can happen + * even if is_sorted is false, since that only means that our caller + * has set up the sorting for us), then save some hashtable space by + * making use of that. But we need to watch out for degenerate cases: + * + * 1) If there are any empty grouping sets, then group_pathkeys might + * be NIL if all non-empty grouping sets are unsortable. In this case, + * there will be a rollup containing only empty groups, and the + * pathkeys_contained_in test is vacuously true; this is ok. + * + * XXX: the above relies on the fact that group_pathkeys is generated + * from the first rollup. If we add the ability to consider multiple + * sort orders for grouping input, this assumption might fail. + * + * 2) If there are no empty sets and only unsortable sets, then the + * rollups list will be empty (and thus l_start == NULL), and + * group_pathkeys will be NIL; we must ensure that the vacuously-true + * pathkeys_contained_in test doesn't cause us to crash. + */ + if (l_start != NULL && + pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + { + unhashed_rollup = lfirst_node(RollupData, l_start); + exclude_groups = unhashed_rollup->numGroups; + l_start = lnext(gd->rollups, l_start); + } + + hashsize = estimate_hashagg_tablesize(root, + path, + agg_costs, + dNumGroups - exclude_groups); + + /* + * gd->rollups is empty if we have only unsortable columns to work + * with. Override hash_mem in that case; otherwise, we'll rely on the + * sorted-input case to generate usable mixed paths. + */ + if (hashsize > hash_mem_limit && gd->rollups) + return; /* nope, won't fit */ + + /* + * We need to burst the existing rollups list into individual grouping + * sets and recompute a groupClause for each set. + */ + sets_data = list_copy(gd->unsortable_sets); + + for_each_cell(lc, gd->rollups, l_start) + { + RollupData *rollup = lfirst_node(RollupData, lc); + + /* + * If we find an unhashable rollup that's not been skipped by the + * "actually sorted" check above, we can't cope; we'd need sorted + * input (with a different sort order) but we can't get that here. + * So bail out; we'll get a valid path from the is_sorted case + * instead. + * + * The mere presence of empty grouping sets doesn't make a rollup + * unhashable (see preprocess_grouping_sets), we handle those + * specially below. + */ + if (!rollup->hashable) + return; + + sets_data = list_concat(sets_data, rollup->gsets_data); + } + foreach(lc, sets_data) + { + GroupingSetData *gs = lfirst_node(GroupingSetData, lc); + List *gset = gs->set; + RollupData *rollup; + + if (gset == NIL) + { + /* Empty grouping sets can't be hashed. */ + empty_sets_data = lappend(empty_sets_data, gs); + empty_sets = lappend(empty_sets, NIL); + } + else + { + rollup = makeNode(RollupData); + + rollup->groupClause = preprocess_groupclause(root, gset); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + new_rollups = lappend(new_rollups, rollup); + } + } + + /* + * If we didn't find anything nonempty to hash, then bail. We'll + * generate a path from the is_sorted case. + */ + if (new_rollups == NIL) + return; + + /* + * If there were empty grouping sets they should have been in the + * first rollup. + */ + Assert(!unhashed_rollup || !empty_sets); + + if (unhashed_rollup) + { + new_rollups = lappend(new_rollups, unhashed_rollup); + strat = AGG_MIXED; + } + else if (empty_sets) + { + RollupData *rollup = makeNode(RollupData); + + rollup->groupClause = NIL; + rollup->gsets_data = empty_sets_data; + rollup->gsets = empty_sets; + rollup->numGroups = list_length(empty_sets); + rollup->hashable = false; + rollup->is_hashed = false; + new_rollups = lappend(new_rollups, rollup); + strat = AGG_MIXED; + } + + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + (List *) parse->havingQual, + strat, + new_rollups, + agg_costs, + dNumGroups)); + return; + } + + /* + * If we have sorted input but nothing we can do with it, bail. + */ + if (list_length(gd->rollups) == 0) + return; + + /* + * Given sorted input, we try and make two paths: one sorted and one mixed + * sort/hash. (We need to try both because hashagg might be disabled, or + * some columns might not be sortable.) + * + * can_hash is passed in as false if some obstacle elsewhere (such as + * ordered aggs) means that we shouldn't consider hashing at all. + */ + if (can_hash && gd->any_hashable) + { + List *rollups = NIL; + List *hash_sets = list_copy(gd->unsortable_sets); + double availspace = hash_mem_limit; + ListCell *lc; + + /* + * Account first for space needed for groups we can't sort at all. + */ + availspace -= estimate_hashagg_tablesize(root, + path, + agg_costs, + gd->dNumHashGroups); + + if (availspace > 0 && list_length(gd->rollups) > 1) + { + double scale; + int num_rollups = list_length(gd->rollups); + int k_capacity; + int *k_weights = palloc(num_rollups * sizeof(int)); + Bitmapset *hash_items = NULL; + int i; + + /* + * We treat this as a knapsack problem: the knapsack capacity + * represents hash_mem, the item weights are the estimated memory + * usage of the hashtables needed to implement a single rollup, + * and we really ought to use the cost saving as the item value; + * however, currently the costs assigned to sort nodes don't + * reflect the comparison costs well, and so we treat all items as + * of equal value (each rollup we hash instead saves us one sort). + * + * To use the discrete knapsack, we need to scale the values to a + * reasonably small bounded range. We choose to allow a 5% error + * margin; we have no more than 4096 rollups in the worst possible + * case, which with a 5% error margin will require a bit over 42MB + * of workspace. (Anyone wanting to plan queries that complex had + * better have the memory for it. In more reasonable cases, with + * no more than a couple of dozen rollups, the memory usage will + * be negligible.) + * + * k_capacity is naturally bounded, but we clamp the values for + * scale and weight (below) to avoid overflows or underflows (or + * uselessly trying to use a scale factor less than 1 byte). + */ + scale = Max(availspace / (20.0 * num_rollups), 1.0); + k_capacity = (int) floor(availspace / scale); + + /* + * We leave the first rollup out of consideration since it's the + * one that matches the input sort order. We assign indexes "i" + * to only those entries considered for hashing; the second loop, + * below, must use the same condition. + */ + i = 0; + for_each_from(lc, gd->rollups, 1) + { + RollupData *rollup = lfirst_node(RollupData, lc); + + if (rollup->hashable) + { + double sz = estimate_hashagg_tablesize(root, + path, + agg_costs, + rollup->numGroups); + + /* + * If sz is enormous, but hash_mem (and hence scale) is + * small, avoid integer overflow here. + */ + k_weights[i] = (int) Min(floor(sz / scale), + k_capacity + 1.0); + ++i; + } + } + + /* + * Apply knapsack algorithm; compute the set of items which + * maximizes the value stored (in this case the number of sorts + * saved) while keeping the total size (approximately) within + * capacity. + */ + if (i > 0) + hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL); + + if (!bms_is_empty(hash_items)) + { + rollups = list_make1(linitial(gd->rollups)); + + i = 0; + for_each_from(lc, gd->rollups, 1) + { + RollupData *rollup = lfirst_node(RollupData, lc); + + if (rollup->hashable) + { + if (bms_is_member(i, hash_items)) + hash_sets = list_concat(hash_sets, + rollup->gsets_data); + else + rollups = lappend(rollups, rollup); + ++i; + } + else + rollups = lappend(rollups, rollup); + } + } + } + + if (!rollups && hash_sets) + rollups = list_copy(gd->rollups); + + foreach(lc, hash_sets) + { + GroupingSetData *gs = lfirst_node(GroupingSetData, lc); + RollupData *rollup = makeNode(RollupData); + + Assert(gs->set != NIL); + + rollup->groupClause = preprocess_groupclause(root, gs->set); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + rollups = lcons(rollup, rollups); + } + + if (rollups) + { + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + (List *) parse->havingQual, + AGG_MIXED, + rollups, + agg_costs, + dNumGroups)); + } + } + + /* + * Now try the simple sorted case. + */ + if (!gd->unsortable_sets) + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + (List *) parse->havingQual, + AGG_SORTED, + gd->rollups, + agg_costs, + dNumGroups)); +} + +/* + * create_window_paths + * + * Build a new upperrel containing Paths for window-function evaluation. + * + * input_rel: contains the source-data Paths + * input_target: result of make_window_input_target + * output_target: what the topmost WindowAggPath should return + * wflists: result of find_window_functions + * activeWindows: result of select_active_windows + * + * Note: all Paths in input_rel are expected to return input_target. + */ +static RelOptInfo * +create_window_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *input_target, + PathTarget *output_target, + bool output_target_parallel_safe, + WindowFuncLists *wflists, + List *activeWindows) +{ + RelOptInfo *window_rel; + ListCell *lc; + + /* For now, do all work in the (WINDOW, NULL) upperrel */ + window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL); + + /* + * If the input relation is not parallel-safe, then the window relation + * can't be parallel-safe, either. Otherwise, we need to examine the + * target list and active windows for non-parallel-safe constructs. + */ + if (input_rel->consider_parallel && output_target_parallel_safe && + is_parallel_safe(root, (Node *) activeWindows)) + window_rel->consider_parallel = true; + + /* + * If the input rel belongs to a single FDW, so does the window rel. + */ + window_rel->serverid = input_rel->serverid; + window_rel->userid = input_rel->userid; + window_rel->useridiscurrent = input_rel->useridiscurrent; + window_rel->fdwroutine = input_rel->fdwroutine; + + /* + * Consider computing window functions starting from the existing + * cheapest-total path (which will likely require a sort) as well as any + * existing paths that satisfy or partially satisfy root->window_pathkeys. + */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + int presorted_keys; + + if (path == input_rel->cheapest_total_path || + pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys, + &presorted_keys) || + presorted_keys > 0) + create_one_window_path(root, + window_rel, + path, + input_target, + output_target, + wflists, + activeWindows); + } + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding ForeignPaths. + */ + if (window_rel->fdwroutine && + window_rel->fdwroutine->GetForeignUpperPaths) + window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW, + input_rel, window_rel, + NULL); + + /* Let extensions possibly add some more paths */ + if (create_upper_paths_hook) + (*create_upper_paths_hook) (root, UPPERREL_WINDOW, + input_rel, window_rel, NULL); + + /* Now choose the best path(s) */ + set_cheapest(window_rel); + + return window_rel; +} + +/* + * Stack window-function implementation steps atop the given Path, and + * add the result to window_rel. + * + * window_rel: upperrel to contain result + * path: input Path to use (must return input_target) + * input_target: result of make_window_input_target + * output_target: what the topmost WindowAggPath should return + * wflists: result of find_window_functions + * activeWindows: result of select_active_windows + */ +static void +create_one_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + PathTarget *input_target, + PathTarget *output_target, + WindowFuncLists *wflists, + List *activeWindows) +{ + PathTarget *window_target; + ListCell *l; + List *topqual = NIL; + + /* + * Since each window clause could require a different sort order, we stack + * up a WindowAgg node for each clause, with sort steps between them as + * needed. (We assume that select_active_windows chose a good order for + * executing the clauses in.) + * + * input_target should contain all Vars and Aggs needed for the result. + * (In some cases we wouldn't need to propagate all of these all the way + * to the top, since they might only be needed as inputs to WindowFuncs. + * It's probably not worth trying to optimize that though.) It must also + * contain all window partitioning and sorting expressions, to ensure + * they're computed only once at the bottom of the stack (that's critical + * for volatile functions). As we climb up the stack, we'll add outputs + * for the WindowFuncs computed at each level. + */ + window_target = input_target; + + foreach(l, activeWindows) + { + WindowClause *wc = lfirst_node(WindowClause, l); + List *window_pathkeys; + int presorted_keys; + bool is_sorted; + bool topwindow; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + root->processed_tlist); + + is_sorted = pathkeys_count_contained_in(window_pathkeys, + path->pathkeys, + &presorted_keys); + + /* Sort if necessary */ + if (!is_sorted) + { + /* + * No presorted keys or incremental sort disabled, just perform a + * complete sort. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, window_rel, + path, + window_pathkeys, + -1.0); + else + { + /* + * Since we have presorted keys and incremental sort is + * enabled, just use incremental sort. + */ + path = (Path *) create_incremental_sort_path(root, + window_rel, + path, + window_pathkeys, + presorted_keys, + -1.0); + } + } + + if (lnext(activeWindows, l)) + { + /* + * Add the current WindowFuncs to the output target for this + * intermediate WindowAggPath. We must copy window_target to + * avoid changing the previous path's target. + * + * Note: a WindowFunc adds nothing to the target's eval costs; but + * we do need to account for the increase in tlist width. + */ + ListCell *lc2; + + window_target = copy_pathtarget(window_target); + foreach(lc2, wflists->windowFuncs[wc->winref]) + { + WindowFunc *wfunc = lfirst_node(WindowFunc, lc2); + + add_column_to_pathtarget(window_target, (Expr *) wfunc, 0); + window_target->width += get_typavgwidth(wfunc->wintype, -1); + } + } + else + { + /* Install the goal target in the topmost WindowAgg */ + window_target = output_target; + } + + /* mark the final item in the list as the top-level window */ + topwindow = foreach_current_index(l) == list_length(activeWindows) - 1; + + /* + * Accumulate all of the runConditions from each intermediate + * WindowClause. The top-level WindowAgg must pass these as a qual so + * that it filters out unwanted tuples correctly. + */ + if (!topwindow) + topqual = list_concat(topqual, wc->runCondition); + + path = (Path *) + create_windowagg_path(root, window_rel, path, window_target, + wflists->windowFuncs[wc->winref], + wc, topwindow ? topqual : NIL, topwindow); + } + + add_path(window_rel, path); +} + +/* + * create_distinct_paths + * + * Build a new upperrel containing Paths for SELECT DISTINCT evaluation. + * + * input_rel: contains the source-data Paths + * + * Note: input paths should already compute the desired pathtarget, since + * Sort/Unique won't project anything. + */ +static RelOptInfo * +create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel) +{ + RelOptInfo *distinct_rel; + + /* For now, do all work in the (DISTINCT, NULL) upperrel */ + distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); + + /* + * We don't compute anything at this level, so distinct_rel will be + * parallel-safe if the input rel is parallel-safe. In particular, if + * there is a DISTINCT ON (...) clause, any path for the input_rel will + * output those expressions, and will not be parallel-safe unless those + * expressions are parallel-safe. + */ + distinct_rel->consider_parallel = input_rel->consider_parallel; + + /* + * If the input rel belongs to a single FDW, so does the distinct_rel. + */ + distinct_rel->serverid = input_rel->serverid; + distinct_rel->userid = input_rel->userid; + distinct_rel->useridiscurrent = input_rel->useridiscurrent; + distinct_rel->fdwroutine = input_rel->fdwroutine; + + /* build distinct paths based on input_rel's pathlist */ + create_final_distinct_paths(root, input_rel, distinct_rel); + + /* now build distinct paths based on input_rel's partial_pathlist */ + create_partial_distinct_paths(root, input_rel, distinct_rel); + + /* Give a helpful error if we failed to create any paths */ + if (distinct_rel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement DISTINCT"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding ForeignPaths. + */ + if (distinct_rel->fdwroutine && + distinct_rel->fdwroutine->GetForeignUpperPaths) + distinct_rel->fdwroutine->GetForeignUpperPaths(root, + UPPERREL_DISTINCT, + input_rel, + distinct_rel, + NULL); + + /* Let extensions possibly add some more paths */ + if (create_upper_paths_hook) + (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel, + distinct_rel, NULL); + + /* Now choose the best path(s) */ + set_cheapest(distinct_rel); + + return distinct_rel; +} + +/* + * create_partial_distinct_paths + * + * Process 'input_rel' partial paths and add unique/aggregate paths to the + * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge + * paths on top and add a final unique/aggregate path to remove any duplicate + * produced from combining rows from parallel workers. + */ +static void +create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *final_distinct_rel) +{ + RelOptInfo *partial_distinct_rel; + Query *parse; + List *distinctExprs; + double numDistinctRows; + Path *cheapest_partial_path; + ListCell *lc; + + /* nothing to do when there are no partial paths in the input rel */ + if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL) + return; + + parse = root->parse; + + /* can't do parallel DISTINCT ON */ + if (parse->hasDistinctOn) + return; + + partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT, + NULL); + partial_distinct_rel->reltarget = root->upper_targets[UPPERREL_PARTIAL_DISTINCT]; + partial_distinct_rel->consider_parallel = input_rel->consider_parallel; + + /* + * If input_rel belongs to a single FDW, so does the partial_distinct_rel. + */ + partial_distinct_rel->serverid = input_rel->serverid; + partial_distinct_rel->userid = input_rel->userid; + partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent; + partial_distinct_rel->fdwroutine = input_rel->fdwroutine; + + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + + /* estimate how many distinct rows we'll get from each worker */ + numDistinctRows = estimate_num_groups(root, distinctExprs, + cheapest_partial_path->rows, + NULL, NULL); + + /* first try adding unique paths atop of sorted paths */ + if (grouping_is_sortable(parse->distinctClause)) + { + foreach(lc, input_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys)) + { + add_partial_path(partial_distinct_rel, (Path *) + create_upper_unique_path(root, + partial_distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } + } + } + + /* + * Now try hash aggregate paths, if enabled and hashing is possible. Since + * we're not on the hook to ensure we do our best to create at least one + * path here, we treat enable_hashagg as a hard off-switch rather than the + * slightly softer variant in create_final_distinct_paths. + */ + if (enable_hashagg && grouping_is_hashable(parse->distinctClause)) + { + add_partial_path(partial_distinct_rel, (Path *) + create_agg_path(root, + partial_distinct_rel, + cheapest_partial_path, + cheapest_partial_path->pathtarget, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->distinctClause, + NIL, + NULL, + numDistinctRows)); + } + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding ForeignPaths. + */ + if (partial_distinct_rel->fdwroutine && + partial_distinct_rel->fdwroutine->GetForeignUpperPaths) + partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root, + UPPERREL_PARTIAL_DISTINCT, + input_rel, + partial_distinct_rel, + NULL); + + /* Let extensions possibly add some more partial paths */ + if (create_upper_paths_hook) + (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT, + input_rel, partial_distinct_rel, NULL); + + if (partial_distinct_rel->partial_pathlist != NIL) + { + generate_gather_paths(root, partial_distinct_rel, true); + set_cheapest(partial_distinct_rel); + + /* + * Finally, create paths to distinctify the final result. This step + * is needed to remove any duplicates due to combining rows from + * parallel workers. + */ + create_final_distinct_paths(root, partial_distinct_rel, + final_distinct_rel); + } +} + +/* + * create_final_distinct_paths + * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist + * + * input_rel: contains the source-data paths + * distinct_rel: destination relation for storing created paths + */ +static RelOptInfo * +create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *distinct_rel) +{ + Query *parse = root->parse; + Path *cheapest_input_path = input_rel->cheapest_total_path; + double numDistinctRows; + bool allow_hash; + Path *path; + ListCell *lc; + + /* Estimate number of distinct rows there will be */ + if (parse->groupClause || parse->groupingSets || parse->hasAggs || + root->hasHavingQual) + { + /* + * If there was grouping or aggregation, use the number of input rows + * as the estimated number of DISTINCT rows (ie, assume the input is + * already mostly unique). + */ + numDistinctRows = cheapest_input_path->rows; + } + else + { + /* + * Otherwise, the UNIQUE filter has effects comparable to GROUP BY. + */ + List *distinctExprs; + + distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + numDistinctRows = estimate_num_groups(root, distinctExprs, + cheapest_input_path->rows, + NULL, NULL); + } + + /* + * Consider sort-based implementations of DISTINCT, if possible. + */ + if (grouping_is_sortable(parse->distinctClause)) + { + /* + * First, if we have any adequately-presorted paths, just stick a + * Unique node on those. Then consider doing an explicit sort of the + * cheapest input path and Unique'ing that. + * + * When we have DISTINCT ON, we must sort by the more rigorous of + * DISTINCT and ORDER BY, else it won't have the desired behavior. + * Also, if we do have to do an explicit sort, we might as well use + * the more rigorous ordering to avoid a second sort later. (Note + * that the parser will have ensured that one clause is a prefix of + * the other.) + */ + List *needed_pathkeys; + + if (parse->hasDistinctOn && + list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + needed_pathkeys = root->sort_pathkeys; + else + needed_pathkeys = root->distinct_pathkeys; + + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } + } + + /* For explicit-sort case, always use the more rigorous clause */ + if (list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + { + needed_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + needed_pathkeys)); + } + else + needed_pathkeys = root->distinct_pathkeys; + + path = cheapest_input_path; + if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + path = (Path *) create_sort_path(root, distinct_rel, + path, + needed_pathkeys, + -1.0); + + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } + + /* + * Consider hash-based implementations of DISTINCT, if possible. + * + * If we were not able to make any other types of path, we *must* hash or + * die trying. If we do have other choices, there are two things that + * should prevent selection of hashing: if the query uses DISTINCT ON + * (because it won't really have the expected behavior if we hash), or if + * enable_hashagg is off. + * + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. + */ + if (distinct_rel->pathlist == NIL) + allow_hash = true; /* we have no alternatives */ + else if (parse->hasDistinctOn || !enable_hashagg) + allow_hash = false; /* policy-based decision not to hash */ + else + allow_hash = true; /* default */ + + if (allow_hash && grouping_is_hashable(parse->distinctClause)) + { + /* Generate hashed aggregate path --- no sort needed */ + add_path(distinct_rel, (Path *) + create_agg_path(root, + distinct_rel, + cheapest_input_path, + cheapest_input_path->pathtarget, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->distinctClause, + NIL, + NULL, + numDistinctRows)); + } + + return distinct_rel; +} + +/* + * create_ordered_paths + * + * Build a new upperrel containing Paths for ORDER BY evaluation. + * + * All paths in the result must satisfy the ORDER BY ordering. + * The only new paths we need consider are an explicit full sort + * and incremental sort on the cheapest-total existing path. + * + * input_rel: contains the source-data Paths + * target: the output tlist the result Paths must emit + * limit_tuples: estimated bound on the number of output tuples, + * or -1 if no LIMIT or couldn't estimate + * + * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the + * other pathkeys (grouping, ...) like generate_useful_gather_paths. + */ +static RelOptInfo * +create_ordered_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + bool target_parallel_safe, + double limit_tuples) +{ + Path *cheapest_input_path = input_rel->cheapest_total_path; + RelOptInfo *ordered_rel; + ListCell *lc; + + /* For now, do all work in the (ORDERED, NULL) upperrel */ + ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL); + + /* + * If the input relation is not parallel-safe, then the ordered relation + * can't be parallel-safe, either. Otherwise, it's parallel-safe if the + * target list is parallel-safe. + */ + if (input_rel->consider_parallel && target_parallel_safe) + ordered_rel->consider_parallel = true; + + /* + * If the input rel belongs to a single FDW, so does the ordered_rel. + */ + ordered_rel->serverid = input_rel->serverid; + ordered_rel->userid = input_rel->userid; + ordered_rel->useridiscurrent = input_rel->useridiscurrent; + ordered_rel->fdwroutine = input_rel->fdwroutine; + + foreach(lc, input_rel->pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path = input_path; + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(root->sort_pathkeys, + input_path->pathkeys, &presorted_keys); + + if (is_sorted) + { + /* Use the input path as is, but add a projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); + } + else + { + /* + * Try adding an explicit sort, but only to the cheapest total + * path since a full sort should generally add the same cost to + * all paths. + */ + if (input_path == cheapest_input_path) + { + /* + * Sort the cheapest input path. An explicit sort here can + * take advantage of LIMIT. + */ + sorted_path = (Path *) create_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + limit_tuples); + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); + } + + /* + * If incremental sort is enabled, then try it as well. Unlike + * with regular sorts, we can't just look at the cheapest path, + * because the cost of incremental sort depends on how well + * presorted the path is. Additionally incremental sort may enable + * a cheaper startup path to win out despite higher total cost. + */ + if (!enable_incremental_sort) + continue; + + /* Likewise, if the path can't be used for incremental sort. */ + if (!presorted_keys) + continue; + + /* Also consider incremental sort. */ + sorted_path = (Path *) create_incremental_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + presorted_keys, + limit_tuples); + + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); + } + } + + /* + * generate_gather_paths() will have already generated a simple Gather + * path for the best parallel path, if any, and the loop above will have + * considered sorting it. Similarly, generate_gather_paths() will also + * have generated order-preserving Gather Merge plans which can be used + * without sorting if they happen to match the sort_pathkeys, and the loop + * above will have handled those as well. However, there's one more + * possibility: it may make sense to sort the cheapest partial path + * according to the required output order and then use Gather Merge. + */ + if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL && + input_rel->partial_pathlist != NIL) + { + Path *cheapest_partial_path; + + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + /* + * If cheapest partial path doesn't need a sort, this is redundant + * with what's already been tried. + */ + if (!pathkeys_contained_in(root->sort_pathkeys, + cheapest_partial_path->pathkeys)) + { + Path *path; + double total_groups; + + path = (Path *) create_sort_path(root, + ordered_rel, + cheapest_partial_path, + root->sort_pathkeys, + limit_tuples); + + total_groups = cheapest_partial_path->rows * + cheapest_partial_path->parallel_workers; + path = (Path *) + create_gather_merge_path(root, ordered_rel, + path, + path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (path->pathtarget != target) + path = apply_projection_to_path(root, ordered_rel, + path, target); + + add_path(ordered_rel, path); + } + + /* + * Consider incremental sort with a gather merge on partial paths. + * + * We can also skip the entire loop when we only have a single-item + * sort_pathkeys because then we can't possibly have a presorted + * prefix of the list without having the list be fully sorted. + */ + if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1) + { + ListCell *lc; + + foreach(lc, input_rel->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path; + bool is_sorted; + int presorted_keys; + double total_groups; + + /* + * We don't care if this is the cheapest partial path - we + * can't simply skip it, because it may be partially sorted in + * which case we want to consider adding incremental sort + * (instead of full sort, which is what happens above). + */ + + is_sorted = pathkeys_count_contained_in(root->sort_pathkeys, + input_path->pathkeys, + &presorted_keys); + + /* No point in adding incremental sort on fully sorted paths. */ + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* Since we have presorted keys, consider incremental sort. */ + sorted_path = (Path *) create_incremental_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + presorted_keys, + limit_tuples); + total_groups = input_path->rows * + input_path->parallel_workers; + sorted_path = (Path *) + create_gather_merge_path(root, ordered_rel, + sorted_path, + sorted_path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); + } + } + } + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding ForeignPaths. + */ + if (ordered_rel->fdwroutine && + ordered_rel->fdwroutine->GetForeignUpperPaths) + ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED, + input_rel, ordered_rel, + NULL); + + /* Let extensions possibly add some more paths */ + if (create_upper_paths_hook) + (*create_upper_paths_hook) (root, UPPERREL_ORDERED, + input_rel, ordered_rel, NULL); + + /* + * No need to bother with set_cheapest here; grouping_planner does not + * need us to do it. + */ + Assert(ordered_rel->pathlist != NIL); + + return ordered_rel; +} + + +/* + * make_group_input_target + * Generate appropriate PathTarget for initial input to grouping nodes. + * + * If there is grouping or aggregation, the scan/join subplan cannot emit + * the query's final targetlist; for example, it certainly can't emit any + * aggregate function calls. This routine generates the correct target + * for the scan/join subplan. + * + * The query target list passed from the parser already contains entries + * for all ORDER BY and GROUP BY expressions, but it will not have entries + * for variables used only in HAVING clauses; so we need to add those + * variables to the subplan target list. Also, we flatten all expressions + * except GROUP BY items into their component variables; other expressions + * will be computed by the upper plan nodes rather than by the subplan. + * For example, given a query like + * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; + * we want to pass this targetlist to the subplan: + * a+b,c,d + * where the a+b target will be used by the Sort/Group steps, and the + * other targets will be used for computing the final results. + * + * 'final_target' is the query's final target list (in PathTarget form) + * + * The result is the PathTarget to be computed by the Paths returned from + * query_planner(). + */ +static PathTarget * +make_group_input_target(PlannerInfo *root, PathTarget *final_target) +{ + Query *parse = root->parse; + PathTarget *input_target; + List *non_group_cols; + List *non_group_vars; + int i; + ListCell *lc; + + /* + * We must build a target containing all grouping columns, plus any other + * Vars mentioned in the query's targetlist and HAVING qual. + */ + input_target = create_empty_pathtarget(); + non_group_cols = NIL; + + i = 0; + foreach(lc, final_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Index sgref = get_pathtarget_sortgroupref(final_target, i); + + if (sgref && parse->groupClause && + get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) + { + /* + * It's a grouping column, so add it to the input target as-is. + */ + add_column_to_pathtarget(input_target, expr, sgref); + } + else + { + /* + * Non-grouping column, so just remember the expression for later + * call to pull_var_clause. + */ + non_group_cols = lappend(non_group_cols, expr); + } + + i++; + } + + /* + * If there's a HAVING clause, we'll need the Vars it uses, too. + */ + if (parse->havingQual) + non_group_cols = lappend(non_group_cols, parse->havingQual); + + /* + * Pull out all the Vars mentioned in non-group cols (plus HAVING), and + * add them to the input target if not already present. (A Var used + * directly as a GROUP BY item will be present already.) Note this + * includes Vars used in resjunk items, so we are covering the needs of + * ORDER BY and window specifications. Vars used within Aggrefs and + * WindowFuncs will be pulled out here, too. + */ + non_group_vars = pull_var_clause((Node *) non_group_cols, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + add_new_columns_to_pathtarget(input_target, non_group_vars); + + /* clean up cruft */ + list_free(non_group_vars); + list_free(non_group_cols); + + /* XXX this causes some redundant cost calculation ... */ + return set_pathtarget_cost_width(root, input_target); +} + +/* + * make_partial_grouping_target + * Generate appropriate PathTarget for output of partial aggregate + * (or partial grouping, if there are no aggregates) nodes. + * + * A partial aggregation node needs to emit all the same aggregates that + * a regular aggregation node would, plus any aggregates used in HAVING; + * except that the Aggref nodes should be marked as partial aggregates. + * + * In addition, we'd better emit any Vars and PlaceHolderVars that are + * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably, + * these would be Vars that are grouped by or used in grouping expressions.) + * + * grouping_target is the tlist to be emitted by the topmost aggregation step. + * havingQual represents the HAVING clause. + */ +static PathTarget * +make_partial_grouping_target(PlannerInfo *root, + PathTarget *grouping_target, + Node *havingQual) +{ + Query *parse = root->parse; + PathTarget *partial_target; + List *non_group_cols; + List *non_group_exprs; + int i; + ListCell *lc; + + partial_target = create_empty_pathtarget(); + non_group_cols = NIL; + + i = 0; + foreach(lc, grouping_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Index sgref = get_pathtarget_sortgroupref(grouping_target, i); + + if (sgref && parse->groupClause && + get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) + { + /* + * It's a grouping column, so add it to the partial_target as-is. + * (This allows the upper agg step to repeat the grouping calcs.) + */ + add_column_to_pathtarget(partial_target, expr, sgref); + } + else + { + /* + * Non-grouping column, so just remember the expression for later + * call to pull_var_clause. + */ + non_group_cols = lappend(non_group_cols, expr); + } + + i++; + } + + /* + * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too. + */ + if (havingQual) + non_group_cols = lappend(non_group_cols, havingQual); + + /* + * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in + * non-group cols (plus HAVING), and add them to the partial_target if not + * already present. (An expression used directly as a GROUP BY item will + * be present already.) Note this includes Vars used in resjunk items, so + * we are covering the needs of ORDER BY and window specifications. + */ + non_group_exprs = pull_var_clause((Node *) non_group_cols, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_new_columns_to_pathtarget(partial_target, non_group_exprs); + + /* + * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs + * are at the top level of the target list, so we can just scan the list + * rather than recursing through the expression trees. + */ + foreach(lc, partial_target->exprs) + { + Aggref *aggref = (Aggref *) lfirst(lc); + + if (IsA(aggref, Aggref)) + { + Aggref *newaggref; + + /* + * We shouldn't need to copy the substructure of the Aggref node, + * but flat-copy the node itself to avoid damaging other trees. + */ + newaggref = makeNode(Aggref); + memcpy(newaggref, aggref, sizeof(Aggref)); + + /* For now, assume serialization is required */ + mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL); + + lfirst(lc) = newaggref; + } + } + + /* clean up cruft */ + list_free(non_group_exprs); + list_free(non_group_cols); + + /* XXX this causes some redundant cost calculation ... */ + return set_pathtarget_cost_width(root, partial_target); +} + +/* + * mark_partial_aggref + * Adjust an Aggref to make it represent a partial-aggregation step. + * + * The Aggref node is modified in-place; caller must do any copying required. + */ +void +mark_partial_aggref(Aggref *agg, AggSplit aggsplit) +{ + /* aggtranstype should be computed by this point */ + Assert(OidIsValid(agg->aggtranstype)); + /* ... but aggsplit should still be as the parser left it */ + Assert(agg->aggsplit == AGGSPLIT_SIMPLE); + + /* Mark the Aggref with the intended partial-aggregation mode */ + agg->aggsplit = aggsplit; + + /* + * Adjust result type if needed. Normally, a partial aggregate returns + * the aggregate's transition type; but if that's INTERNAL and we're + * serializing, it returns BYTEA instead. + */ + if (DO_AGGSPLIT_SKIPFINAL(aggsplit)) + { + if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit)) + agg->aggtype = BYTEAOID; + else + agg->aggtype = agg->aggtranstype; + } +} + +/* + * postprocess_setop_tlist + * Fix up targetlist returned by plan_set_operations(). + * + * We need to transpose sort key info from the orig_tlist into new_tlist. + * NOTE: this would not be good enough if we supported resjunk sort keys + * for results of set operations --- then, we'd need to project a whole + * new tlist to evaluate the resjunk columns. For now, just ereport if we + * find any resjunk columns in orig_tlist. + */ +static List * +postprocess_setop_tlist(List *new_tlist, List *orig_tlist) +{ + ListCell *l; + ListCell *orig_tlist_item = list_head(orig_tlist); + + foreach(l, new_tlist) + { + TargetEntry *new_tle = lfirst_node(TargetEntry, l); + TargetEntry *orig_tle; + + /* ignore resjunk columns in setop result */ + if (new_tle->resjunk) + continue; + + Assert(orig_tlist_item != NULL); + orig_tle = lfirst_node(TargetEntry, orig_tlist_item); + orig_tlist_item = lnext(orig_tlist, orig_tlist_item); + if (orig_tle->resjunk) /* should not happen */ + elog(ERROR, "resjunk output columns are not implemented"); + Assert(new_tle->resno == orig_tle->resno); + new_tle->ressortgroupref = orig_tle->ressortgroupref; + } + if (orig_tlist_item != NULL) + elog(ERROR, "resjunk output columns are not implemented"); + return new_tlist; +} + +/* + * select_active_windows + * Create a list of the "active" window clauses (ie, those referenced + * by non-deleted WindowFuncs) in the order they are to be executed. + */ +static List * +select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) +{ + List *windowClause = root->parse->windowClause; + List *result = NIL; + ListCell *lc; + int nActive = 0; + WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData) + * list_length(windowClause)); + + /* First, construct an array of the active windows */ + foreach(lc, windowClause) + { + WindowClause *wc = lfirst_node(WindowClause, lc); + + /* It's only active if wflists shows some related WindowFuncs */ + Assert(wc->winref <= wflists->maxWinRef); + if (wflists->windowFuncs[wc->winref] == NIL) + continue; + + actives[nActive].wc = wc; /* original clause */ + + /* + * For sorting, we want the list of partition keys followed by the + * list of sort keys. But pathkeys construction will remove duplicates + * between the two, so we can as well (even though we can't detect all + * of the duplicates, since some may come from ECs - that might mean + * we miss optimization chances here). We must, however, ensure that + * the order of entries is preserved with respect to the ones we do + * keep. + * + * partitionClause and orderClause had their own duplicates removed in + * parse analysis, so we're only concerned here with removing + * orderClause entries that also appear in partitionClause. + */ + actives[nActive].uniqueOrder = + list_concat_unique(list_copy(wc->partitionClause), + wc->orderClause); + nActive++; + } + + /* + * Sort active windows by their partitioning/ordering clauses, ignoring + * any framing clauses, so that the windows that need the same sorting are + * adjacent in the list. When we come to generate paths, this will avoid + * inserting additional Sort nodes. + * + * This is how we implement a specific requirement from the SQL standard, + * which says that when two or more windows are order-equivalent (i.e. + * have matching partition and order clauses, even if their names or + * framing clauses differ), then all peer rows must be presented in the + * same order in all of them. If we allowed multiple sort nodes for such + * cases, we'd risk having the peer rows end up in different orders in + * equivalent windows due to sort instability. (See General Rule 4 of + * <window clause> in SQL2008 - SQL2016.) + * + * Additionally, if the entire list of clauses of one window is a prefix + * of another, put first the window with stronger sorting requirements. + * This way we will first sort for stronger window, and won't have to sort + * again for the weaker one. + */ + qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp); + + /* build ordered list of the original WindowClause nodes */ + for (int i = 0; i < nActive; i++) + result = lappend(result, actives[i].wc); + + pfree(actives); + + return result; +} + +/* + * common_prefix_cmp + * QSort comparison function for WindowClauseSortData + * + * Sort the windows by the required sorting clauses. First, compare the sort + * clauses themselves. Second, if one window's clauses are a prefix of another + * one's clauses, put the window with more sort clauses first. + */ +static int +common_prefix_cmp(const void *a, const void *b) +{ + const WindowClauseSortData *wcsa = a; + const WindowClauseSortData *wcsb = b; + ListCell *item_a; + ListCell *item_b; + + forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder) + { + SortGroupClause *sca = lfirst_node(SortGroupClause, item_a); + SortGroupClause *scb = lfirst_node(SortGroupClause, item_b); + + if (sca->tleSortGroupRef > scb->tleSortGroupRef) + return -1; + else if (sca->tleSortGroupRef < scb->tleSortGroupRef) + return 1; + else if (sca->sortop > scb->sortop) + return -1; + else if (sca->sortop < scb->sortop) + return 1; + else if (sca->nulls_first && !scb->nulls_first) + return -1; + else if (!sca->nulls_first && scb->nulls_first) + return 1; + /* no need to compare eqop, since it is fully determined by sortop */ + } + + if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder)) + return -1; + else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder)) + return 1; + + return 0; +} + +/* + * make_window_input_target + * Generate appropriate PathTarget for initial input to WindowAgg nodes. + * + * When the query has window functions, this function computes the desired + * target to be computed by the node just below the first WindowAgg. + * This tlist must contain all values needed to evaluate the window functions, + * compute the final target list, and perform any required final sort step. + * If multiple WindowAggs are needed, each intermediate one adds its window + * function results onto this base tlist; only the topmost WindowAgg computes + * the actual desired target list. + * + * This function is much like make_group_input_target, though not quite enough + * like it to share code. As in that function, we flatten most expressions + * into their component variables. But we do not want to flatten window + * PARTITION BY/ORDER BY clauses, since that might result in multiple + * evaluations of them, which would be bad (possibly even resulting in + * inconsistent answers, if they contain volatile functions). + * Also, we must not flatten GROUP BY clauses that were left unflattened by + * make_group_input_target, because we may no longer have access to the + * individual Vars in them. + * + * Another key difference from make_group_input_target is that we don't + * flatten Aggref expressions, since those are to be computed below the + * window functions and just referenced like Vars above that. + * + * 'final_target' is the query's final target list (in PathTarget form) + * 'activeWindows' is the list of active windows previously identified by + * select_active_windows. + * + * The result is the PathTarget to be computed by the plan node immediately + * below the first WindowAgg node. + */ +static PathTarget * +make_window_input_target(PlannerInfo *root, + PathTarget *final_target, + List *activeWindows) +{ + Query *parse = root->parse; + PathTarget *input_target; + Bitmapset *sgrefs; + List *flattenable_cols; + List *flattenable_vars; + int i; + ListCell *lc; + + Assert(parse->hasWindowFuncs); + + /* + * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses + * into a bitmapset for convenient reference below. + */ + sgrefs = NULL; + foreach(lc, activeWindows) + { + WindowClause *wc = lfirst_node(WindowClause, lc); + ListCell *lc2; + + foreach(lc2, wc->partitionClause) + { + SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2); + + sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef); + } + foreach(lc2, wc->orderClause) + { + SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2); + + sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef); + } + } + + /* Add in sortgroupref numbers of GROUP BY clauses, too */ + foreach(lc, parse->groupClause) + { + SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc); + + sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef); + } + + /* + * Construct a target containing all the non-flattenable targetlist items, + * and save aside the others for a moment. + */ + input_target = create_empty_pathtarget(); + flattenable_cols = NIL; + + i = 0; + foreach(lc, final_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Index sgref = get_pathtarget_sortgroupref(final_target, i); + + /* + * Don't want to deconstruct window clauses or GROUP BY items. (Note + * that such items can't contain window functions, so it's okay to + * compute them below the WindowAgg nodes.) + */ + if (sgref != 0 && bms_is_member(sgref, sgrefs)) + { + /* + * Don't want to deconstruct this value, so add it to the input + * target as-is. + */ + add_column_to_pathtarget(input_target, expr, sgref); + } + else + { + /* + * Column is to be flattened, so just remember the expression for + * later call to pull_var_clause. + */ + flattenable_cols = lappend(flattenable_cols, expr); + } + + i++; + } + + /* + * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and + * add them to the input target if not already present. (Some might be + * there already because they're used directly as window/group clauses.) + * + * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any + * Aggrefs are placed in the Agg node's tlist and not left to be computed + * at higher levels. On the other hand, we should recurse into + * WindowFuncs to make sure their input expressions are available. + */ + flattenable_vars = pull_var_clause((Node *) flattenable_cols, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + add_new_columns_to_pathtarget(input_target, flattenable_vars); + + /* clean up cruft */ + list_free(flattenable_vars); + list_free(flattenable_cols); + + /* XXX this causes some redundant cost calculation ... */ + return set_pathtarget_cost_width(root, input_target); +} + +/* + * make_pathkeys_for_window + * Create a pathkeys list describing the required input ordering + * for the given WindowClause. + * + * The required ordering is first the PARTITION keys, then the ORDER keys. + * In the future we might try to implement windowing using hashing, in which + * case the ordering could be relaxed, but for now we always sort. + */ +static List * +make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist) +{ + List *window_pathkeys; + List *window_sortclauses; + + /* Throw error if can't sort */ + if (!grouping_is_sortable(wc->partitionClause)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement window PARTITION BY"), + errdetail("Window partitioning columns must be of sortable datatypes."))); + if (!grouping_is_sortable(wc->orderClause)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement window ORDER BY"), + errdetail("Window ordering columns must be of sortable datatypes."))); + + /* Okay, make the combined pathkeys */ + window_sortclauses = list_concat_copy(wc->partitionClause, wc->orderClause); + window_pathkeys = make_pathkeys_for_sortclauses(root, + window_sortclauses, + tlist); + list_free(window_sortclauses); + return window_pathkeys; +} + +/* + * make_sort_input_target + * Generate appropriate PathTarget for initial input to Sort step. + * + * If the query has ORDER BY, this function chooses the target to be computed + * by the node just below the Sort (and DISTINCT, if any, since Unique can't + * project) steps. This might or might not be identical to the query's final + * output target. + * + * The main argument for keeping the sort-input tlist the same as the final + * is that we avoid a separate projection node (which will be needed if + * they're different, because Sort can't project). However, there are also + * advantages to postponing tlist evaluation till after the Sort: it ensures + * a consistent order of evaluation for any volatile functions in the tlist, + * and if there's also a LIMIT, we can stop the query without ever computing + * tlist functions for later rows, which is beneficial for both volatile and + * expensive functions. + * + * Our current policy is to postpone volatile expressions till after the sort + * unconditionally (assuming that that's possible, ie they are in plain tlist + * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to + * postpone set-returning expressions, because running them beforehand would + * bloat the sort dataset, and because it might cause unexpected output order + * if the sort isn't stable. However there's a constraint on that: all SRFs + * in the tlist should be evaluated at the same plan step, so that they can + * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we + * mustn't postpone any SRFs. (Note that in principle that policy should + * probably get applied to the group/window input targetlists too, but we + * have not done that historically.) Lastly, expensive expressions are + * postponed if there is a LIMIT, or if root->tuple_fraction shows that + * partial evaluation of the query is possible (if neither is true, we expect + * to have to evaluate the expressions for every row anyway), or if there are + * any volatile or set-returning expressions (since once we've put in a + * projection at all, it won't cost any more to postpone more stuff). + * + * Another issue that could potentially be considered here is that + * evaluating tlist expressions could result in data that's either wider + * or narrower than the input Vars, thus changing the volume of data that + * has to go through the Sort. However, we usually have only a very bad + * idea of the output width of any expression more complex than a Var, + * so for now it seems too risky to try to optimize on that basis. + * + * Note that if we do produce a modified sort-input target, and then the + * query ends up not using an explicit Sort, no particular harm is done: + * we'll initially use the modified target for the preceding path nodes, + * but then change them to the final target with apply_projection_to_path. + * Moreover, in such a case the guarantees about evaluation order of + * volatile functions still hold, since the rows are sorted already. + * + * This function has some things in common with make_group_input_target and + * make_window_input_target, though the detailed rules for what to do are + * different. We never flatten/postpone any grouping or ordering columns; + * those are needed before the sort. If we do flatten a particular + * expression, we leave Aggref and WindowFunc nodes alone, since those were + * computed earlier. + * + * 'final_target' is the query's final target list (in PathTarget form) + * 'have_postponed_srfs' is an output argument, see below + * + * The result is the PathTarget to be computed by the plan node immediately + * below the Sort step (and the Distinct step, if any). This will be + * exactly final_target if we decide a projection step wouldn't be helpful. + * + * In addition, *have_postponed_srfs is set to true if we choose to postpone + * any set-returning functions to after the Sort. + */ +static PathTarget * +make_sort_input_target(PlannerInfo *root, + PathTarget *final_target, + bool *have_postponed_srfs) +{ + Query *parse = root->parse; + PathTarget *input_target; + int ncols; + bool *col_is_srf; + bool *postpone_col; + bool have_srf; + bool have_volatile; + bool have_expensive; + bool have_srf_sortcols; + bool postpone_srfs; + List *postponable_cols; + List *postponable_vars; + int i; + ListCell *lc; + + /* Shouldn't get here unless query has ORDER BY */ + Assert(parse->sortClause); + + *have_postponed_srfs = false; /* default result */ + + /* Inspect tlist and collect per-column information */ + ncols = list_length(final_target->exprs); + col_is_srf = (bool *) palloc0(ncols * sizeof(bool)); + postpone_col = (bool *) palloc0(ncols * sizeof(bool)); + have_srf = have_volatile = have_expensive = have_srf_sortcols = false; + + i = 0; + foreach(lc, final_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + /* + * If the column has a sortgroupref, assume it has to be evaluated + * before sorting. Generally such columns would be ORDER BY, GROUP + * BY, etc targets. One exception is columns that were removed from + * GROUP BY by remove_useless_groupby_columns() ... but those would + * only be Vars anyway. There don't seem to be any cases where it + * would be worth the trouble to double-check. + */ + if (get_pathtarget_sortgroupref(final_target, i) == 0) + { + /* + * Check for SRF or volatile functions. Check the SRF case first + * because we must know whether we have any postponed SRFs. + */ + if (parse->hasTargetSRFs && + expression_returns_set((Node *) expr)) + { + /* We'll decide below whether these are postponable */ + col_is_srf[i] = true; + have_srf = true; + } + else if (contain_volatile_functions((Node *) expr)) + { + /* Unconditionally postpone */ + postpone_col[i] = true; + have_volatile = true; + } + else + { + /* + * Else check the cost. XXX it's annoying to have to do this + * when set_pathtarget_cost_width() just did it. Refactor to + * allow sharing the work? + */ + QualCost cost; + + cost_qual_eval_node(&cost, (Node *) expr, root); + + /* + * We arbitrarily define "expensive" as "more than 10X + * cpu_operator_cost". Note this will take in any PL function + * with default cost. + */ + if (cost.per_tuple > 10 * cpu_operator_cost) + { + postpone_col[i] = true; + have_expensive = true; + } + } + } + else + { + /* For sortgroupref cols, just check if any contain SRFs */ + if (!have_srf_sortcols && + parse->hasTargetSRFs && + expression_returns_set((Node *) expr)) + have_srf_sortcols = true; + } + + i++; + } + + /* + * We can postpone SRFs if we have some but none are in sortgroupref cols. + */ + postpone_srfs = (have_srf && !have_srf_sortcols); + + /* + * If we don't need a post-sort projection, just return final_target. + */ + if (!(postpone_srfs || have_volatile || + (have_expensive && + (parse->limitCount || root->tuple_fraction > 0)))) + return final_target; + + /* + * Report whether the post-sort projection will contain set-returning + * functions. This is important because it affects whether the Sort can + * rely on the query's LIMIT (if any) to bound the number of rows it needs + * to return. + */ + *have_postponed_srfs = postpone_srfs; + + /* + * Construct the sort-input target, taking all non-postponable columns and + * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in + * the postponable ones. + */ + input_target = create_empty_pathtarget(); + postponable_cols = NIL; + + i = 0; + foreach(lc, final_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + if (postpone_col[i] || (postpone_srfs && col_is_srf[i])) + postponable_cols = lappend(postponable_cols, expr); + else + add_column_to_pathtarget(input_target, expr, + get_pathtarget_sortgroupref(final_target, i)); + + i++; + } + + /* + * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in + * postponable columns, and add them to the sort-input target if not + * already present. (Some might be there already.) We mustn't + * deconstruct Aggrefs or WindowFuncs here, since the projection node + * would be unable to recompute them. + */ + postponable_vars = pull_var_clause((Node *) postponable_cols, + PVC_INCLUDE_AGGREGATES | + PVC_INCLUDE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + add_new_columns_to_pathtarget(input_target, postponable_vars); + + /* clean up cruft */ + list_free(postponable_vars); + list_free(postponable_cols); + + /* XXX this represents even more redundant cost calculation ... */ + return set_pathtarget_cost_width(root, input_target); +} + +/* + * get_cheapest_fractional_path + * Find the cheapest path for retrieving a specified fraction of all + * the tuples expected to be returned by the given relation. + * + * We interpret tuple_fraction the same way as grouping_planner. + * + * We assume set_cheapest() has been run on the given rel. + */ +Path * +get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) +{ + Path *best_path = rel->cheapest_total_path; + ListCell *l; + + /* If all tuples will be retrieved, just return the cheapest-total path */ + if (tuple_fraction <= 0.0) + return best_path; + + /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */ + if (tuple_fraction >= 1.0 && best_path->rows > 0) + tuple_fraction /= best_path->rows; + + foreach(l, rel->pathlist) + { + Path *path = (Path *) lfirst(l); + + if (path == rel->cheapest_total_path || + compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0) + continue; + + best_path = path; + } + + return best_path; +} + +/* + * adjust_paths_for_srfs + * Fix up the Paths of the given upperrel to handle tSRFs properly. + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a ProjectSet plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * modifies each Path of an upperrel that (might) compute any SRFs in its + * output tlist to insert appropriate projection steps. + * + * The given targets and targets_contain_srfs lists are from + * split_pathtarget_at_srfs(). We assume the existing Paths emit the first + * target in targets. + */ +static void +adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs) +{ + ListCell *lc; + + Assert(list_length(targets) == list_length(targets_contain_srfs)); + Assert(!linitial_int(targets_contain_srfs)); + + /* If no SRFs appear at this plan level, nothing to do */ + if (list_length(targets) == 1) + return; + + /* + * Stack SRF-evaluation nodes atop each path for the rel. + * + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist eval + * costs to all the paths would change that, so we don't bother. Instead, + * just assume that the cheapest-startup and cheapest-total paths remain + * so. (There should be no parameterized paths anymore, so we needn't + * worry about updating cheapest_parameterized_paths.) + */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = lfirst_node(PathTarget, lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_set_projection_path(root, + rel, + newpath, + thistarget); + else + newpath = (Path *) apply_projection_to_path(root, + rel, + newpath, + thistarget); + } + lfirst(lc) = newpath; + if (subpath == rel->cheapest_startup_path) + rel->cheapest_startup_path = newpath; + if (subpath == rel->cheapest_total_path) + rel->cheapest_total_path = newpath; + } + + /* Likewise for partial paths, if any */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = lfirst_node(PathTarget, lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_set_projection_path(root, + rel, + newpath, + thistarget); + else + { + /* avoid apply_projection_to_path, in case of multiple refs */ + newpath = (Path *) create_projection_path(root, + rel, + newpath, + thistarget); + } + } + lfirst(lc) = newpath; + } +} + +/* + * expression_planner + * Perform planner's transformations on a standalone expression. + * + * Various utility commands need to evaluate expressions that are not part + * of a plannable query. They can do so using the executor's regular + * expression-execution machinery, but first the expression has to be fed + * through here to transform it from parser output to something executable. + * + * Currently, we disallow sublinks in standalone expressions, so there's no + * real "planning" involved here. (That might not always be true though.) + * What we must do is run eval_const_expressions to ensure that any function + * calls are converted to positional notation and function default arguments + * get inserted. The fact that constant subexpressions get simplified is a + * side-effect that is useful when the expression will get evaluated more than + * once. Also, we must fix operator function IDs. + * + * This does not return any information about dependencies of the expression. + * Hence callers should use the results only for the duration of the current + * query. Callers that would like to cache the results for longer should use + * expression_planner_with_deps, probably via the plancache. + * + * Note: this must not make any damaging changes to the passed-in expression + * tree. (It would actually be okay to apply fix_opfuncids to it, but since + * we first do an expression_tree_mutator-based walk, what is returned will + * be a new node tree.) The result is constructed in the current memory + * context; beware that this can leak a lot of additional stuff there, too. + */ +Expr * +expression_planner(Expr *expr) +{ + Node *result; + + /* + * Convert named-argument function calls, insert default arguments and + * simplify constant subexprs + */ + result = eval_const_expressions(NULL, (Node *) expr); + + /* Fill in opfuncid values if missing */ + fix_opfuncids(result); + + return (Expr *) result; +} + +/* + * expression_planner_with_deps + * Perform planner's transformations on a standalone expression, + * returning expression dependency information along with the result. + * + * This is identical to expression_planner() except that it also returns + * information about possible dependencies of the expression, ie identities of + * objects whose definitions affect the result. As in a PlannedStmt, these + * are expressed as a list of relation Oids and a list of PlanInvalItems. + */ +Expr * +expression_planner_with_deps(Expr *expr, + List **relationOids, + List **invalItems) +{ + Node *result; + PlannerGlobal glob; + PlannerInfo root; + + /* Make up dummy planner state so we can use setrefs machinery */ + MemSet(&glob, 0, sizeof(glob)); + glob.type = T_PlannerGlobal; + glob.relationOids = NIL; + glob.invalItems = NIL; + + MemSet(&root, 0, sizeof(root)); + root.type = T_PlannerInfo; + root.glob = &glob; + + /* + * Convert named-argument function calls, insert default arguments and + * simplify constant subexprs. Collect identities of inlined functions + * and elided domains, too. + */ + result = eval_const_expressions(&root, (Node *) expr); + + /* Fill in opfuncid values if missing */ + fix_opfuncids(result); + + /* + * Now walk the finished expression to find anything else we ought to + * record as an expression dependency. + */ + (void) extract_query_dependencies_walker(result, &root); + + *relationOids = glob.relationOids; + *invalItems = glob.invalItems; + + return (Expr *) result; +} + + +/* + * plan_cluster_use_sort + * Use the planner to decide how CLUSTER should implement sorting + * + * tableOid is the OID of a table to be clustered on its index indexOid + * (which is already known to be a btree index). Decide whether it's + * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER. + * Return true to use sorting, false to use an indexscan. + * + * Note: caller had better already hold some type of lock on the table. + */ +bool +plan_cluster_use_sort(Oid tableOid, Oid indexOid) +{ + PlannerInfo *root; + Query *query; + PlannerGlobal *glob; + RangeTblEntry *rte; + RelOptInfo *rel; + IndexOptInfo *indexInfo; + QualCost indexExprCost; + Cost comparisonCost; + Path *seqScanPath; + Path seqScanAndSortPath; + IndexPath *indexScanPath; + ListCell *lc; + + /* We can short-circuit the cost comparison if indexscans are disabled */ + if (!enable_indexscan) + return true; /* use sort */ + + /* Set up mostly-dummy planner state */ + query = makeNode(Query); + query->commandType = CMD_SELECT; + + glob = makeNode(PlannerGlobal); + + root = makeNode(PlannerInfo); + root->parse = query; + root->glob = glob; + root->query_level = 1; + root->planner_cxt = CurrentMemoryContext; + root->wt_param_id = -1; + + /* Build a minimal RTE for the rel */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = tableOid; + rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->rellockmode = AccessShareLock; + rte->lateral = false; + rte->inh = false; + rte->inFromCl = true; + query->rtable = list_make1(rte); + + /* Set up RTE/RelOptInfo arrays */ + setup_simple_rel_arrays(root); + + /* Build RelOptInfo */ + rel = build_simple_rel(root, 1, NULL); + + /* Locate IndexOptInfo for the target index */ + indexInfo = NULL; + foreach(lc, rel->indexlist) + { + indexInfo = lfirst_node(IndexOptInfo, lc); + if (indexInfo->indexoid == indexOid) + break; + } + + /* + * It's possible that get_relation_info did not generate an IndexOptInfo + * for the desired index; this could happen if it's not yet reached its + * indcheckxmin usability horizon, or if it's a system index and we're + * ignoring system indexes. In such cases we should tell CLUSTER to not + * trust the index contents but use seqscan-and-sort. + */ + if (lc == NULL) /* not in the list? */ + return true; /* use sort */ + + /* + * Rather than doing all the pushups that would be needed to use + * set_baserel_size_estimates, just do a quick hack for rows and width. + */ + rel->rows = rel->tuples; + rel->reltarget->width = get_relation_data_width(tableOid, NULL); + + root->total_table_pages = rel->pages; + + /* + * Determine eval cost of the index expressions, if any. We need to + * charge twice that amount for each tuple comparison that happens during + * the sort, since tuplesort.c will have to re-evaluate the index + * expressions each time. (XXX that's pretty inefficient...) + */ + cost_qual_eval(&indexExprCost, indexInfo->indexprs, root); + comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple); + + /* Estimate the cost of seq scan + sort */ + seqScanPath = create_seqscan_path(root, rel, NULL, 0); + cost_sort(&seqScanAndSortPath, root, NIL, + seqScanPath->total_cost, rel->tuples, rel->reltarget->width, + comparisonCost, maintenance_work_mem, -1.0); + + /* Estimate the cost of index scan */ + indexScanPath = create_index_path(root, indexInfo, + NIL, NIL, NIL, NIL, + ForwardScanDirection, false, + NULL, 1.0, false); + + return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); +} + +/* + * plan_create_index_workers + * Use the planner to decide how many parallel worker processes + * CREATE INDEX should request for use + * + * tableOid is the table on which the index is to be built. indexOid is the + * OID of an index to be created or reindexed (which must be a btree index). + * + * Return value is the number of parallel worker processes to request. It + * may be unsafe to proceed if this is 0. Note that this does not include the + * leader participating as a worker (value is always a number of parallel + * worker processes). + * + * Note: caller had better already hold some type of lock on the table and + * index. + */ +int +plan_create_index_workers(Oid tableOid, Oid indexOid) +{ + PlannerInfo *root; + Query *query; + PlannerGlobal *glob; + RangeTblEntry *rte; + Relation heap; + Relation index; + RelOptInfo *rel; + int parallel_workers; + BlockNumber heap_blocks; + double reltuples; + double allvisfrac; + + /* + * We don't allow performing parallel operation in standalone backend or + * when parallelism is disabled. + */ + if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0) + return 0; + + /* Set up largely-dummy planner state */ + query = makeNode(Query); + query->commandType = CMD_SELECT; + + glob = makeNode(PlannerGlobal); + + root = makeNode(PlannerInfo); + root->parse = query; + root->glob = glob; + root->query_level = 1; + root->planner_cxt = CurrentMemoryContext; + root->wt_param_id = -1; + + /* + * Build a minimal RTE. + * + * Mark the RTE with inh = true. This is a kludge to prevent + * get_relation_info() from fetching index info, which is necessary + * because it does not expect that any IndexOptInfo is currently + * undergoing REINDEX. + */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = tableOid; + rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->rellockmode = AccessShareLock; + rte->lateral = false; + rte->inh = true; + rte->inFromCl = true; + query->rtable = list_make1(rte); + + /* Set up RTE/RelOptInfo arrays */ + setup_simple_rel_arrays(root); + + /* Build RelOptInfo */ + rel = build_simple_rel(root, 1, NULL); + + /* Rels are assumed already locked by the caller */ + heap = table_open(tableOid, NoLock); + index = index_open(indexOid, NoLock); + + /* + * Determine if it's safe to proceed. + * + * Currently, parallel workers can't access the leader's temporary tables. + * Furthermore, any index predicate or index expressions must be parallel + * safe. + */ + if (heap->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + !is_parallel_safe(root, (Node *) RelationGetIndexExpressions(index)) || + !is_parallel_safe(root, (Node *) RelationGetIndexPredicate(index))) + { + parallel_workers = 0; + goto done; + } + + /* + * If parallel_workers storage parameter is set for the table, accept that + * as the number of parallel worker processes to launch (though still cap + * at max_parallel_maintenance_workers). Note that we deliberately do not + * consider any other factor when parallel_workers is set. (e.g., memory + * use by workers.) + */ + if (rel->rel_parallel_workers != -1) + { + parallel_workers = Min(rel->rel_parallel_workers, + max_parallel_maintenance_workers); + goto done; + } + + /* + * Estimate heap relation size ourselves, since rel->pages cannot be + * trusted (heap RTE was marked as inheritance parent) + */ + estimate_rel_size(heap, NULL, &heap_blocks, &reltuples, &allvisfrac); + + /* + * Determine number of workers to scan the heap relation using generic + * model + */ + parallel_workers = compute_parallel_worker(rel, heap_blocks, -1, + max_parallel_maintenance_workers); + + /* + * Cap workers based on available maintenance_work_mem as needed. + * + * Note that each tuplesort participant receives an even share of the + * total maintenance_work_mem budget. Aim to leave participants + * (including the leader as a participant) with no less than 32MB of + * memory. This leaves cases where maintenance_work_mem is set to 64MB + * immediately past the threshold of being capable of launching a single + * parallel worker to sort. + */ + while (parallel_workers > 0 && + maintenance_work_mem / (parallel_workers + 1) < 32768L) + parallel_workers--; + +done: + index_close(index, NoLock); + table_close(heap, NoLock); + + return parallel_workers; +} + +/* + * add_paths_to_grouping_rel + * + * Add non-partial paths to grouping relation. + */ +static void +add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, double dNumGroups, + GroupPathExtraData *extra) +{ + Query *parse = root->parse; + Path *cheapest_path = input_rel->cheapest_total_path; + ListCell *lc; + bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + List *havingQual = (List *) extra->havingQual; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + + if (can_sort) + { + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest-total path. + */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + Path *path_original = path; + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (path == cheapest_path || is_sorted) + { + /* Sort the cheapest-total path if it isn't already sorted */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make an + * AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. Make + * a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + + /* + * Instead of operating directly on the input relation, we can + * consider finalizing a partially aggregated path. + */ + if (partially_grouped_rel != NULL) + { + foreach(lc, partially_grouped_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + Path *path_original = path; + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* + * Insert a Sort node, if required. But there's no point in + * sorting anything but the cheapest path. + */ + if (!is_sorted) + { + if (path != partially_grouped_rel->cheapest_total_path) + continue; + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + } + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental + * sort is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 + * because then presorted_keys > 0 would imply is_sorted was + * true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + } + } + + if (can_hash) + { + if (parse->groupingSets) + { + /* + * Try for a hash-only groupingsets path over unsorted input. + */ + consider_groupingsets_paths(root, grouped_rel, + cheapest_path, false, true, + gd, agg_costs, dNumGroups); + } + else + { + /* + * Generate a HashAgg Path. We just need an Agg over the + * cheapest-total input path, since input order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + cheapest_path, + grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + + /* + * Generate a Finalize HashAgg Path atop of the cheapest partially + * grouped path, assuming there is one + */ + if (partially_grouped_rel && partially_grouped_rel->pathlist) + { + Path *path = partially_grouped_rel->cheapest_total_path; + + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + } + } + + /* + * When partitionwise aggregate is used, we might have fully aggregated + * paths in the partial pathlist, because add_paths_to_append_rel() will + * consider a path for grouped_rel consisting of a Parallel Append of + * non-partial paths from each child. + */ + if (grouped_rel->partial_pathlist != NIL) + gather_grouping_paths(root, grouped_rel); +} + +/* + * create_partial_grouping_paths + * + * Create a new upper relation representing the result of partial aggregation + * and populate it with appropriate paths. Note that we don't finalize the + * lists of paths here, so the caller can add additional partial or non-partial + * paths and must afterward call gather_grouping_paths and set_cheapest on + * the returned upper relation. + * + * All paths for this new upper relation -- both partial and non-partial -- + * have been partially aggregated but require a subsequent FinalizeAggregate + * step. + * + * NB: This function is allowed to return NULL if it determines that there is + * no real need to create a new RelOptInfo. + */ +static RelOptInfo * +create_partial_grouping_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + RelOptInfo *input_rel, + grouping_sets_data *gd, + GroupPathExtraData *extra, + bool force_rel_creation) +{ + Query *parse = root->parse; + RelOptInfo *partially_grouped_rel; + AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + Path *cheapest_partial_path = NULL; + Path *cheapest_total_path = NULL; + double dNumPartialGroups = 0; + double dNumPartialPartialGroups = 0; + ListCell *lc; + bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + + /* + * Consider whether we should generate partially aggregated non-partial + * paths. We can only do this if we have a non-partial path, and only if + * the parent of the input rel is performing partial partitionwise + * aggregation. (Note that extra->patype is the type of partitionwise + * aggregation being used at the parent level, not this level.) + */ + if (input_rel->pathlist != NIL && + extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL) + cheapest_total_path = input_rel->cheapest_total_path; + + /* + * If parallelism is possible for grouped_rel, then we should consider + * generating partially-grouped partial paths. However, if the input rel + * has no partial paths, then we can't. + */ + if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL) + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + /* + * If we can't partially aggregate partial paths, and we can't partially + * aggregate non-partial paths, then don't bother creating the new + * RelOptInfo at all, unless the caller specified force_rel_creation. + */ + if (cheapest_total_path == NULL && + cheapest_partial_path == NULL && + !force_rel_creation) + return NULL; + + /* + * Build a new upper relation to represent the result of partially + * aggregating the rows from the input relation. + */ + partially_grouped_rel = fetch_upper_rel(root, + UPPERREL_PARTIAL_GROUP_AGG, + grouped_rel->relids); + partially_grouped_rel->consider_parallel = + grouped_rel->consider_parallel; + partially_grouped_rel->reloptkind = grouped_rel->reloptkind; + partially_grouped_rel->serverid = grouped_rel->serverid; + partially_grouped_rel->userid = grouped_rel->userid; + partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent; + partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine; + + /* + * Build target list for partial aggregate paths. These paths cannot just + * emit the same tlist as regular aggregate paths, because (1) we must + * include Vars and Aggrefs needed in HAVING, which might not appear in + * the result tlist, and (2) the Aggrefs must be set in partial mode. + */ + partially_grouped_rel->reltarget = + make_partial_grouping_target(root, grouped_rel->reltarget, + extra->havingQual); + + if (!extra->partial_costs_set) + { + /* + * Collect statistics about aggregates for estimating costs of + * performing aggregation in parallel. + */ + MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts)); + MemSet(agg_final_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + /* partial phase */ + get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL, + agg_partial_costs); + + /* final phase */ + get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + } + + extra->partial_costs_set = true; + } + + /* Estimate number of partial groups. */ + if (cheapest_total_path != NULL) + dNumPartialGroups = + get_number_of_groups(root, + cheapest_total_path->rows, + gd, + extra->targetList); + if (cheapest_partial_path != NULL) + dNumPartialPartialGroups = + get_number_of_groups(root, + cheapest_partial_path->rows, + gd, + extra->targetList); + + if (can_sort && cheapest_total_path != NULL) + { + /* This should have been checked previously */ + Assert(parse->hasAggs || parse->groupClause); + + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest partial path. + */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_total_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } + } + + /* + * Consider incremental sort on all partial paths, if enabled. + * + * We can also skip the entire loop when we only have a single-item + * group_pathkeys because then we can't possibly have a presorted + * prefix of the list without having the list be fully sorted. + */ + if (enable_incremental_sort && list_length(root->group_pathkeys) > 1) + { + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* Ignore already sorted paths */ + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* Since we have presorted keys, consider incremental sort. */ + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } + } + } + + if (can_sort && cheapest_partial_path != NULL) + { + /* Similar to above logic, but for partial paths. */ + foreach(lc, input_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + Path *path_original = path; + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (path == cheapest_partial_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); + } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); + } + } + + /* + * Add a partially-grouped HashAgg Path where possible + */ + if (can_hash && cheapest_total_path != NULL) + { + /* Checked above */ + Assert(parse->hasAggs || parse->groupClause); + + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + cheapest_total_path, + partially_grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + } + + /* + * Now add a partially-grouped HashAgg partial Path where possible + */ + if (can_hash && cheapest_partial_path != NULL) + { + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + cheapest_partial_path, + partially_grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + } + + /* + * If there is an FDW that's responsible for all baserels of the query, + * let it consider adding partially grouped ForeignPaths. + */ + if (partially_grouped_rel->fdwroutine && + partially_grouped_rel->fdwroutine->GetForeignUpperPaths) + { + FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine; + + fdwroutine->GetForeignUpperPaths(root, + UPPERREL_PARTIAL_GROUP_AGG, + input_rel, partially_grouped_rel, + extra); + } + + return partially_grouped_rel; +} + +/* + * Generate Gather and Gather Merge paths for a grouping relation or partial + * grouping relation. + * + * generate_useful_gather_paths does most of the work, but we also consider a + * special case: we could try sorting the data by the group_pathkeys and then + * applying Gather Merge. + * + * NB: This function shouldn't be used for anything other than a grouped or + * partially grouped relation not only because of the fact that it explicitly + * references group_pathkeys but we pass "true" as the third argument to + * generate_useful_gather_paths(). + */ +static void +gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + Path *cheapest_partial_path; + + /* Try Gather for unordered paths and Gather Merge for ordered ones. */ + generate_useful_gather_paths(root, rel, true); + + /* Try cheapest partial path + explicit Sort + Gather Merge. */ + cheapest_partial_path = linitial(rel->partial_pathlist); + if (!pathkeys_contained_in(root->group_pathkeys, + cheapest_partial_path->pathkeys)) + { + Path *path; + double total_groups; + + total_groups = + cheapest_partial_path->rows * cheapest_partial_path->parallel_workers; + path = (Path *) create_sort_path(root, rel, cheapest_partial_path, + root->group_pathkeys, + -1.0); + path = (Path *) + create_gather_merge_path(root, + rel, + path, + rel->reltarget, + root->group_pathkeys, + NULL, + &total_groups); + + add_path(rel, path); + } + + /* + * Consider incremental sort on all partial paths, if enabled. + * + * We can also skip the entire loop when we only have a single-item + * group_pathkeys because then we can't possibly have a presorted prefix + * of the list without having the list be fully sorted. + */ + if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1) + return; + + /* also consider incremental sort on partial paths, if enabled */ + foreach(lc, rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + double total_groups; + + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + path = (Path *) + create_gather_merge_path(root, + rel, + path, + rel->reltarget, + root->group_pathkeys, + NULL, + &total_groups); + + add_path(rel, path); + } +} + +/* + * can_partial_agg + * + * Determines whether or not partial grouping and/or aggregation is possible. + * Returns true when possible, false otherwise. + */ +static bool +can_partial_agg(PlannerInfo *root) +{ + Query *parse = root->parse; + + if (!parse->hasAggs && parse->groupClause == NIL) + { + /* + * We don't know how to do parallel aggregation unless we have either + * some aggregates or a grouping clause. + */ + return false; + } + else if (parse->groupingSets) + { + /* We don't know how to do grouping sets in parallel. */ + return false; + } + else if (root->hasNonPartialAggs || root->hasNonSerialAggs) + { + /* Insufficient support for partial mode. */ + return false; + } + + /* Everything looks good. */ + return true; +} + +/* + * apply_scanjoin_target_to_paths + * + * Adjust the final scan/join relation, and recursively all of its children, + * to generate the final scan/join target. It would be more correct to model + * this as a separate planning step with a new RelOptInfo at the toplevel and + * for each child relation, but doing it this way is noticeably cheaper. + * Maybe that problem can be solved at some point, but for now we do this. + * + * If tlist_same_exprs is true, then the scan/join target to be applied has + * the same expressions as the existing reltarget, so we need only insert the + * appropriate sortgroupref information. By avoiding the creation of + * projection paths we save effort both immediately and at plan creation time. + */ +static void +apply_scanjoin_target_to_paths(PlannerInfo *root, + RelOptInfo *rel, + List *scanjoin_targets, + List *scanjoin_targets_contain_srfs, + bool scanjoin_target_parallel_safe, + bool tlist_same_exprs) +{ + bool rel_is_partitioned = IS_PARTITIONED_REL(rel); + PathTarget *scanjoin_target; + ListCell *lc; + + /* This recurses, so be paranoid. */ + check_stack_depth(); + + /* + * If the rel is partitioned, we want to drop its existing paths and + * generate new ones. This function would still be correct if we kept the + * existing paths: we'd modify them to generate the correct target above + * the partitioning Append, and then they'd compete on cost with paths + * generating the target below the Append. However, in our current cost + * model the latter way is always the same or cheaper cost, so modifying + * the existing paths would just be useless work. Moreover, when the cost + * is the same, varying roundoff errors might sometimes allow an existing + * path to be picked, resulting in undesirable cross-platform plan + * variations. So we drop old paths and thereby force the work to be done + * below the Append, except in the case of a non-parallel-safe target. + * + * Some care is needed, because we have to allow + * generate_useful_gather_paths to see the old partial paths in the next + * stanza. Hence, zap the main pathlist here, then allow + * generate_useful_gather_paths to add path(s) to the main list, and + * finally zap the partial pathlist. + */ + if (rel_is_partitioned) + rel->pathlist = NIL; + + /* + * If the scan/join target is not parallel-safe, partial paths cannot + * generate it. + */ + if (!scanjoin_target_parallel_safe) + { + /* + * Since we can't generate the final scan/join target in parallel + * workers, this is our last opportunity to use any partial paths that + * exist; so build Gather path(s) that use them and emit whatever the + * current reltarget is. We don't do this in the case where the + * target is parallel-safe, since we will be able to generate superior + * paths by doing it after the final scan/join target has been + * applied. + */ + generate_useful_gather_paths(root, rel, false); + + /* Can't use parallel query above this level. */ + rel->partial_pathlist = NIL; + rel->consider_parallel = false; + } + + /* Finish dropping old paths for a partitioned rel, per comment above */ + if (rel_is_partitioned) + rel->partial_pathlist = NIL; + + /* Extract SRF-free scan/join target. */ + scanjoin_target = linitial_node(PathTarget, scanjoin_targets); + + /* + * Apply the SRF-free scan/join target to each existing path. + * + * If the tlist exprs are the same, we can just inject the sortgroupref + * information into the existing pathtargets. Otherwise, replace each + * path with a projection path that generates the SRF-free scan/join + * target. This can't change the ordering of paths within rel->pathlist, + * so we just modify the list in place. + */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + if (tlist_same_exprs) + subpath->pathtarget->sortgrouprefs = + scanjoin_target->sortgrouprefs; + else + { + Path *newpath; + + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + + /* Likewise adjust the targets for any partial paths. */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + if (tlist_same_exprs) + subpath->pathtarget->sortgrouprefs = + scanjoin_target->sortgrouprefs; + else + { + Path *newpath; + + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + + /* + * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s) + * atop each existing path. (Note that this function doesn't look at the + * cheapest-path fields, which is a good thing because they're bogus right + * now.) + */ + if (root->parse->hasTargetSRFs) + adjust_paths_for_srfs(root, rel, + scanjoin_targets, + scanjoin_targets_contain_srfs); + + /* + * Update the rel's target to be the final (with SRFs) scan/join target. + * This now matches the actual output of all the paths, and we might get + * confused in createplan.c if they don't agree. We must do this now so + * that any append paths made in the next part will use the correct + * pathtarget (cf. create_append_path). + * + * Note that this is also necessary if GetForeignUpperPaths() gets called + * on the final scan/join relation or on any of its children, since the + * FDW might look at the rel's target to create ForeignPaths. + */ + rel->reltarget = llast_node(PathTarget, scanjoin_targets); + + /* + * If the relation is partitioned, recursively apply the scan/join target + * to all partitions, and generate brand-new Append paths in which the + * scan/join target is computed below the Append rather than above it. + * Since Append is not projection-capable, that might save a separate + * Result node, and it also is important for partitionwise aggregate. + */ + if (rel_is_partitioned) + { + List *live_children = NIL; + int i; + + /* Adjust each partition. */ + i = -1; + while ((i = bms_next_member(rel->live_parts, i)) >= 0) + { + RelOptInfo *child_rel = rel->part_rels[i]; + AppendRelInfo **appinfos; + int nappinfos; + List *child_scanjoin_targets = NIL; + ListCell *lc; + + Assert(child_rel != NULL); + + /* Dummy children can be ignored. */ + if (IS_DUMMY_REL(child_rel)) + continue; + + /* Translate scan/join targets for this child. */ + appinfos = find_appinfos_by_relids(root, child_rel->relids, + &nappinfos); + foreach(lc, scanjoin_targets) + { + PathTarget *target = lfirst_node(PathTarget, lc); + + target = copy_pathtarget(target); + target->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) target->exprs, + nappinfos, appinfos); + child_scanjoin_targets = lappend(child_scanjoin_targets, + target); + } + pfree(appinfos); + + /* Recursion does the real work. */ + apply_scanjoin_target_to_paths(root, child_rel, + child_scanjoin_targets, + scanjoin_targets_contain_srfs, + scanjoin_target_parallel_safe, + tlist_same_exprs); + + /* Save non-dummy children for Append paths. */ + if (!IS_DUMMY_REL(child_rel)) + live_children = lappend(live_children, child_rel); + } + + /* Build new paths for this relation by appending child paths. */ + add_paths_to_append_rel(root, rel, live_children); + } + + /* + * Consider generating Gather or Gather Merge paths. We must only do this + * if the relation is parallel safe, and we don't do it for child rels to + * avoid creating multiple Gather nodes within the same plan. We must do + * this after all paths have been generated and before set_cheapest, since + * one of the generated paths may turn out to be the cheapest one. + */ + if (rel->consider_parallel && !IS_OTHER_REL(rel)) + generate_useful_gather_paths(root, rel, false); + + /* + * Reassess which paths are the cheapest, now that we've potentially added + * new Gather (or Gather Merge) and/or Append (or MergeAppend) paths to + * this relation. + */ + set_cheapest(rel); +} + +/* + * create_partitionwise_grouping_paths + * + * If the partition keys of input relation are part of the GROUP BY clause, all + * the rows belonging to a given group come from a single partition. This + * allows aggregation/grouping over a partitioned relation to be broken down + * into aggregation/grouping on each partition. This should be no worse, and + * often better, than the normal approach. + * + * However, if the GROUP BY clause does not contain all the partition keys, + * rows from a given group may be spread across multiple partitions. In that + * case, we perform partial aggregation for each group, append the results, + * and then finalize aggregation. This is less certain to win than the + * previous case. It may win if the PartialAggregate stage greatly reduces + * the number of groups, because fewer rows will pass through the Append node. + * It may lose if we have lots of small groups. + */ +static void +create_partitionwise_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + PartitionwiseAggregateType patype, + GroupPathExtraData *extra) +{ + List *grouped_live_children = NIL; + List *partially_grouped_live_children = NIL; + PathTarget *target = grouped_rel->reltarget; + bool partial_grouping_valid = true; + int i; + + Assert(patype != PARTITIONWISE_AGGREGATE_NONE); + Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL || + partially_grouped_rel != NULL); + + /* Add paths for partitionwise aggregation/grouping. */ + i = -1; + while ((i = bms_next_member(input_rel->live_parts, i)) >= 0) + { + RelOptInfo *child_input_rel = input_rel->part_rels[i]; + PathTarget *child_target; + AppendRelInfo **appinfos; + int nappinfos; + GroupPathExtraData child_extra; + RelOptInfo *child_grouped_rel; + RelOptInfo *child_partially_grouped_rel; + + Assert(child_input_rel != NULL); + + /* Dummy children can be ignored. */ + if (IS_DUMMY_REL(child_input_rel)) + continue; + + child_target = copy_pathtarget(target); + + /* + * Copy the given "extra" structure as is and then override the + * members specific to this child. + */ + memcpy(&child_extra, extra, sizeof(child_extra)); + + appinfos = find_appinfos_by_relids(root, child_input_rel->relids, + &nappinfos); + + child_target->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) target->exprs, + nappinfos, appinfos); + + /* Translate havingQual and targetList. */ + child_extra.havingQual = (Node *) + adjust_appendrel_attrs(root, + extra->havingQual, + nappinfos, appinfos); + child_extra.targetList = (List *) + adjust_appendrel_attrs(root, + (Node *) extra->targetList, + nappinfos, appinfos); + + /* + * extra->patype was the value computed for our parent rel; patype is + * the value for this relation. For the child, our value is its + * parent rel's value. + */ + child_extra.patype = patype; + + /* + * Create grouping relation to hold fully aggregated grouping and/or + * aggregation paths for the child. + */ + child_grouped_rel = make_grouping_rel(root, child_input_rel, + child_target, + extra->target_parallel_safe, + child_extra.havingQual); + + /* Create grouping paths for this child relation. */ + create_ordinary_grouping_paths(root, child_input_rel, + child_grouped_rel, + agg_costs, gd, &child_extra, + &child_partially_grouped_rel); + + if (child_partially_grouped_rel) + { + partially_grouped_live_children = + lappend(partially_grouped_live_children, + child_partially_grouped_rel); + } + else + partial_grouping_valid = false; + + if (patype == PARTITIONWISE_AGGREGATE_FULL) + { + set_cheapest(child_grouped_rel); + grouped_live_children = lappend(grouped_live_children, + child_grouped_rel); + } + + pfree(appinfos); + } + + /* + * Try to create append paths for partially grouped children. For full + * partitionwise aggregation, we might have paths in the partial_pathlist + * if parallel aggregation is possible. For partial partitionwise + * aggregation, we may have paths in both pathlist and partial_pathlist. + * + * NB: We must have a partially grouped path for every child in order to + * generate a partially grouped path for this relation. + */ + if (partially_grouped_rel && partial_grouping_valid) + { + Assert(partially_grouped_live_children != NIL); + + add_paths_to_append_rel(root, partially_grouped_rel, + partially_grouped_live_children); + + /* + * We need call set_cheapest, since the finalization step will use the + * cheapest path from the rel. + */ + if (partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); + } + + /* If possible, create append paths for fully grouped children. */ + if (patype == PARTITIONWISE_AGGREGATE_FULL) + { + Assert(grouped_live_children != NIL); + + add_paths_to_append_rel(root, grouped_rel, grouped_live_children); + } +} + +/* + * group_by_has_partkey + * + * Returns true, if all the partition keys of the given relation are part of + * the GROUP BY clauses, false otherwise. + */ +static bool +group_by_has_partkey(RelOptInfo *input_rel, + List *targetList, + List *groupClause) +{ + List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList); + int cnt = 0; + int partnatts; + + /* Input relation should be partitioned. */ + Assert(input_rel->part_scheme); + + /* Rule out early, if there are no partition keys present. */ + if (!input_rel->partexprs) + return false; + + partnatts = input_rel->part_scheme->partnatts; + + for (cnt = 0; cnt < partnatts; cnt++) + { + List *partexprs = input_rel->partexprs[cnt]; + ListCell *lc; + bool found = false; + + foreach(lc, partexprs) + { + Expr *partexpr = lfirst(lc); + + if (list_member(groupexprs, partexpr)) + { + found = true; + break; + } + } + + /* + * If none of the partition key expressions match with any of the + * GROUP BY expression, return false. + */ + if (!found) + return false; + } + + return true; +} diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c new file mode 100644 index 0000000..9d912a8 --- /dev/null +++ b/src/backend/optimizer/plan/setrefs.c @@ -0,0 +1,3398 @@ +/*------------------------------------------------------------------------- + * + * setrefs.c + * Post-processing of a completed plan tree: fix references to subplan + * vars, compute regproc values for operators, etc + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/setrefs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/transam.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/tlist.h" +#include "tcop/utility.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +typedef struct +{ + int varno; /* RT index of Var */ + AttrNumber varattno; /* attr number of Var */ + AttrNumber resno; /* TLE position of Var */ +} tlist_vinfo; + +typedef struct +{ + List *tlist; /* underlying target list */ + int num_vars; /* number of plain Var tlist entries */ + bool has_ph_vars; /* are there PlaceHolderVar entries? */ + bool has_non_vars; /* are there other entries? */ + tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */ +} indexed_tlist; + +typedef struct +{ + PlannerInfo *root; + int rtoffset; + double num_exec; +} fix_scan_expr_context; + +typedef struct +{ + PlannerInfo *root; + indexed_tlist *outer_itlist; + indexed_tlist *inner_itlist; + Index acceptable_rel; + int rtoffset; + double num_exec; +} fix_join_expr_context; + +typedef struct +{ + PlannerInfo *root; + indexed_tlist *subplan_itlist; + int newvarno; + int rtoffset; + double num_exec; +} fix_upper_expr_context; + +typedef struct +{ + PlannerInfo *root; + indexed_tlist *subplan_itlist; + int newvarno; +} fix_windowagg_cond_context; + +/* + * Selecting the best alternative in an AlternativeSubPlan expression requires + * estimating how many times that expression will be evaluated. For an + * expression in a plan node's targetlist, the plan's estimated number of + * output rows is clearly what to use, but for an expression in a qual it's + * far less clear. Since AlternativeSubPlans aren't heavily used, we don't + * want to expend a lot of cycles making such estimates. What we use is twice + * the number of output rows. That's not entirely unfounded: we know that + * clause_selectivity() would fall back to a default selectivity estimate + * of 0.5 for any SubPlan, so if the qual containing the SubPlan is the last + * to be applied (which it likely would be, thanks to order_qual_clauses()), + * this matches what we could have estimated in a far more laborious fashion. + * Obviously there are many other scenarios, but it's probably not worth the + * trouble to try to improve on this estimate, especially not when we don't + * have a better estimate for the selectivity of the SubPlan qual itself. + */ +#define NUM_EXEC_TLIST(parentplan) ((parentplan)->plan_rows) +#define NUM_EXEC_QUAL(parentplan) ((parentplan)->plan_rows * 2.0) + +/* + * Check if a Const node is a regclass value. We accept plain OID too, + * since a regclass Const will get folded to that type if it's an argument + * to oideq or similar operators. (This might result in some extraneous + * values in a plan's list of relation dependencies, but the worst result + * would be occasional useless replans.) + */ +#define ISREGCLASSCONST(con) \ + (((con)->consttype == REGCLASSOID || (con)->consttype == OIDOID) && \ + !(con)->constisnull) + +#define fix_scan_list(root, lst, rtoffset, num_exec) \ + ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset, num_exec)) + +static void add_rtes_to_flat_rtable(PlannerInfo *root, bool recursing); +static void flatten_unplanned_rtes(PlannerGlobal *glob, RangeTblEntry *rte); +static bool flatten_rtes_walker(Node *node, PlannerGlobal *glob); +static void add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte); +static Plan *set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset); +static Plan *set_indexonlyscan_references(PlannerInfo *root, + IndexOnlyScan *plan, + int rtoffset); +static Plan *set_subqueryscan_references(PlannerInfo *root, + SubqueryScan *plan, + int rtoffset); +static Plan *clean_up_removed_plan_level(Plan *parent, Plan *child); +static void set_foreignscan_references(PlannerInfo *root, + ForeignScan *fscan, + int rtoffset); +static void set_customscan_references(PlannerInfo *root, + CustomScan *cscan, + int rtoffset); +static Plan *set_append_references(PlannerInfo *root, + Append *aplan, + int rtoffset); +static Plan *set_mergeappend_references(PlannerInfo *root, + MergeAppend *mplan, + int rtoffset); +static void set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset); +static Relids offset_relid_set(Relids relids, int rtoffset); +static Node *fix_scan_expr(PlannerInfo *root, Node *node, + int rtoffset, double num_exec); +static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context); +static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context); +static void set_join_references(PlannerInfo *root, Join *join, int rtoffset); +static void set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset); +static void set_param_references(PlannerInfo *root, Plan *plan); +static Node *convert_combining_aggrefs(Node *node, void *context); +static void set_dummy_tlist_references(Plan *plan, int rtoffset); +static indexed_tlist *build_tlist_index(List *tlist); +static Var *search_indexed_tlist_for_var(Var *var, + indexed_tlist *itlist, + int newvarno, + int rtoffset); +static Var *search_indexed_tlist_for_non_var(Expr *node, + indexed_tlist *itlist, + int newvarno); +static Var *search_indexed_tlist_for_sortgroupref(Expr *node, + Index sortgroupref, + indexed_tlist *itlist, + int newvarno); +static List *fix_join_expr(PlannerInfo *root, + List *clauses, + indexed_tlist *outer_itlist, + indexed_tlist *inner_itlist, + Index acceptable_rel, + int rtoffset, double num_exec); +static Node *fix_join_expr_mutator(Node *node, + fix_join_expr_context *context); +static Node *fix_upper_expr(PlannerInfo *root, + Node *node, + indexed_tlist *subplan_itlist, + int newvarno, + int rtoffset, double num_exec); +static Node *fix_upper_expr_mutator(Node *node, + fix_upper_expr_context *context); +static List *set_returning_clause_references(PlannerInfo *root, + List *rlist, + Plan *topplan, + Index resultRelation, + int rtoffset); +static List *set_windowagg_runcondition_references(PlannerInfo *root, + List *runcondition, + Plan *plan); + + +/***************************************************************************** + * + * SUBPLAN REFERENCES + * + *****************************************************************************/ + +/* + * set_plan_references + * + * This is the final processing pass of the planner/optimizer. The plan + * tree is complete; we just have to adjust some representational details + * for the convenience of the executor: + * + * 1. We flatten the various subquery rangetables into a single list, and + * zero out RangeTblEntry fields that are not useful to the executor. + * + * 2. We adjust Vars in scan nodes to be consistent with the flat rangetable. + * + * 3. We adjust Vars in upper plan nodes to refer to the outputs of their + * subplans. + * + * 4. Aggrefs in Agg plan nodes need to be adjusted in some cases involving + * partial aggregation or minmax aggregate optimization. + * + * 5. PARAM_MULTIEXPR Params are replaced by regular PARAM_EXEC Params, + * now that we have finished planning all MULTIEXPR subplans. + * + * 6. AlternativeSubPlan expressions are replaced by just one of their + * alternatives, using an estimate of how many times they'll be executed. + * + * 7. We compute regproc OIDs for operators (ie, we look up the function + * that implements each op). + * + * 8. We create lists of specific objects that the plan depends on. + * This will be used by plancache.c to drive invalidation of cached plans. + * Relation dependencies are represented by OIDs, and everything else by + * PlanInvalItems (this distinction is motivated by the shared-inval APIs). + * Currently, relations, user-defined functions, and domains are the only + * types of objects that are explicitly tracked this way. + * + * 9. We assign every plan node in the tree a unique ID. + * + * We also perform one final optimization step, which is to delete + * SubqueryScan, Append, and MergeAppend plan nodes that aren't doing + * anything useful. The reason for doing this last is that + * it can't readily be done before set_plan_references, because it would + * break set_upper_references: the Vars in the child plan's top tlist + * wouldn't match up with the Vars in the outer plan tree. A SubqueryScan + * serves a necessary function as a buffer between outer query and subquery + * variable numbering ... but after we've flattened the rangetable this is + * no longer a problem, since then there's only one rtindex namespace. + * Likewise, Append and MergeAppend buffer between the parent and child vars + * of an appendrel, but we don't need to worry about that once we've done + * set_plan_references. + * + * set_plan_references recursively traverses the whole plan tree. + * + * The return value is normally the same Plan node passed in, but can be + * different when the passed-in Plan is a node we decide isn't needed. + * + * The flattened rangetable entries are appended to root->glob->finalrtable. + * Also, rowmarks entries are appended to root->glob->finalrowmarks, and the + * RT indexes of ModifyTable result relations to root->glob->resultRelations, + * and flattened AppendRelInfos are appended to root->glob->appendRelations. + * Plan dependencies are appended to root->glob->relationOids (for relations) + * and root->glob->invalItems (for everything else). + * + * Notice that we modify Plan nodes in-place, but use expression_tree_mutator + * to process targetlist and qual expressions. We can assume that the Plan + * nodes were just built by the planner and are not multiply referenced, but + * it's not so safe to assume that for expression tree nodes. + */ +Plan * +set_plan_references(PlannerInfo *root, Plan *plan) +{ + Plan *result; + PlannerGlobal *glob = root->glob; + int rtoffset = list_length(glob->finalrtable); + ListCell *lc; + + /* + * Add all the query's RTEs to the flattened rangetable. The live ones + * will have their rangetable indexes increased by rtoffset. (Additional + * RTEs, not referenced by the Plan tree, might get added after those.) + */ + add_rtes_to_flat_rtable(root, false); + + /* + * Adjust RT indexes of PlanRowMarks and add to final rowmarks list + */ + foreach(lc, root->rowMarks) + { + PlanRowMark *rc = lfirst_node(PlanRowMark, lc); + PlanRowMark *newrc; + + /* flat copy is enough since all fields are scalars */ + newrc = (PlanRowMark *) palloc(sizeof(PlanRowMark)); + memcpy(newrc, rc, sizeof(PlanRowMark)); + + /* adjust indexes ... but *not* the rowmarkId */ + newrc->rti += rtoffset; + newrc->prti += rtoffset; + + glob->finalrowmarks = lappend(glob->finalrowmarks, newrc); + } + + /* + * Adjust RT indexes of AppendRelInfos and add to final appendrels list. + * We assume the AppendRelInfos were built during planning and don't need + * to be copied. + */ + foreach(lc, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc); + + /* adjust RT indexes */ + appinfo->parent_relid += rtoffset; + appinfo->child_relid += rtoffset; + + /* + * Rather than adjust the translated_vars entries, just drop 'em. + * Neither the executor nor EXPLAIN currently need that data. + */ + appinfo->translated_vars = NIL; + + glob->appendRelations = lappend(glob->appendRelations, appinfo); + } + + /* If needed, create workspace for processing AlternativeSubPlans */ + if (root->hasAlternativeSubPlans) + { + root->isAltSubplan = (bool *) + palloc0(list_length(glob->subplans) * sizeof(bool)); + root->isUsedSubplan = (bool *) + palloc0(list_length(glob->subplans) * sizeof(bool)); + } + + /* Now fix the Plan tree */ + result = set_plan_refs(root, plan, rtoffset); + + /* + * If we have AlternativeSubPlans, it is likely that we now have some + * unreferenced subplans in glob->subplans. To avoid expending cycles on + * those subplans later, get rid of them by setting those list entries to + * NULL. (Note: we can't do this immediately upon processing an + * AlternativeSubPlan, because there may be multiple copies of the + * AlternativeSubPlan, and they can get resolved differently.) + */ + if (root->hasAlternativeSubPlans) + { + foreach(lc, glob->subplans) + { + int ndx = foreach_current_index(lc); + + /* + * If it was used by some AlternativeSubPlan in this query level, + * but wasn't selected as best by any AlternativeSubPlan, then we + * don't need it. Do not touch subplans that aren't parts of + * AlternativeSubPlans. + */ + if (root->isAltSubplan[ndx] && !root->isUsedSubplan[ndx]) + lfirst(lc) = NULL; + } + } + + return result; +} + +/* + * Extract RangeTblEntries from the plan's rangetable, and add to flat rtable + * + * This can recurse into subquery plans; "recursing" is true if so. + */ +static void +add_rtes_to_flat_rtable(PlannerInfo *root, bool recursing) +{ + PlannerGlobal *glob = root->glob; + Index rti; + ListCell *lc; + + /* + * Add the query's own RTEs to the flattened rangetable. + * + * At top level, we must add all RTEs so that their indexes in the + * flattened rangetable match up with their original indexes. When + * recursing, we only care about extracting relation RTEs. + */ + foreach(lc, root->parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (!recursing || rte->rtekind == RTE_RELATION) + add_rte_to_flat_rtable(glob, rte); + } + + /* + * If there are any dead subqueries, they are not referenced in the Plan + * tree, so we must add RTEs contained in them to the flattened rtable + * separately. (If we failed to do this, the executor would not perform + * expected permission checks for tables mentioned in such subqueries.) + * + * Note: this pass over the rangetable can't be combined with the previous + * one, because that would mess up the numbering of the live RTEs in the + * flattened rangetable. + */ + rti = 1; + foreach(lc, root->parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + /* + * We should ignore inheritance-parent RTEs: their contents have been + * pulled up into our rangetable already. Also ignore any subquery + * RTEs without matching RelOptInfos, as they likewise have been + * pulled up. + */ + if (rte->rtekind == RTE_SUBQUERY && !rte->inh && + rti < root->simple_rel_array_size) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + if (rel != NULL) + { + Assert(rel->relid == rti); /* sanity check on array */ + + /* + * The subquery might never have been planned at all, if it + * was excluded on the basis of self-contradictory constraints + * in our query level. In this case apply + * flatten_unplanned_rtes. + * + * If it was planned but the result rel is dummy, we assume + * that it has been omitted from our plan tree (see + * set_subquery_pathlist), and recurse to pull up its RTEs. + * + * Otherwise, it should be represented by a SubqueryScan node + * somewhere in our plan tree, and we'll pull up its RTEs when + * we process that plan node. + * + * However, if we're recursing, then we should pull up RTEs + * whether the subquery is dummy or not, because we've found + * that some upper query level is treating this one as dummy, + * and so we won't scan this level's plan tree at all. + */ + if (rel->subroot == NULL) + flatten_unplanned_rtes(glob, rte); + else if (recursing || + IS_DUMMY_REL(fetch_upper_rel(rel->subroot, + UPPERREL_FINAL, NULL))) + add_rtes_to_flat_rtable(rel->subroot, true); + } + } + rti++; + } +} + +/* + * Extract RangeTblEntries from a subquery that was never planned at all + */ +static void +flatten_unplanned_rtes(PlannerGlobal *glob, RangeTblEntry *rte) +{ + /* Use query_tree_walker to find all RTEs in the parse tree */ + (void) query_tree_walker(rte->subquery, + flatten_rtes_walker, + (void *) glob, + QTW_EXAMINE_RTES_BEFORE); +} + +static bool +flatten_rtes_walker(Node *node, PlannerGlobal *glob) +{ + if (node == NULL) + return false; + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + /* As above, we need only save relation RTEs */ + if (rte->rtekind == RTE_RELATION) + add_rte_to_flat_rtable(glob, rte); + return false; + } + if (IsA(node, Query)) + { + /* Recurse into subselects */ + return query_tree_walker((Query *) node, + flatten_rtes_walker, + (void *) glob, + QTW_EXAMINE_RTES_BEFORE); + } + return expression_tree_walker(node, flatten_rtes_walker, + (void *) glob); +} + +/* + * Add (a copy of) the given RTE to the final rangetable + * + * In the flat rangetable, we zero out substructure pointers that are not + * needed by the executor; this reduces the storage space and copying cost + * for cached plans. We keep only the ctename, alias and eref Alias fields, + * which are needed by EXPLAIN, and the selectedCols, insertedCols, + * updatedCols, and extraUpdatedCols bitmaps, which are needed for + * executor-startup permissions checking and for trigger event checking. + */ +static void +add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte) +{ + RangeTblEntry *newrte; + + /* flat copy to duplicate all the scalar fields */ + newrte = (RangeTblEntry *) palloc(sizeof(RangeTblEntry)); + memcpy(newrte, rte, sizeof(RangeTblEntry)); + + /* zap unneeded sub-structure */ + newrte->tablesample = NULL; + newrte->subquery = NULL; + newrte->joinaliasvars = NIL; + newrte->joinleftcols = NIL; + newrte->joinrightcols = NIL; + newrte->join_using_alias = NULL; + newrte->functions = NIL; + newrte->tablefunc = NULL; + newrte->values_lists = NIL; + newrte->coltypes = NIL; + newrte->coltypmods = NIL; + newrte->colcollations = NIL; + newrte->securityQuals = NIL; + + glob->finalrtable = lappend(glob->finalrtable, newrte); + + /* + * If it's a plain relation RTE, add the table to relationOids. + * + * We do this even though the RTE might be unreferenced in the plan tree; + * this would correspond to cases such as views that were expanded, child + * tables that were eliminated by constraint exclusion, etc. Schema + * invalidation on such a rel must still force rebuilding of the plan. + * + * Note we don't bother to avoid making duplicate list entries. We could, + * but it would probably cost more cycles than it would save. + */ + if (newrte->rtekind == RTE_RELATION) + glob->relationOids = lappend_oid(glob->relationOids, newrte->relid); +} + +/* + * set_plan_refs: recurse through the Plan nodes of a single subquery level + */ +static Plan * +set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) +{ + ListCell *l; + + if (plan == NULL) + return NULL; + + /* Assign this node a unique ID. */ + plan->plan_node_id = root->glob->lastPlanNodeId++; + + /* + * Plan-type-specific fixes + */ + switch (nodeTag(plan)) + { + case T_SeqScan: + { + SeqScan *splan = (SeqScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_SampleScan: + { + SampleScan *splan = (SampleScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->tablesample = (TableSampleClause *) + fix_scan_expr(root, (Node *) splan->tablesample, + rtoffset, 1); + } + break; + case T_IndexScan: + { + IndexScan *splan = (IndexScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->indexqual = + fix_scan_list(root, splan->indexqual, + rtoffset, 1); + splan->indexqualorig = + fix_scan_list(root, splan->indexqualorig, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->indexorderby = + fix_scan_list(root, splan->indexorderby, + rtoffset, 1); + splan->indexorderbyorig = + fix_scan_list(root, splan->indexorderbyorig, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *splan = (IndexOnlyScan *) plan; + + return set_indexonlyscan_references(root, splan, rtoffset); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *splan = (BitmapIndexScan *) plan; + + splan->scan.scanrelid += rtoffset; + /* no need to fix targetlist and qual */ + Assert(splan->scan.plan.targetlist == NIL); + Assert(splan->scan.plan.qual == NIL); + splan->indexqual = + fix_scan_list(root, splan->indexqual, rtoffset, 1); + splan->indexqualorig = + fix_scan_list(root, splan->indexqualorig, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_BitmapHeapScan: + { + BitmapHeapScan *splan = (BitmapHeapScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->bitmapqualorig = + fix_scan_list(root, splan->bitmapqualorig, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_TidScan: + { + TidScan *splan = (TidScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->tidquals = + fix_scan_list(root, splan->tidquals, + rtoffset, 1); + } + break; + case T_TidRangeScan: + { + TidRangeScan *splan = (TidRangeScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->tidrangequals = + fix_scan_list(root, splan->tidrangequals, + rtoffset, 1); + } + break; + case T_SubqueryScan: + /* Needs special treatment, see comments below */ + return set_subqueryscan_references(root, + (SubqueryScan *) plan, + rtoffset); + case T_FunctionScan: + { + FunctionScan *splan = (FunctionScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->functions = + fix_scan_list(root, splan->functions, rtoffset, 1); + } + break; + case T_TableFuncScan: + { + TableFuncScan *splan = (TableFuncScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->tablefunc = (TableFunc *) + fix_scan_expr(root, (Node *) splan->tablefunc, + rtoffset, 1); + } + break; + case T_ValuesScan: + { + ValuesScan *splan = (ValuesScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->values_lists = + fix_scan_list(root, splan->values_lists, + rtoffset, 1); + } + break; + case T_CteScan: + { + CteScan *splan = (CteScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_NamedTuplestoreScan: + { + NamedTuplestoreScan *splan = (NamedTuplestoreScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_WorkTableScan: + { + WorkTableScan *splan = (WorkTableScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + } + break; + case T_ForeignScan: + set_foreignscan_references(root, (ForeignScan *) plan, rtoffset); + break; + case T_CustomScan: + set_customscan_references(root, (CustomScan *) plan, rtoffset); + break; + + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + set_join_references(root, (Join *) plan, rtoffset); + break; + + case T_Gather: + case T_GatherMerge: + { + set_upper_references(root, plan, rtoffset); + set_param_references(root, plan); + } + break; + + case T_Hash: + set_hash_references(root, plan, rtoffset); + break; + + case T_Memoize: + { + Memoize *mplan = (Memoize *) plan; + + /* + * Memoize does not evaluate its targetlist. It just uses the + * same targetlist from its outer subnode. + */ + set_dummy_tlist_references(plan, rtoffset); + + mplan->param_exprs = fix_scan_list(root, mplan->param_exprs, + rtoffset, + NUM_EXEC_TLIST(plan)); + break; + } + + case T_Material: + case T_Sort: + case T_IncrementalSort: + case T_Unique: + case T_SetOp: + + /* + * These plan types don't actually bother to evaluate their + * targetlists, because they just return their unmodified input + * tuples. Even though the targetlist won't be used by the + * executor, we fix it up for possible use by EXPLAIN (not to + * mention ease of debugging --- wrong varnos are very confusing). + */ + set_dummy_tlist_references(plan, rtoffset); + + /* + * Since these plan types don't check quals either, we should not + * find any qual expression attached to them. + */ + Assert(plan->qual == NIL); + break; + case T_LockRows: + { + LockRows *splan = (LockRows *) plan; + + /* + * Like the plan types above, LockRows doesn't evaluate its + * tlist or quals. But we have to fix up the RT indexes in + * its rowmarks. + */ + set_dummy_tlist_references(plan, rtoffset); + Assert(splan->plan.qual == NIL); + + foreach(l, splan->rowMarks) + { + PlanRowMark *rc = (PlanRowMark *) lfirst(l); + + rc->rti += rtoffset; + rc->prti += rtoffset; + } + } + break; + case T_Limit: + { + Limit *splan = (Limit *) plan; + + /* + * Like the plan types above, Limit doesn't evaluate its tlist + * or quals. It does have live expressions for limit/offset, + * however; and those cannot contain subplan variable refs, so + * fix_scan_expr works for them. + */ + set_dummy_tlist_references(plan, rtoffset); + Assert(splan->plan.qual == NIL); + + splan->limitOffset = + fix_scan_expr(root, splan->limitOffset, rtoffset, 1); + splan->limitCount = + fix_scan_expr(root, splan->limitCount, rtoffset, 1); + } + break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * If this node is combining partial-aggregation results, we + * must convert its Aggrefs to contain references to the + * partial-aggregate subexpressions that will be available + * from the child plan node. + */ + if (DO_AGGSPLIT_COMBINE(agg->aggsplit)) + { + plan->targetlist = (List *) + convert_combining_aggrefs((Node *) plan->targetlist, + NULL); + plan->qual = (List *) + convert_combining_aggrefs((Node *) plan->qual, + NULL); + } + + set_upper_references(root, plan, rtoffset); + } + break; + case T_Group: + set_upper_references(root, plan, rtoffset); + break; + case T_WindowAgg: + { + WindowAgg *wplan = (WindowAgg *) plan; + + /* + * Adjust the WindowAgg's run conditions by swapping the + * WindowFuncs references out to instead reference the Var in + * the scan slot so that when the executor evaluates the + * runCondition, it receives the WindowFunc's value from the + * slot that the result has just been stored into rather than + * evaluating the WindowFunc all over again. + */ + wplan->runCondition = set_windowagg_runcondition_references(root, + wplan->runCondition, + (Plan *) wplan); + + set_upper_references(root, plan, rtoffset); + + /* + * Like Limit node limit/offset expressions, WindowAgg has + * frame offset expressions, which cannot contain subplan + * variable refs, so fix_scan_expr works for them. + */ + wplan->startOffset = + fix_scan_expr(root, wplan->startOffset, rtoffset, 1); + wplan->endOffset = + fix_scan_expr(root, wplan->endOffset, rtoffset, 1); + wplan->runCondition = fix_scan_list(root, + wplan->runCondition, + rtoffset, + NUM_EXEC_TLIST(plan)); + wplan->runConditionOrig = fix_scan_list(root, + wplan->runConditionOrig, + rtoffset, + NUM_EXEC_TLIST(plan)); + } + break; + case T_Result: + { + Result *splan = (Result *) plan; + + /* + * Result may or may not have a subplan; if not, it's more + * like a scan node than an upper node. + */ + if (splan->plan.lefttree != NULL) + set_upper_references(root, plan, rtoffset); + else + { + /* + * The tlist of a childless Result could contain + * unresolved ROWID_VAR Vars, in case it's representing a + * target relation which is completely empty because of + * constraint exclusion. Replace any such Vars by null + * constants, as though they'd been resolved for a leaf + * scan node that doesn't support them. We could have + * fix_scan_expr do this, but since the case is only + * expected to occur here, it seems safer to special-case + * it here and keep the assertions that ROWID_VARs + * shouldn't be seen by fix_scan_expr. + */ + foreach(l, splan->plan.targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Var *var = (Var *) tle->expr; + + if (var && IsA(var, Var) && var->varno == ROWID_VAR) + tle->expr = (Expr *) makeNullConst(var->vartype, + var->vartypmod, + var->varcollid); + } + + splan->plan.targetlist = + fix_scan_list(root, splan->plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->plan.qual = + fix_scan_list(root, splan->plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + } + /* resconstantqual can't contain any subplan variable refs */ + splan->resconstantqual = + fix_scan_expr(root, splan->resconstantqual, rtoffset, 1); + } + break; + case T_ProjectSet: + set_upper_references(root, plan, rtoffset); + break; + case T_ModifyTable: + { + ModifyTable *splan = (ModifyTable *) plan; + Plan *subplan = outerPlan(splan); + + Assert(splan->plan.targetlist == NIL); + Assert(splan->plan.qual == NIL); + + splan->withCheckOptionLists = + fix_scan_list(root, splan->withCheckOptionLists, + rtoffset, 1); + + if (splan->returningLists) + { + List *newRL = NIL; + ListCell *lcrl, + *lcrr; + + /* + * Pass each per-resultrel returningList through + * set_returning_clause_references(). + */ + Assert(list_length(splan->returningLists) == list_length(splan->resultRelations)); + forboth(lcrl, splan->returningLists, + lcrr, splan->resultRelations) + { + List *rlist = (List *) lfirst(lcrl); + Index resultrel = lfirst_int(lcrr); + + rlist = set_returning_clause_references(root, + rlist, + subplan, + resultrel, + rtoffset); + newRL = lappend(newRL, rlist); + } + splan->returningLists = newRL; + + /* + * Set up the visible plan targetlist as being the same as + * the first RETURNING list. This is for the use of + * EXPLAIN; the executor won't pay any attention to the + * targetlist. We postpone this step until here so that + * we don't have to do set_returning_clause_references() + * twice on identical targetlists. + */ + splan->plan.targetlist = copyObject(linitial(newRL)); + } + + /* + * We treat ModifyTable with ON CONFLICT as a form of 'pseudo + * join', where the inner side is the EXCLUDED tuple. + * Therefore use fix_join_expr to setup the relevant variables + * to INNER_VAR. We explicitly don't create any OUTER_VARs as + * those are already used by RETURNING and it seems better to + * be non-conflicting. + */ + if (splan->onConflictSet) + { + indexed_tlist *itlist; + + itlist = build_tlist_index(splan->exclRelTlist); + + splan->onConflictSet = + fix_join_expr(root, splan->onConflictSet, + NULL, itlist, + linitial_int(splan->resultRelations), + rtoffset, NUM_EXEC_QUAL(plan)); + + splan->onConflictWhere = (Node *) + fix_join_expr(root, (List *) splan->onConflictWhere, + NULL, itlist, + linitial_int(splan->resultRelations), + rtoffset, NUM_EXEC_QUAL(plan)); + + pfree(itlist); + + splan->exclRelTlist = + fix_scan_list(root, splan->exclRelTlist, rtoffset, 1); + } + + /* + * The MERGE statement produces the target rows by performing + * a right join between the target relation and the source + * relation (which could be a plain relation or a subquery). + * The INSERT and UPDATE actions of the MERGE statement + * require access to the columns from the source relation. We + * arrange things so that the source relation attributes are + * available as INNER_VAR and the target relation attributes + * are available from the scan tuple. + */ + if (splan->mergeActionLists != NIL) + { + ListCell *lca, + *lcr; + + /* + * Fix the targetList of individual action nodes so that + * the so-called "source relation" Vars are referenced as + * INNER_VAR. Note that for this to work correctly during + * execution, the ecxt_innertuple must be set to the tuple + * obtained by executing the subplan, which is what + * constitutes the "source relation". + * + * We leave the Vars from the result relation (i.e. the + * target relation) unchanged i.e. those Vars would be + * picked from the scan slot. So during execution, we must + * ensure that ecxt_scantuple is setup correctly to refer + * to the tuple from the target relation. + */ + indexed_tlist *itlist; + + itlist = build_tlist_index(subplan->targetlist); + + forboth(lca, splan->mergeActionLists, + lcr, splan->resultRelations) + { + List *mergeActionList = lfirst(lca); + Index resultrel = lfirst_int(lcr); + + foreach(l, mergeActionList) + { + MergeAction *action = (MergeAction *) lfirst(l); + + /* Fix targetList of each action. */ + action->targetList = fix_join_expr(root, + action->targetList, + NULL, itlist, + resultrel, + rtoffset, + NUM_EXEC_TLIST(plan)); + + /* Fix quals too. */ + action->qual = (Node *) fix_join_expr(root, + (List *) action->qual, + NULL, itlist, + resultrel, + rtoffset, + NUM_EXEC_QUAL(plan)); + } + } + } + + splan->nominalRelation += rtoffset; + if (splan->rootRelation) + splan->rootRelation += rtoffset; + splan->exclRelRTI += rtoffset; + + foreach(l, splan->resultRelations) + { + lfirst_int(l) += rtoffset; + } + foreach(l, splan->rowMarks) + { + PlanRowMark *rc = (PlanRowMark *) lfirst(l); + + rc->rti += rtoffset; + rc->prti += rtoffset; + } + + /* + * Append this ModifyTable node's final result relation RT + * index(es) to the global list for the plan. + */ + root->glob->resultRelations = + list_concat(root->glob->resultRelations, + splan->resultRelations); + if (splan->rootRelation) + { + root->glob->resultRelations = + lappend_int(root->glob->resultRelations, + splan->rootRelation); + } + } + break; + case T_Append: + /* Needs special treatment, see comments below */ + return set_append_references(root, + (Append *) plan, + rtoffset); + case T_MergeAppend: + /* Needs special treatment, see comments below */ + return set_mergeappend_references(root, + (MergeAppend *) plan, + rtoffset); + case T_RecursiveUnion: + /* This doesn't evaluate targetlist or check quals either */ + set_dummy_tlist_references(plan, rtoffset); + Assert(plan->qual == NIL); + break; + case T_BitmapAnd: + { + BitmapAnd *splan = (BitmapAnd *) plan; + + /* BitmapAnd works like Append, but has no tlist */ + Assert(splan->plan.targetlist == NIL); + Assert(splan->plan.qual == NIL); + foreach(l, splan->bitmapplans) + { + lfirst(l) = set_plan_refs(root, + (Plan *) lfirst(l), + rtoffset); + } + } + break; + case T_BitmapOr: + { + BitmapOr *splan = (BitmapOr *) plan; + + /* BitmapOr works like Append, but has no tlist */ + Assert(splan->plan.targetlist == NIL); + Assert(splan->plan.qual == NIL); + foreach(l, splan->bitmapplans) + { + lfirst(l) = set_plan_refs(root, + (Plan *) lfirst(l), + rtoffset); + } + } + break; + default: + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(plan)); + break; + } + + /* + * Now recurse into child plans, if any + * + * NOTE: it is essential that we recurse into child plans AFTER we set + * subplan references in this plan's tlist and quals. If we did the + * reference-adjustments bottom-up, then we would fail to match this + * plan's var nodes against the already-modified nodes of the children. + */ + plan->lefttree = set_plan_refs(root, plan->lefttree, rtoffset); + plan->righttree = set_plan_refs(root, plan->righttree, rtoffset); + + return plan; +} + +/* + * set_indexonlyscan_references + * Do set_plan_references processing on an IndexOnlyScan + * + * This is unlike the handling of a plain IndexScan because we have to + * convert Vars referencing the heap into Vars referencing the index. + * We can use the fix_upper_expr machinery for that, by working from a + * targetlist describing the index columns. + */ +static Plan * +set_indexonlyscan_references(PlannerInfo *root, + IndexOnlyScan *plan, + int rtoffset) +{ + indexed_tlist *index_itlist; + List *stripped_indextlist; + ListCell *lc; + + /* + * Vars in the plan node's targetlist, qual, and recheckqual must only + * reference columns that the index AM can actually return. To ensure + * this, remove non-returnable columns (which are marked as resjunk) from + * the indexed tlist. We can just drop them because the indexed_tlist + * machinery pays attention to TLE resnos, not physical list position. + */ + stripped_indextlist = NIL; + foreach(lc, plan->indextlist) + { + TargetEntry *indextle = (TargetEntry *) lfirst(lc); + + if (!indextle->resjunk) + stripped_indextlist = lappend(stripped_indextlist, indextle); + } + + index_itlist = build_tlist_index(stripped_indextlist); + + plan->scan.scanrelid += rtoffset; + plan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) plan->scan.plan.targetlist, + index_itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_TLIST((Plan *) plan)); + plan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) plan->scan.plan.qual, + index_itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) plan)); + plan->recheckqual = (List *) + fix_upper_expr(root, + (Node *) plan->recheckqual, + index_itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) plan)); + /* indexqual is already transformed to reference index columns */ + plan->indexqual = fix_scan_list(root, plan->indexqual, + rtoffset, 1); + /* indexorderby is already transformed to reference index columns */ + plan->indexorderby = fix_scan_list(root, plan->indexorderby, + rtoffset, 1); + /* indextlist must NOT be transformed to reference index columns */ + plan->indextlist = fix_scan_list(root, plan->indextlist, + rtoffset, NUM_EXEC_TLIST((Plan *) plan)); + + pfree(index_itlist); + + return (Plan *) plan; +} + +/* + * set_subqueryscan_references + * Do set_plan_references processing on a SubqueryScan + * + * We try to strip out the SubqueryScan entirely; if we can't, we have + * to do the normal processing on it. + */ +static Plan * +set_subqueryscan_references(PlannerInfo *root, + SubqueryScan *plan, + int rtoffset) +{ + RelOptInfo *rel; + Plan *result; + + /* Need to look up the subquery's RelOptInfo, since we need its subroot */ + rel = find_base_rel(root, plan->scan.scanrelid); + + /* Recursively process the subplan */ + plan->subplan = set_plan_references(rel->subroot, plan->subplan); + + if (trivial_subqueryscan(plan)) + { + /* + * We can omit the SubqueryScan node and just pull up the subplan. + */ + result = clean_up_removed_plan_level((Plan *) plan, plan->subplan); + } + else + { + /* + * Keep the SubqueryScan node. We have to do the processing that + * set_plan_references would otherwise have done on it. Notice we do + * not do set_upper_references() here, because a SubqueryScan will + * always have been created with correct references to its subplan's + * outputs to begin with. + */ + plan->scan.scanrelid += rtoffset; + plan->scan.plan.targetlist = + fix_scan_list(root, plan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST((Plan *) plan)); + plan->scan.plan.qual = + fix_scan_list(root, plan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL((Plan *) plan)); + + result = (Plan *) plan; + } + + return result; +} + +/* + * trivial_subqueryscan + * Detect whether a SubqueryScan can be deleted from the plan tree. + * + * We can delete it if it has no qual to check and the targetlist just + * regurgitates the output of the child plan. + * + * This can be called from mark_async_capable_plan(), a helper function for + * create_append_plan(), before set_subqueryscan_references(), to determine + * triviality of a SubqueryScan that is a child of an Append node. So we + * cache the result in the SubqueryScan node to avoid repeated computation. + * + * Note: when called from mark_async_capable_plan(), we determine the result + * before running finalize_plan() on the SubqueryScan node (if needed) and + * set_plan_references() on the subplan tree, but this would be safe, because + * 1) finalize_plan() doesn't modify the tlist or quals for the SubqueryScan + * node (or that for any plan node in the subplan tree), and + * 2) set_plan_references() modifies the tlist for every plan node in the + * subplan tree, but keeps const/resjunk columns as const/resjunk ones and + * preserves the length and order of the tlist, and + * 3) set_plan_references() might delete the topmost plan node like an Append + * or MergeAppend from the subplan tree and pull up the child plan node, + * but in that case, the tlist for the child plan node exactly matches the + * parent. + */ +bool +trivial_subqueryscan(SubqueryScan *plan) +{ + int attrno; + ListCell *lp, + *lc; + + /* We might have detected this already; in which case reuse the result */ + if (plan->scanstatus == SUBQUERY_SCAN_TRIVIAL) + return true; + if (plan->scanstatus == SUBQUERY_SCAN_NONTRIVIAL) + return false; + Assert(plan->scanstatus == SUBQUERY_SCAN_UNKNOWN); + /* Initially, mark the SubqueryScan as non-deletable from the plan tree */ + plan->scanstatus = SUBQUERY_SCAN_NONTRIVIAL; + + if (plan->scan.plan.qual != NIL) + return false; + + if (list_length(plan->scan.plan.targetlist) != + list_length(plan->subplan->targetlist)) + return false; /* tlists not same length */ + + attrno = 1; + forboth(lp, plan->scan.plan.targetlist, lc, plan->subplan->targetlist) + { + TargetEntry *ptle = (TargetEntry *) lfirst(lp); + TargetEntry *ctle = (TargetEntry *) lfirst(lc); + + if (ptle->resjunk != ctle->resjunk) + return false; /* tlist doesn't match junk status */ + + /* + * We accept either a Var referencing the corresponding element of the + * subplan tlist, or a Const equaling the subplan element. See + * generate_setop_tlist() for motivation. + */ + if (ptle->expr && IsA(ptle->expr, Var)) + { + Var *var = (Var *) ptle->expr; + + Assert(var->varno == plan->scan.scanrelid); + Assert(var->varlevelsup == 0); + if (var->varattno != attrno) + return false; /* out of order */ + } + else if (ptle->expr && IsA(ptle->expr, Const)) + { + if (!equal(ptle->expr, ctle->expr)) + return false; + } + else + return false; + + attrno++; + } + + /* Re-mark the SubqueryScan as deletable from the plan tree */ + plan->scanstatus = SUBQUERY_SCAN_TRIVIAL; + + return true; +} + +/* + * clean_up_removed_plan_level + * Do necessary cleanup when we strip out a SubqueryScan, Append, etc + * + * We are dropping the "parent" plan in favor of returning just its "child". + * A few small tweaks are needed. + */ +static Plan * +clean_up_removed_plan_level(Plan *parent, Plan *child) +{ + /* + * We have to be sure we don't lose any initplans, so move any that were + * attached to the parent plan to the child. If we do move any, the child + * is no longer parallel-safe. + */ + if (parent->initPlan) + child->parallel_safe = false; + + /* + * Attach plans this way so that parent's initplans are processed before + * any pre-existing initplans of the child. Probably doesn't matter, but + * let's preserve the ordering just in case. + */ + child->initPlan = list_concat(parent->initPlan, + child->initPlan); + + /* + * We also have to transfer the parent's column labeling info into the + * child, else columns sent to client will be improperly labeled if this + * is the topmost plan level. resjunk and so on may be important too. + */ + apply_tlist_labeling(child->targetlist, parent->targetlist); + + return child; +} + +/* + * set_foreignscan_references + * Do set_plan_references processing on a ForeignScan + */ +static void +set_foreignscan_references(PlannerInfo *root, + ForeignScan *fscan, + int rtoffset) +{ + /* Adjust scanrelid if it's valid */ + if (fscan->scan.scanrelid > 0) + fscan->scan.scanrelid += rtoffset; + + if (fscan->fdw_scan_tlist != NIL || fscan->scan.scanrelid == 0) + { + /* + * Adjust tlist, qual, fdw_exprs, fdw_recheck_quals to reference + * foreign scan tuple + */ + indexed_tlist *itlist = build_tlist_index(fscan->fdw_scan_tlist); + + fscan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) fscan->scan.plan.targetlist, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_TLIST((Plan *) fscan)); + fscan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) fscan->scan.plan.qual, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) fscan)); + fscan->fdw_exprs = (List *) + fix_upper_expr(root, + (Node *) fscan->fdw_exprs, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) fscan)); + fscan->fdw_recheck_quals = (List *) + fix_upper_expr(root, + (Node *) fscan->fdw_recheck_quals, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) fscan)); + pfree(itlist); + /* fdw_scan_tlist itself just needs fix_scan_list() adjustments */ + fscan->fdw_scan_tlist = + fix_scan_list(root, fscan->fdw_scan_tlist, + rtoffset, NUM_EXEC_TLIST((Plan *) fscan)); + } + else + { + /* + * Adjust tlist, qual, fdw_exprs, fdw_recheck_quals in the standard + * way + */ + fscan->scan.plan.targetlist = + fix_scan_list(root, fscan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST((Plan *) fscan)); + fscan->scan.plan.qual = + fix_scan_list(root, fscan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + fscan->fdw_exprs = + fix_scan_list(root, fscan->fdw_exprs, + rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + fscan->fdw_recheck_quals = + fix_scan_list(root, fscan->fdw_recheck_quals, + rtoffset, NUM_EXEC_QUAL((Plan *) fscan)); + } + + fscan->fs_relids = offset_relid_set(fscan->fs_relids, rtoffset); + + /* Adjust resultRelation if it's valid */ + if (fscan->resultRelation > 0) + fscan->resultRelation += rtoffset; +} + +/* + * set_customscan_references + * Do set_plan_references processing on a CustomScan + */ +static void +set_customscan_references(PlannerInfo *root, + CustomScan *cscan, + int rtoffset) +{ + ListCell *lc; + + /* Adjust scanrelid if it's valid */ + if (cscan->scan.scanrelid > 0) + cscan->scan.scanrelid += rtoffset; + + if (cscan->custom_scan_tlist != NIL || cscan->scan.scanrelid == 0) + { + /* Adjust tlist, qual, custom_exprs to reference custom scan tuple */ + indexed_tlist *itlist = build_tlist_index(cscan->custom_scan_tlist); + + cscan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) cscan->scan.plan.targetlist, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_TLIST((Plan *) cscan)); + cscan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) cscan->scan.plan.qual, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) cscan)); + cscan->custom_exprs = (List *) + fix_upper_expr(root, + (Node *) cscan->custom_exprs, + itlist, + INDEX_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) cscan)); + pfree(itlist); + /* custom_scan_tlist itself just needs fix_scan_list() adjustments */ + cscan->custom_scan_tlist = + fix_scan_list(root, cscan->custom_scan_tlist, + rtoffset, NUM_EXEC_TLIST((Plan *) cscan)); + } + else + { + /* Adjust tlist, qual, custom_exprs in the standard way */ + cscan->scan.plan.targetlist = + fix_scan_list(root, cscan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST((Plan *) cscan)); + cscan->scan.plan.qual = + fix_scan_list(root, cscan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL((Plan *) cscan)); + cscan->custom_exprs = + fix_scan_list(root, cscan->custom_exprs, + rtoffset, NUM_EXEC_QUAL((Plan *) cscan)); + } + + /* Adjust child plan-nodes recursively, if needed */ + foreach(lc, cscan->custom_plans) + { + lfirst(lc) = set_plan_refs(root, (Plan *) lfirst(lc), rtoffset); + } + + cscan->custom_relids = offset_relid_set(cscan->custom_relids, rtoffset); +} + +/* + * set_append_references + * Do set_plan_references processing on an Append + * + * We try to strip out the Append entirely; if we can't, we have + * to do the normal processing on it. + */ +static Plan * +set_append_references(PlannerInfo *root, + Append *aplan, + int rtoffset) +{ + ListCell *l; + + /* + * Append, like Sort et al, doesn't actually evaluate its targetlist or + * check quals. If it's got exactly one child plan, then it's not doing + * anything useful at all, and we can strip it out. + */ + Assert(aplan->plan.qual == NIL); + + /* First, we gotta recurse on the children */ + foreach(l, aplan->appendplans) + { + lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); + } + + /* + * See if it's safe to get rid of the Append entirely. For this to be + * safe, there must be only one child plan and that child plan's parallel + * awareness must match that of the Append's. The reason for the latter + * is that the if the Append is parallel aware and the child is not then + * the calling plan may execute the non-parallel aware child multiple + * times. + */ + if (list_length(aplan->appendplans) == 1 && + ((Plan *) linitial(aplan->appendplans))->parallel_aware == aplan->plan.parallel_aware) + return clean_up_removed_plan_level((Plan *) aplan, + (Plan *) linitial(aplan->appendplans)); + + /* + * Otherwise, clean up the Append as needed. It's okay to do this after + * recursing to the children, because set_dummy_tlist_references doesn't + * look at those. + */ + set_dummy_tlist_references((Plan *) aplan, rtoffset); + + aplan->apprelids = offset_relid_set(aplan->apprelids, rtoffset); + + if (aplan->part_prune_info) + { + foreach(l, aplan->part_prune_info->prune_infos) + { + List *prune_infos = lfirst(l); + ListCell *l2; + + foreach(l2, prune_infos) + { + PartitionedRelPruneInfo *pinfo = lfirst(l2); + + pinfo->rtindex += rtoffset; + } + } + } + + /* We don't need to recurse to lefttree or righttree ... */ + Assert(aplan->plan.lefttree == NULL); + Assert(aplan->plan.righttree == NULL); + + return (Plan *) aplan; +} + +/* + * set_mergeappend_references + * Do set_plan_references processing on a MergeAppend + * + * We try to strip out the MergeAppend entirely; if we can't, we have + * to do the normal processing on it. + */ +static Plan * +set_mergeappend_references(PlannerInfo *root, + MergeAppend *mplan, + int rtoffset) +{ + ListCell *l; + + /* + * MergeAppend, like Sort et al, doesn't actually evaluate its targetlist + * or check quals. If it's got exactly one child plan, then it's not + * doing anything useful at all, and we can strip it out. + */ + Assert(mplan->plan.qual == NIL); + + /* First, we gotta recurse on the children */ + foreach(l, mplan->mergeplans) + { + lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); + } + + /* + * See if it's safe to get rid of the MergeAppend entirely. For this to + * be safe, there must be only one child plan and that child plan's + * parallel awareness must match that of the MergeAppend's. The reason + * for the latter is that the if the MergeAppend is parallel aware and the + * child is not then the calling plan may execute the non-parallel aware + * child multiple times. + */ + if (list_length(mplan->mergeplans) == 1 && + ((Plan *) linitial(mplan->mergeplans))->parallel_aware == mplan->plan.parallel_aware) + return clean_up_removed_plan_level((Plan *) mplan, + (Plan *) linitial(mplan->mergeplans)); + + /* + * Otherwise, clean up the MergeAppend as needed. It's okay to do this + * after recursing to the children, because set_dummy_tlist_references + * doesn't look at those. + */ + set_dummy_tlist_references((Plan *) mplan, rtoffset); + + mplan->apprelids = offset_relid_set(mplan->apprelids, rtoffset); + + if (mplan->part_prune_info) + { + foreach(l, mplan->part_prune_info->prune_infos) + { + List *prune_infos = lfirst(l); + ListCell *l2; + + foreach(l2, prune_infos) + { + PartitionedRelPruneInfo *pinfo = lfirst(l2); + + pinfo->rtindex += rtoffset; + } + } + } + + /* We don't need to recurse to lefttree or righttree ... */ + Assert(mplan->plan.lefttree == NULL); + Assert(mplan->plan.righttree == NULL); + + return (Plan *) mplan; +} + +/* + * set_hash_references + * Do set_plan_references processing on a Hash node + */ +static void +set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset) +{ + Hash *hplan = (Hash *) plan; + Plan *outer_plan = plan->lefttree; + indexed_tlist *outer_itlist; + + /* + * Hash's hashkeys are used when feeding tuples into the hashtable, + * therefore have them reference Hash's outer plan (which itself is the + * inner plan of the HashJoin). + */ + outer_itlist = build_tlist_index(outer_plan->targetlist); + hplan->hashkeys = (List *) + fix_upper_expr(root, + (Node *) hplan->hashkeys, + outer_itlist, + OUTER_VAR, + rtoffset, + NUM_EXEC_QUAL(plan)); + + /* Hash doesn't project */ + set_dummy_tlist_references(plan, rtoffset); + + /* Hash nodes don't have their own quals */ + Assert(plan->qual == NIL); +} + +/* + * offset_relid_set + * Apply rtoffset to the members of a Relids set. + */ +static Relids +offset_relid_set(Relids relids, int rtoffset) +{ + Relids result = NULL; + int rtindex; + + /* If there's no offset to apply, we needn't recompute the value */ + if (rtoffset == 0) + return relids; + rtindex = -1; + while ((rtindex = bms_next_member(relids, rtindex)) >= 0) + result = bms_add_member(result, rtindex + rtoffset); + return result; +} + +/* + * copyVar + * Copy a Var node. + * + * fix_scan_expr and friends do this enough times that it's worth having + * a bespoke routine instead of using the generic copyObject() function. + */ +static inline Var * +copyVar(Var *var) +{ + Var *newvar = (Var *) palloc(sizeof(Var)); + + *newvar = *var; + return newvar; +} + +/* + * fix_expr_common + * Do generic set_plan_references processing on an expression node + * + * This is code that is common to all variants of expression-fixing. + * We must look up operator opcode info for OpExpr and related nodes, + * add OIDs from regclass Const nodes into root->glob->relationOids, and + * add PlanInvalItems for user-defined functions into root->glob->invalItems. + * We also fill in column index lists for GROUPING() expressions. + * + * We assume it's okay to update opcode info in-place. So this could possibly + * scribble on the planner's input data structures, but it's OK. + */ +static void +fix_expr_common(PlannerInfo *root, Node *node) +{ + /* We assume callers won't call us on a NULL pointer */ + if (IsA(node, Aggref)) + { + record_plan_function_dependency(root, + ((Aggref *) node)->aggfnoid); + } + else if (IsA(node, WindowFunc)) + { + record_plan_function_dependency(root, + ((WindowFunc *) node)->winfnoid); + } + else if (IsA(node, FuncExpr)) + { + record_plan_function_dependency(root, + ((FuncExpr *) node)->funcid); + } + else if (IsA(node, OpExpr)) + { + set_opfuncid((OpExpr *) node); + record_plan_function_dependency(root, + ((OpExpr *) node)->opfuncid); + } + else if (IsA(node, DistinctExpr)) + { + set_opfuncid((OpExpr *) node); /* rely on struct equivalence */ + record_plan_function_dependency(root, + ((DistinctExpr *) node)->opfuncid); + } + else if (IsA(node, NullIfExpr)) + { + set_opfuncid((OpExpr *) node); /* rely on struct equivalence */ + record_plan_function_dependency(root, + ((NullIfExpr *) node)->opfuncid); + } + else if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + + set_sa_opfuncid(saop); + record_plan_function_dependency(root, saop->opfuncid); + + if (OidIsValid(saop->hashfuncid)) + record_plan_function_dependency(root, saop->hashfuncid); + + if (OidIsValid(saop->negfuncid)) + record_plan_function_dependency(root, saop->negfuncid); + } + else if (IsA(node, Const)) + { + Const *con = (Const *) node; + + /* Check for regclass reference */ + if (ISREGCLASSCONST(con)) + root->glob->relationOids = + lappend_oid(root->glob->relationOids, + DatumGetObjectId(con->constvalue)); + } + else if (IsA(node, GroupingFunc)) + { + GroupingFunc *g = (GroupingFunc *) node; + AttrNumber *grouping_map = root->grouping_map; + + /* If there are no grouping sets, we don't need this. */ + + Assert(grouping_map || g->cols == NIL); + + if (grouping_map) + { + ListCell *lc; + List *cols = NIL; + + foreach(lc, g->refs) + { + cols = lappend_int(cols, grouping_map[lfirst_int(lc)]); + } + + Assert(!g->cols || equal(cols, g->cols)); + + if (!g->cols) + g->cols = cols; + } + } +} + +/* + * fix_param_node + * Do set_plan_references processing on a Param + * + * If it's a PARAM_MULTIEXPR, replace it with the appropriate Param from + * root->multiexpr_params; otherwise no change is needed. + * Just for paranoia's sake, we make a copy of the node in either case. + */ +static Node * +fix_param_node(PlannerInfo *root, Param *p) +{ + if (p->paramkind == PARAM_MULTIEXPR) + { + int subqueryid = p->paramid >> 16; + int colno = p->paramid & 0xFFFF; + List *params; + + if (subqueryid <= 0 || + subqueryid > list_length(root->multiexpr_params)) + elog(ERROR, "unexpected PARAM_MULTIEXPR ID: %d", p->paramid); + params = (List *) list_nth(root->multiexpr_params, subqueryid - 1); + if (colno <= 0 || colno > list_length(params)) + elog(ERROR, "unexpected PARAM_MULTIEXPR ID: %d", p->paramid); + return copyObject(list_nth(params, colno - 1)); + } + return (Node *) copyObject(p); +} + +/* + * fix_alternative_subplan + * Do set_plan_references processing on an AlternativeSubPlan + * + * Choose one of the alternative implementations and return just that one, + * discarding the rest of the AlternativeSubPlan structure. + * Note: caller must still recurse into the result! + * + * We don't make any attempt to fix up cost estimates in the parent plan + * node or higher-level nodes. + */ +static Node * +fix_alternative_subplan(PlannerInfo *root, AlternativeSubPlan *asplan, + double num_exec) +{ + SubPlan *bestplan = NULL; + Cost bestcost = 0; + ListCell *lc; + + /* + * Compute the estimated cost of each subplan assuming num_exec + * executions, and keep the cheapest one. In event of exact equality of + * estimates, we prefer the later plan; this is a bit arbitrary, but in + * current usage it biases us to break ties against fast-start subplans. + */ + Assert(asplan->subplans != NIL); + + foreach(lc, asplan->subplans) + { + SubPlan *curplan = (SubPlan *) lfirst(lc); + Cost curcost; + + curcost = curplan->startup_cost + num_exec * curplan->per_call_cost; + if (bestplan == NULL || curcost <= bestcost) + { + bestplan = curplan; + bestcost = curcost; + } + + /* Also mark all subplans that are in AlternativeSubPlans */ + root->isAltSubplan[curplan->plan_id - 1] = true; + } + + /* Mark the subplan we selected */ + root->isUsedSubplan[bestplan->plan_id - 1] = true; + + return (Node *) bestplan; +} + +/* + * fix_scan_expr + * Do set_plan_references processing on a scan-level expression + * + * This consists of incrementing all Vars' varnos by rtoffset, + * replacing PARAM_MULTIEXPR Params, expanding PlaceHolderVars, + * replacing Aggref nodes that should be replaced by initplan output Params, + * choosing the best implementation for AlternativeSubPlans, + * looking up operator opcode info for OpExpr and related nodes, + * and adding OIDs from regclass Const nodes into root->glob->relationOids. + * + * 'node': the expression to be modified + * 'rtoffset': how much to increment varnos by + * 'num_exec': estimated number of executions of expression + * + * The expression tree is either copied-and-modified, or modified in-place + * if that seems safe. + */ +static Node * +fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset, double num_exec) +{ + fix_scan_expr_context context; + + context.root = root; + context.rtoffset = rtoffset; + context.num_exec = num_exec; + + if (rtoffset != 0 || + root->multiexpr_params != NIL || + root->glob->lastPHId != 0 || + root->minmax_aggs != NIL || + root->hasAlternativeSubPlans) + { + return fix_scan_expr_mutator(node, &context); + } + else + { + /* + * If rtoffset == 0, we don't need to change any Vars, and if there + * are no MULTIEXPR subqueries then we don't need to replace + * PARAM_MULTIEXPR Params, and if there are no placeholders anywhere + * we won't need to remove them, and if there are no minmax Aggrefs we + * won't need to replace them, and if there are no AlternativeSubPlans + * we won't need to remove them. Then it's OK to just scribble on the + * input node tree instead of copying (since the only change, filling + * in any unset opfuncid fields, is harmless). This saves just enough + * cycles to be noticeable on trivial queries. + */ + (void) fix_scan_expr_walker(node, &context); + return node; + } +} + +static Node * +fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = copyVar((Var *) node); + + Assert(var->varlevelsup == 0); + + /* + * We should not see Vars marked INNER_VAR, OUTER_VAR, or ROWID_VAR. + * But an indexqual expression could contain INDEX_VAR Vars. + */ + Assert(var->varno != INNER_VAR); + Assert(var->varno != OUTER_VAR); + Assert(var->varno != ROWID_VAR); + if (!IS_SPECIAL_VARNO(var->varno)) + var->varno += context->rtoffset; + if (var->varnosyn > 0) + var->varnosyn += context->rtoffset; + return (Node *) var; + } + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); + if (IsA(node, Aggref)) + { + Aggref *aggref = (Aggref *) node; + + /* See if the Aggref should be replaced by a Param */ + if (context->root->minmax_aggs != NIL && + list_length(aggref->args) == 1) + { + TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args); + ListCell *lc; + + foreach(lc, context->root->minmax_aggs) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + if (mminfo->aggfnoid == aggref->aggfnoid && + equal(mminfo->target, curTarget->expr)) + return (Node *) copyObject(mminfo->param); + } + } + /* If no match, just fall through to process it normally */ + } + if (IsA(node, CurrentOfExpr)) + { + CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node); + + Assert(!IS_SPECIAL_VARNO(cexpr->cvarno)); + cexpr->cvarno += context->rtoffset; + return (Node *) cexpr; + } + if (IsA(node, PlaceHolderVar)) + { + /* At scan level, we should always just evaluate the contained expr */ + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + return fix_scan_expr_mutator((Node *) phv->phexpr, context); + } + if (IsA(node, AlternativeSubPlan)) + return fix_scan_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + fix_expr_common(context->root, node); + return expression_tree_mutator(node, fix_scan_expr_mutator, + (void *) context); +} + +static bool +fix_scan_expr_walker(Node *node, fix_scan_expr_context *context) +{ + if (node == NULL) + return false; + Assert(!(IsA(node, Var) && ((Var *) node)->varno == ROWID_VAR)); + Assert(!IsA(node, PlaceHolderVar)); + Assert(!IsA(node, AlternativeSubPlan)); + fix_expr_common(context->root, node); + return expression_tree_walker(node, fix_scan_expr_walker, + (void *) context); +} + +/* + * set_join_references + * Modify the target list and quals of a join node to reference its + * subplans, by setting the varnos to OUTER_VAR or INNER_VAR and setting + * attno values to the result domain number of either the corresponding + * outer or inner join tuple item. Also perform opcode lookup for these + * expressions, and add regclass OIDs to root->glob->relationOids. + */ +static void +set_join_references(PlannerInfo *root, Join *join, int rtoffset) +{ + Plan *outer_plan = join->plan.lefttree; + Plan *inner_plan = join->plan.righttree; + indexed_tlist *outer_itlist; + indexed_tlist *inner_itlist; + + outer_itlist = build_tlist_index(outer_plan->targetlist); + inner_itlist = build_tlist_index(inner_plan->targetlist); + + /* + * First process the joinquals (including merge or hash clauses). These + * are logically below the join so they can always use all values + * available from the input tlists. It's okay to also handle + * NestLoopParams now, because those couldn't refer to nullable + * subexpressions. + */ + join->joinqual = fix_join_expr(root, + join->joinqual, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + NUM_EXEC_QUAL((Plan *) join)); + + /* Now do join-type-specific stuff */ + if (IsA(join, NestLoop)) + { + NestLoop *nl = (NestLoop *) join; + ListCell *lc; + + foreach(lc, nl->nestParams) + { + NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); + + nlp->paramval = (Var *) fix_upper_expr(root, + (Node *) nlp->paramval, + outer_itlist, + OUTER_VAR, + rtoffset, + NUM_EXEC_TLIST(outer_plan)); + /* Check we replaced any PlaceHolderVar with simple Var */ + if (!(IsA(nlp->paramval, Var) && + nlp->paramval->varno == OUTER_VAR)) + elog(ERROR, "NestLoopParam was not reduced to a simple Var"); + } + } + else if (IsA(join, MergeJoin)) + { + MergeJoin *mj = (MergeJoin *) join; + + mj->mergeclauses = fix_join_expr(root, + mj->mergeclauses, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + NUM_EXEC_QUAL((Plan *) join)); + } + else if (IsA(join, HashJoin)) + { + HashJoin *hj = (HashJoin *) join; + + hj->hashclauses = fix_join_expr(root, + hj->hashclauses, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + NUM_EXEC_QUAL((Plan *) join)); + + /* + * HashJoin's hashkeys are used to look for matching tuples from its + * outer plan (not the Hash node!) in the hashtable. + */ + hj->hashkeys = (List *) fix_upper_expr(root, + (Node *) hj->hashkeys, + outer_itlist, + OUTER_VAR, + rtoffset, + NUM_EXEC_QUAL((Plan *) join)); + } + + /* + * Now we need to fix up the targetlist and qpqual, which are logically + * above the join. This means they should not re-use any input expression + * that was computed in the nullable side of an outer join. Vars and + * PlaceHolderVars are fine, so we can implement this restriction just by + * clearing has_non_vars in the indexed_tlist structs. + * + * XXX This is a grotty workaround for the fact that we don't clearly + * distinguish between a Var appearing below an outer join and the "same" + * Var appearing above it. If we did, we'd not need to hack the matching + * rules this way. + */ + switch (join->jointype) + { + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + inner_itlist->has_non_vars = false; + break; + case JOIN_RIGHT: + outer_itlist->has_non_vars = false; + break; + case JOIN_FULL: + outer_itlist->has_non_vars = false; + inner_itlist->has_non_vars = false; + break; + default: + break; + } + + join->plan.targetlist = fix_join_expr(root, + join->plan.targetlist, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + NUM_EXEC_TLIST((Plan *) join)); + join->plan.qual = fix_join_expr(root, + join->plan.qual, + outer_itlist, + inner_itlist, + (Index) 0, + rtoffset, + NUM_EXEC_QUAL((Plan *) join)); + + pfree(outer_itlist); + pfree(inner_itlist); +} + +/* + * set_upper_references + * Update the targetlist and quals of an upper-level plan node + * to refer to the tuples returned by its lefttree subplan. + * Also perform opcode lookup for these expressions, and + * add regclass OIDs to root->glob->relationOids. + * + * This is used for single-input plan types like Agg, Group, Result. + * + * In most cases, we have to match up individual Vars in the tlist and + * qual expressions with elements of the subplan's tlist (which was + * generated by flattening these selfsame expressions, so it should have all + * the required variables). There is an important exception, however: + * depending on where we are in the plan tree, sort/group columns may have + * been pushed into the subplan tlist unflattened. If these values are also + * needed in the output then we want to reference the subplan tlist element + * rather than recomputing the expression. + */ +static void +set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) +{ + Plan *subplan = plan->lefttree; + indexed_tlist *subplan_itlist; + List *output_targetlist; + ListCell *l; + + subplan_itlist = build_tlist_index(subplan->targetlist); + + output_targetlist = NIL; + foreach(l, plan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Node *newexpr; + + /* If it's a sort/group item, first try to match by sortref */ + if (tle->ressortgroupref != 0) + { + newexpr = (Node *) + search_indexed_tlist_for_sortgroupref(tle->expr, + tle->ressortgroupref, + subplan_itlist, + OUTER_VAR); + if (!newexpr) + newexpr = fix_upper_expr(root, + (Node *) tle->expr, + subplan_itlist, + OUTER_VAR, + rtoffset, + NUM_EXEC_TLIST(plan)); + } + else + newexpr = fix_upper_expr(root, + (Node *) tle->expr, + subplan_itlist, + OUTER_VAR, + rtoffset, + NUM_EXEC_TLIST(plan)); + tle = flatCopyTargetEntry(tle); + tle->expr = (Expr *) newexpr; + output_targetlist = lappend(output_targetlist, tle); + } + plan->targetlist = output_targetlist; + + plan->qual = (List *) + fix_upper_expr(root, + (Node *) plan->qual, + subplan_itlist, + OUTER_VAR, + rtoffset, + NUM_EXEC_QUAL(plan)); + + pfree(subplan_itlist); +} + +/* + * set_param_references + * Initialize the initParam list in Gather or Gather merge node such that + * it contains reference of all the params that needs to be evaluated + * before execution of the node. It contains the initplan params that are + * being passed to the plan nodes below it. + */ +static void +set_param_references(PlannerInfo *root, Plan *plan) +{ + Assert(IsA(plan, Gather) || IsA(plan, GatherMerge)); + + if (plan->lefttree->extParam) + { + PlannerInfo *proot; + Bitmapset *initSetParam = NULL; + ListCell *l; + + for (proot = root; proot != NULL; proot = proot->parent_root) + { + foreach(l, proot->init_plans) + { + SubPlan *initsubplan = (SubPlan *) lfirst(l); + ListCell *l2; + + foreach(l2, initsubplan->setParam) + { + initSetParam = bms_add_member(initSetParam, lfirst_int(l2)); + } + } + } + + /* + * Remember the list of all external initplan params that are used by + * the children of Gather or Gather merge node. + */ + if (IsA(plan, Gather)) + ((Gather *) plan)->initParam = + bms_intersect(plan->lefttree->extParam, initSetParam); + else + ((GatherMerge *) plan)->initParam = + bms_intersect(plan->lefttree->extParam, initSetParam); + } +} + +/* + * Recursively scan an expression tree and convert Aggrefs to the proper + * intermediate form for combining aggregates. This means (1) replacing each + * one's argument list with a single argument that is the original Aggref + * modified to show partial aggregation and (2) changing the upper Aggref to + * show combining aggregation. + * + * After this step, set_upper_references will replace the partial Aggrefs + * with Vars referencing the lower Agg plan node's outputs, so that the final + * form seen by the executor is a combining Aggref with a Var as input. + * + * It's rather messy to postpone this step until setrefs.c; ideally it'd be + * done in createplan.c. The difficulty is that once we modify the Aggref + * expressions, they will no longer be equal() to their original form and + * so cross-plan-node-level matches will fail. So this has to happen after + * the plan node above the Agg has resolved its subplan references. + */ +static Node * +convert_combining_aggrefs(Node *node, void *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Aggref)) + { + Aggref *orig_agg = (Aggref *) node; + Aggref *child_agg; + Aggref *parent_agg; + + /* Assert we've not chosen to partial-ize any unsupported cases */ + Assert(orig_agg->aggorder == NIL); + Assert(orig_agg->aggdistinct == NIL); + + /* + * Since aggregate calls can't be nested, we needn't recurse into the + * arguments. But for safety, flat-copy the Aggref node itself rather + * than modifying it in-place. + */ + child_agg = makeNode(Aggref); + memcpy(child_agg, orig_agg, sizeof(Aggref)); + + /* + * For the parent Aggref, we want to copy all the fields of the + * original aggregate *except* the args list, which we'll replace + * below, and the aggfilter expression, which should be applied only + * by the child not the parent. Rather than explicitly knowing about + * all the other fields here, we can momentarily modify child_agg to + * provide a suitable source for copyObject. + */ + child_agg->args = NIL; + child_agg->aggfilter = NULL; + parent_agg = copyObject(child_agg); + child_agg->args = orig_agg->args; + child_agg->aggfilter = orig_agg->aggfilter; + + /* + * Now, set up child_agg to represent the first phase of partial + * aggregation. For now, assume serialization is required. + */ + mark_partial_aggref(child_agg, AGGSPLIT_INITIAL_SERIAL); + + /* + * And set up parent_agg to represent the second phase. + */ + parent_agg->args = list_make1(makeTargetEntry((Expr *) child_agg, + 1, NULL, false)); + mark_partial_aggref(parent_agg, AGGSPLIT_FINAL_DESERIAL); + + return (Node *) parent_agg; + } + return expression_tree_mutator(node, convert_combining_aggrefs, + (void *) context); +} + +/* + * set_dummy_tlist_references + * Replace the targetlist of an upper-level plan node with a simple + * list of OUTER_VAR references to its child. + * + * This is used for plan types like Sort and Append that don't evaluate + * their targetlists. Although the executor doesn't care at all what's in + * the tlist, EXPLAIN needs it to be realistic. + * + * Note: we could almost use set_upper_references() here, but it fails for + * Append for lack of a lefttree subplan. Single-purpose code is faster + * anyway. + */ +static void +set_dummy_tlist_references(Plan *plan, int rtoffset) +{ + List *output_targetlist; + ListCell *l; + + output_targetlist = NIL; + foreach(l, plan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Var *oldvar = (Var *) tle->expr; + Var *newvar; + + /* + * As in search_indexed_tlist_for_non_var(), we prefer to keep Consts + * as Consts, not Vars referencing Consts. Here, there's no speed + * advantage to be had, but it makes EXPLAIN output look cleaner, and + * again it avoids confusing the executor. + */ + if (IsA(oldvar, Const)) + { + /* just reuse the existing TLE node */ + output_targetlist = lappend(output_targetlist, tle); + continue; + } + + newvar = makeVar(OUTER_VAR, + tle->resno, + exprType((Node *) oldvar), + exprTypmod((Node *) oldvar), + exprCollation((Node *) oldvar), + 0); + if (IsA(oldvar, Var) && + oldvar->varnosyn > 0) + { + newvar->varnosyn = oldvar->varnosyn + rtoffset; + newvar->varattnosyn = oldvar->varattnosyn; + } + else + { + newvar->varnosyn = 0; /* wasn't ever a plain Var */ + newvar->varattnosyn = 0; + } + + tle = flatCopyTargetEntry(tle); + tle->expr = (Expr *) newvar; + output_targetlist = lappend(output_targetlist, tle); + } + plan->targetlist = output_targetlist; + + /* We don't touch plan->qual here */ +} + + +/* + * build_tlist_index --- build an index data structure for a child tlist + * + * In most cases, subplan tlists will be "flat" tlists with only Vars, + * so we try to optimize that case by extracting information about Vars + * in advance. Matching a parent tlist to a child is still an O(N^2) + * operation, but at least with a much smaller constant factor than plain + * tlist_member() searches. + * + * The result of this function is an indexed_tlist struct to pass to + * search_indexed_tlist_for_var() or search_indexed_tlist_for_non_var(). + * When done, the indexed_tlist may be freed with a single pfree(). + */ +static indexed_tlist * +build_tlist_index(List *tlist) +{ + indexed_tlist *itlist; + tlist_vinfo *vinfo; + ListCell *l; + + /* Create data structure with enough slots for all tlist entries */ + itlist = (indexed_tlist *) + palloc(offsetof(indexed_tlist, vars) + + list_length(tlist) * sizeof(tlist_vinfo)); + + itlist->tlist = tlist; + itlist->has_ph_vars = false; + itlist->has_non_vars = false; + + /* Find the Vars and fill in the index array */ + vinfo = itlist->vars; + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->expr && IsA(tle->expr, Var)) + { + Var *var = (Var *) tle->expr; + + vinfo->varno = var->varno; + vinfo->varattno = var->varattno; + vinfo->resno = tle->resno; + vinfo++; + } + else if (tle->expr && IsA(tle->expr, PlaceHolderVar)) + itlist->has_ph_vars = true; + else + itlist->has_non_vars = true; + } + + itlist->num_vars = (vinfo - itlist->vars); + + return itlist; +} + +/* + * build_tlist_index_other_vars --- build a restricted tlist index + * + * This is like build_tlist_index, but we only index tlist entries that + * are Vars belonging to some rel other than the one specified. We will set + * has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars + * (so nothing other than Vars and PlaceHolderVars can be matched). + */ +static indexed_tlist * +build_tlist_index_other_vars(List *tlist, int ignore_rel) +{ + indexed_tlist *itlist; + tlist_vinfo *vinfo; + ListCell *l; + + /* Create data structure with enough slots for all tlist entries */ + itlist = (indexed_tlist *) + palloc(offsetof(indexed_tlist, vars) + + list_length(tlist) * sizeof(tlist_vinfo)); + + itlist->tlist = tlist; + itlist->has_ph_vars = false; + itlist->has_non_vars = false; + + /* Find the desired Vars and fill in the index array */ + vinfo = itlist->vars; + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->expr && IsA(tle->expr, Var)) + { + Var *var = (Var *) tle->expr; + + if (var->varno != ignore_rel) + { + vinfo->varno = var->varno; + vinfo->varattno = var->varattno; + vinfo->resno = tle->resno; + vinfo++; + } + } + else if (tle->expr && IsA(tle->expr, PlaceHolderVar)) + itlist->has_ph_vars = true; + } + + itlist->num_vars = (vinfo - itlist->vars); + + return itlist; +} + +/* + * search_indexed_tlist_for_var --- find a Var in an indexed tlist + * + * If a match is found, return a copy of the given Var with suitably + * modified varno/varattno (to wit, newvarno and the resno of the TLE entry). + * Also ensure that varnosyn is incremented by rtoffset. + * If no match, return NULL. + */ +static Var * +search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist, + int newvarno, int rtoffset) +{ + int varno = var->varno; + AttrNumber varattno = var->varattno; + tlist_vinfo *vinfo; + int i; + + vinfo = itlist->vars; + i = itlist->num_vars; + while (i-- > 0) + { + if (vinfo->varno == varno && vinfo->varattno == varattno) + { + /* Found a match */ + Var *newvar = copyVar(var); + + newvar->varno = newvarno; + newvar->varattno = vinfo->resno; + if (newvar->varnosyn > 0) + newvar->varnosyn += rtoffset; + return newvar; + } + vinfo++; + } + return NULL; /* no match */ +} + +/* + * search_indexed_tlist_for_non_var --- find a non-Var in an indexed tlist + * + * If a match is found, return a Var constructed to reference the tlist item. + * If no match, return NULL. + * + * NOTE: it is a waste of time to call this unless itlist->has_ph_vars or + * itlist->has_non_vars. Furthermore, set_join_references() relies on being + * able to prevent matching of non-Vars by clearing itlist->has_non_vars, + * so there's a correctness reason not to call it unless that's set. + */ +static Var * +search_indexed_tlist_for_non_var(Expr *node, + indexed_tlist *itlist, int newvarno) +{ + TargetEntry *tle; + + /* + * If it's a simple Const, replacing it with a Var is silly, even if there + * happens to be an identical Const below; a Var is more expensive to + * execute than a Const. What's more, replacing it could confuse some + * places in the executor that expect to see simple Consts for, eg, + * dropped columns. + */ + if (IsA(node, Const)) + return NULL; + + tle = tlist_member(node, itlist->tlist); + if (tle) + { + /* Found a matching subplan output expression */ + Var *newvar; + + newvar = makeVarFromTargetEntry(newvarno, tle); + newvar->varnosyn = 0; /* wasn't ever a plain Var */ + newvar->varattnosyn = 0; + return newvar; + } + return NULL; /* no match */ +} + +/* + * search_indexed_tlist_for_sortgroupref --- find a sort/group expression + * + * If a match is found, return a Var constructed to reference the tlist item. + * If no match, return NULL. + * + * This is needed to ensure that we select the right subplan TLE in cases + * where there are multiple textually-equal()-but-volatile sort expressions. + * And it's also faster than search_indexed_tlist_for_non_var. + */ +static Var * +search_indexed_tlist_for_sortgroupref(Expr *node, + Index sortgroupref, + indexed_tlist *itlist, + int newvarno) +{ + ListCell *lc; + + foreach(lc, itlist->tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + /* The equal() check should be redundant, but let's be paranoid */ + if (tle->ressortgroupref == sortgroupref && + equal(node, tle->expr)) + { + /* Found a matching subplan output expression */ + Var *newvar; + + newvar = makeVarFromTargetEntry(newvarno, tle); + newvar->varnosyn = 0; /* wasn't ever a plain Var */ + newvar->varattnosyn = 0; + return newvar; + } + } + return NULL; /* no match */ +} + +/* + * fix_join_expr + * Create a new set of targetlist entries or join qual clauses by + * changing the varno/varattno values of variables in the clauses + * to reference target list values from the outer and inner join + * relation target lists. Also perform opcode lookup and add + * regclass OIDs to root->glob->relationOids. + * + * This is used in four different scenarios: + * 1) a normal join clause, where all the Vars in the clause *must* be + * replaced by OUTER_VAR or INNER_VAR references. In this case + * acceptable_rel should be zero so that any failure to match a Var will be + * reported as an error. + * 2) RETURNING clauses, which may contain both Vars of the target relation + * and Vars of other relations. In this case we want to replace the + * other-relation Vars by OUTER_VAR references, while leaving target Vars + * alone. Thus inner_itlist = NULL and acceptable_rel = the ID of the + * target relation should be passed. + * 3) ON CONFLICT UPDATE SET/WHERE clauses. Here references to EXCLUDED are + * to be replaced with INNER_VAR references, while leaving target Vars (the + * to-be-updated relation) alone. Correspondingly inner_itlist is to be + * EXCLUDED elements, outer_itlist = NULL and acceptable_rel the target + * relation. + * 4) MERGE. In this case, references to the source relation are to be + * replaced with INNER_VAR references, leaving Vars of the target + * relation (the to-be-modified relation) alone. So inner_itlist is to be + * the source relation elements, outer_itlist = NULL and acceptable_rel + * the target relation. + * + * 'clauses' is the targetlist or list of join clauses + * 'outer_itlist' is the indexed target list of the outer join relation, + * or NULL + * 'inner_itlist' is the indexed target list of the inner join relation, + * or NULL + * 'acceptable_rel' is either zero or the rangetable index of a relation + * whose Vars may appear in the clause without provoking an error + * 'rtoffset': how much to increment varnos by + * 'num_exec': estimated number of executions of expression + * + * Returns the new expression tree. The original clause structure is + * not modified. + */ +static List * +fix_join_expr(PlannerInfo *root, + List *clauses, + indexed_tlist *outer_itlist, + indexed_tlist *inner_itlist, + Index acceptable_rel, + int rtoffset, + double num_exec) +{ + fix_join_expr_context context; + + context.root = root; + context.outer_itlist = outer_itlist; + context.inner_itlist = inner_itlist; + context.acceptable_rel = acceptable_rel; + context.rtoffset = rtoffset; + context.num_exec = num_exec; + return (List *) fix_join_expr_mutator((Node *) clauses, &context); +} + +static Node * +fix_join_expr_mutator(Node *node, fix_join_expr_context *context) +{ + Var *newvar; + + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* Look for the var in the input tlists, first in the outer */ + if (context->outer_itlist) + { + newvar = search_indexed_tlist_for_var(var, + context->outer_itlist, + OUTER_VAR, + context->rtoffset); + if (newvar) + return (Node *) newvar; + } + + /* then in the inner. */ + if (context->inner_itlist) + { + newvar = search_indexed_tlist_for_var(var, + context->inner_itlist, + INNER_VAR, + context->rtoffset); + if (newvar) + return (Node *) newvar; + } + + /* If it's for acceptable_rel, adjust and return it */ + if (var->varno == context->acceptable_rel) + { + var = copyVar(var); + var->varno += context->rtoffset; + if (var->varnosyn > 0) + var->varnosyn += context->rtoffset; + return (Node *) var; + } + + /* No referent found for Var */ + elog(ERROR, "variable not found in subplan target lists"); + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* See if the PlaceHolderVar has bubbled up from a lower plan node */ + if (context->outer_itlist && context->outer_itlist->has_ph_vars) + { + newvar = search_indexed_tlist_for_non_var((Expr *) phv, + context->outer_itlist, + OUTER_VAR); + if (newvar) + return (Node *) newvar; + } + if (context->inner_itlist && context->inner_itlist->has_ph_vars) + { + newvar = search_indexed_tlist_for_non_var((Expr *) phv, + context->inner_itlist, + INNER_VAR); + if (newvar) + return (Node *) newvar; + } + + /* If not supplied by input plans, evaluate the contained expr */ + return fix_join_expr_mutator((Node *) phv->phexpr, context); + } + /* Try matching more complex expressions too, if tlists have any */ + if (context->outer_itlist && context->outer_itlist->has_non_vars) + { + newvar = search_indexed_tlist_for_non_var((Expr *) node, + context->outer_itlist, + OUTER_VAR); + if (newvar) + return (Node *) newvar; + } + if (context->inner_itlist && context->inner_itlist->has_non_vars) + { + newvar = search_indexed_tlist_for_non_var((Expr *) node, + context->inner_itlist, + INNER_VAR); + if (newvar) + return (Node *) newvar; + } + /* Special cases (apply only AFTER failing to match to lower tlist) */ + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); + if (IsA(node, AlternativeSubPlan)) + return fix_join_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + fix_expr_common(context->root, node); + return expression_tree_mutator(node, + fix_join_expr_mutator, + (void *) context); +} + +/* + * fix_upper_expr + * Modifies an expression tree so that all Var nodes reference outputs + * of a subplan. Also looks for Aggref nodes that should be replaced + * by initplan output Params. Also performs opcode lookup, and adds + * regclass OIDs to root->glob->relationOids. + * + * This is used to fix up target and qual expressions of non-join upper-level + * plan nodes, as well as index-only scan nodes. + * + * An error is raised if no matching var can be found in the subplan tlist + * --- so this routine should only be applied to nodes whose subplans' + * targetlists were generated by flattening the expressions used in the + * parent node. + * + * If itlist->has_non_vars is true, then we try to match whole subexpressions + * against elements of the subplan tlist, so that we can avoid recomputing + * expressions that were already computed by the subplan. (This is relatively + * expensive, so we don't want to try it in the common case where the + * subplan tlist is just a flattened list of Vars.) + * + * 'node': the tree to be fixed (a target item or qual) + * 'subplan_itlist': indexed target list for subplan (or index) + * 'newvarno': varno to use for Vars referencing tlist elements + * 'rtoffset': how much to increment varnos by + * 'num_exec': estimated number of executions of expression + * + * The resulting tree is a copy of the original in which all Var nodes have + * varno = newvarno, varattno = resno of corresponding targetlist element. + * The original tree is not modified. + */ +static Node * +fix_upper_expr(PlannerInfo *root, + Node *node, + indexed_tlist *subplan_itlist, + int newvarno, + int rtoffset, + double num_exec) +{ + fix_upper_expr_context context; + + context.root = root; + context.subplan_itlist = subplan_itlist; + context.newvarno = newvarno; + context.rtoffset = rtoffset; + context.num_exec = num_exec; + return fix_upper_expr_mutator(node, &context); +} + +static Node * +fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) +{ + Var *newvar; + + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + newvar = search_indexed_tlist_for_var(var, + context->subplan_itlist, + context->newvarno, + context->rtoffset); + if (!newvar) + elog(ERROR, "variable not found in subplan target list"); + return (Node *) newvar; + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* See if the PlaceHolderVar has bubbled up from a lower plan node */ + if (context->subplan_itlist->has_ph_vars) + { + newvar = search_indexed_tlist_for_non_var((Expr *) phv, + context->subplan_itlist, + context->newvarno); + if (newvar) + return (Node *) newvar; + } + /* If not supplied by input plan, evaluate the contained expr */ + return fix_upper_expr_mutator((Node *) phv->phexpr, context); + } + /* Try matching more complex expressions too, if tlist has any */ + if (context->subplan_itlist->has_non_vars) + { + newvar = search_indexed_tlist_for_non_var((Expr *) node, + context->subplan_itlist, + context->newvarno); + if (newvar) + return (Node *) newvar; + } + /* Special cases (apply only AFTER failing to match to lower tlist) */ + if (IsA(node, Param)) + return fix_param_node(context->root, (Param *) node); + if (IsA(node, Aggref)) + { + Aggref *aggref = (Aggref *) node; + + /* See if the Aggref should be replaced by a Param */ + if (context->root->minmax_aggs != NIL && + list_length(aggref->args) == 1) + { + TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args); + ListCell *lc; + + foreach(lc, context->root->minmax_aggs) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + if (mminfo->aggfnoid == aggref->aggfnoid && + equal(mminfo->target, curTarget->expr)) + return (Node *) copyObject(mminfo->param); + } + } + /* If no match, just fall through to process it normally */ + } + if (IsA(node, AlternativeSubPlan)) + return fix_upper_expr_mutator(fix_alternative_subplan(context->root, + (AlternativeSubPlan *) node, + context->num_exec), + context); + fix_expr_common(context->root, node); + return expression_tree_mutator(node, + fix_upper_expr_mutator, + (void *) context); +} + +/* + * set_returning_clause_references + * Perform setrefs.c's work on a RETURNING targetlist + * + * If the query involves more than just the result table, we have to + * adjust any Vars that refer to other tables to reference junk tlist + * entries in the top subplan's targetlist. Vars referencing the result + * table should be left alone, however (the executor will evaluate them + * using the actual heap tuple, after firing triggers if any). In the + * adjusted RETURNING list, result-table Vars will have their original + * varno (plus rtoffset), but Vars for other rels will have varno OUTER_VAR. + * + * We also must perform opcode lookup and add regclass OIDs to + * root->glob->relationOids. + * + * 'rlist': the RETURNING targetlist to be fixed + * 'topplan': the top subplan node that will be just below the ModifyTable + * node (note it's not yet passed through set_plan_refs) + * 'resultRelation': RT index of the associated result relation + * 'rtoffset': how much to increment varnos by + * + * Note: the given 'root' is for the parent query level, not the 'topplan'. + * This does not matter currently since we only access the dependency-item + * lists in root->glob, but it would need some hacking if we wanted a root + * that actually matches the subplan. + * + * Note: resultRelation is not yet adjusted by rtoffset. + */ +static List * +set_returning_clause_references(PlannerInfo *root, + List *rlist, + Plan *topplan, + Index resultRelation, + int rtoffset) +{ + indexed_tlist *itlist; + + /* + * We can perform the desired Var fixup by abusing the fix_join_expr + * machinery that formerly handled inner indexscan fixup. We search the + * top plan's targetlist for Vars of non-result relations, and use + * fix_join_expr to convert RETURNING Vars into references to those tlist + * entries, while leaving result-rel Vars as-is. + * + * PlaceHolderVars will also be sought in the targetlist, but no + * more-complex expressions will be. Note that it is not possible for a + * PlaceHolderVar to refer to the result relation, since the result is + * never below an outer join. If that case could happen, we'd have to be + * prepared to pick apart the PlaceHolderVar and evaluate its contained + * expression instead. + */ + itlist = build_tlist_index_other_vars(topplan->targetlist, resultRelation); + + rlist = fix_join_expr(root, + rlist, + itlist, + NULL, + resultRelation, + rtoffset, + NUM_EXEC_TLIST(topplan)); + + pfree(itlist); + + return rlist; +} + +/* + * fix_windowagg_condition_expr_mutator + * Mutator function for replacing WindowFuncs with the corresponding Var + * in the targetlist which references that WindowFunc. + */ +static Node * +fix_windowagg_condition_expr_mutator(Node *node, + fix_windowagg_cond_context *context) +{ + if (node == NULL) + return NULL; + + if (IsA(node, WindowFunc)) + { + Var *newvar; + + newvar = search_indexed_tlist_for_non_var((Expr *) node, + context->subplan_itlist, + context->newvarno); + if (newvar) + return (Node *) newvar; + elog(ERROR, "WindowFunc not found in subplan target lists"); + } + + return expression_tree_mutator(node, + fix_windowagg_condition_expr_mutator, + (void *) context); +} + +/* + * fix_windowagg_condition_expr + * Converts references in 'runcondition' so that any WindowFunc + * references are swapped out for a Var which references the matching + * WindowFunc in 'subplan_itlist'. + */ +static List * +fix_windowagg_condition_expr(PlannerInfo *root, + List *runcondition, + indexed_tlist *subplan_itlist) +{ + fix_windowagg_cond_context context; + + context.root = root; + context.subplan_itlist = subplan_itlist; + context.newvarno = 0; + + return (List *) fix_windowagg_condition_expr_mutator((Node *) runcondition, + &context); +} + +/* + * set_windowagg_runcondition_references + * Converts references in 'runcondition' so that any WindowFunc + * references are swapped out for a Var which references the matching + * WindowFunc in 'plan' targetlist. + */ +static List * +set_windowagg_runcondition_references(PlannerInfo *root, + List *runcondition, + Plan *plan) +{ + List *newlist; + indexed_tlist *itlist; + + itlist = build_tlist_index(plan->targetlist); + + newlist = fix_windowagg_condition_expr(root, runcondition, itlist); + + pfree(itlist); + + return newlist; +} + +/***************************************************************************** + * QUERY DEPENDENCY MANAGEMENT + *****************************************************************************/ + +/* + * record_plan_function_dependency + * Mark the current plan as depending on a particular function. + * + * This is exported so that the function-inlining code can record a + * dependency on a function that it's removed from the plan tree. + */ +void +record_plan_function_dependency(PlannerInfo *root, Oid funcid) +{ + /* + * For performance reasons, we don't bother to track built-in functions; + * we just assume they'll never change (or at least not in ways that'd + * invalidate plans using them). For this purpose we can consider a + * built-in function to be one with OID less than FirstUnpinnedObjectId. + * Note that the OID generator guarantees never to generate such an OID + * after startup, even at OID wraparound. + */ + if (funcid >= (Oid) FirstUnpinnedObjectId) + { + PlanInvalItem *inval_item = makeNode(PlanInvalItem); + + /* + * It would work to use any syscache on pg_proc, but the easiest is + * PROCOID since we already have the function's OID at hand. Note + * that plancache.c knows we use PROCOID. + */ + inval_item->cacheId = PROCOID; + inval_item->hashValue = GetSysCacheHashValue1(PROCOID, + ObjectIdGetDatum(funcid)); + + root->glob->invalItems = lappend(root->glob->invalItems, inval_item); + } +} + +/* + * record_plan_type_dependency + * Mark the current plan as depending on a particular type. + * + * This is exported so that eval_const_expressions can record a + * dependency on a domain that it's removed a CoerceToDomain node for. + * + * We don't currently need to record dependencies on domains that the + * plan contains CoerceToDomain nodes for, though that might change in + * future. Hence, this isn't actually called in this module, though + * someday fix_expr_common might call it. + */ +void +record_plan_type_dependency(PlannerInfo *root, Oid typid) +{ + /* + * As in record_plan_function_dependency, ignore the possibility that + * someone would change a built-in domain. + */ + if (typid >= (Oid) FirstUnpinnedObjectId) + { + PlanInvalItem *inval_item = makeNode(PlanInvalItem); + + /* + * It would work to use any syscache on pg_type, but the easiest is + * TYPEOID since we already have the type's OID at hand. Note that + * plancache.c knows we use TYPEOID. + */ + inval_item->cacheId = TYPEOID; + inval_item->hashValue = GetSysCacheHashValue1(TYPEOID, + ObjectIdGetDatum(typid)); + + root->glob->invalItems = lappend(root->glob->invalItems, inval_item); + } +} + +/* + * extract_query_dependencies + * Given a rewritten, but not yet planned, query or queries + * (i.e. a Query node or list of Query nodes), extract dependencies + * just as set_plan_references would do. Also detect whether any + * rewrite steps were affected by RLS. + * + * This is needed by plancache.c to handle invalidation of cached unplanned + * queries. + * + * Note: this does not go through eval_const_expressions, and hence doesn't + * reflect its additions of inlined functions and elided CoerceToDomain nodes + * to the invalItems list. This is obviously OK for functions, since we'll + * see them in the original query tree anyway. For domains, it's OK because + * we don't care about domains unless they get elided. That is, a plan might + * have domain dependencies that the query tree doesn't. + */ +void +extract_query_dependencies(Node *query, + List **relationOids, + List **invalItems, + bool *hasRowSecurity) +{ + PlannerGlobal glob; + PlannerInfo root; + + /* Make up dummy planner state so we can use this module's machinery */ + MemSet(&glob, 0, sizeof(glob)); + glob.type = T_PlannerGlobal; + glob.relationOids = NIL; + glob.invalItems = NIL; + /* Hack: we use glob.dependsOnRole to collect hasRowSecurity flags */ + glob.dependsOnRole = false; + + MemSet(&root, 0, sizeof(root)); + root.type = T_PlannerInfo; + root.glob = &glob; + + (void) extract_query_dependencies_walker(query, &root); + + *relationOids = glob.relationOids; + *invalItems = glob.invalItems; + *hasRowSecurity = glob.dependsOnRole; +} + +/* + * Tree walker for extract_query_dependencies. + * + * This is exported so that expression_planner_with_deps can call it on + * simple expressions (post-planning, not before planning, in that case). + * In that usage, glob.dependsOnRole isn't meaningful, but the relationOids + * and invalItems lists are added to as needed. + */ +bool +extract_query_dependencies_walker(Node *node, PlannerInfo *context) +{ + if (node == NULL) + return false; + Assert(!IsA(node, PlaceHolderVar)); + if (IsA(node, Query)) + { + Query *query = (Query *) node; + ListCell *lc; + + if (query->commandType == CMD_UTILITY) + { + /* + * This logic must handle any utility command for which parse + * analysis was nontrivial (cf. stmt_requires_parse_analysis). + * + * Notably, CALL requires its own processing. + */ + if (IsA(query->utilityStmt, CallStmt)) + { + CallStmt *callstmt = (CallStmt *) query->utilityStmt; + + /* We need not examine funccall, just the transformed exprs */ + (void) extract_query_dependencies_walker((Node *) callstmt->funcexpr, + context); + (void) extract_query_dependencies_walker((Node *) callstmt->outargs, + context); + return false; + } + + /* + * Ignore other utility statements, except those (such as EXPLAIN) + * that contain a parsed-but-not-planned query. For those, we + * just need to transfer our attention to the contained query. + */ + query = UtilityContainsQuery(query->utilityStmt); + if (query == NULL) + return false; + } + + /* Remember if any Query has RLS quals applied by rewriter */ + if (query->hasRowSecurity) + context->glob->dependsOnRole = true; + + /* Collect relation OIDs in this Query's rtable */ + foreach(lc, query->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (rte->rtekind == RTE_RELATION) + context->glob->relationOids = + lappend_oid(context->glob->relationOids, rte->relid); + else if (rte->rtekind == RTE_NAMEDTUPLESTORE && + OidIsValid(rte->relid)) + context->glob->relationOids = + lappend_oid(context->glob->relationOids, + rte->relid); + } + + /* And recurse into the query's subexpressions */ + return query_tree_walker(query, extract_query_dependencies_walker, + (void *) context, 0); + } + /* Extract function dependencies and check for regclass Consts */ + fix_expr_common(context, node); + return expression_tree_walker(node, extract_query_dependencies_walker, + (void *) context); +} diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c new file mode 100644 index 0000000..a195788 --- /dev/null +++ b/src/backend/optimizer/plan/subselect.c @@ -0,0 +1,2999 @@ +/*------------------------------------------------------------------------- + * + * subselect.c + * Planning routines for subselects. + * + * This module deals with SubLinks and CTEs, but not subquery RTEs (i.e., + * not sub-SELECT-in-FROM cases). + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/plan/subselect.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/paramassign.h" +#include "optimizer/pathnode.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/prep.h" +#include "optimizer/subselect.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +typedef struct convert_testexpr_context +{ + PlannerInfo *root; + List *subst_nodes; /* Nodes to substitute for Params */ +} convert_testexpr_context; + +typedef struct process_sublinks_context +{ + PlannerInfo *root; + bool isTopQual; +} process_sublinks_context; + +typedef struct finalize_primnode_context +{ + PlannerInfo *root; + Bitmapset *paramids; /* Non-local PARAM_EXEC paramids found */ +} finalize_primnode_context; + +typedef struct inline_cte_walker_context +{ + const char *ctename; /* name and relative level of target CTE */ + int levelsup; + Query *ctequery; /* query to substitute */ +} inline_cte_walker_context; + + +static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, + List *plan_params, + SubLinkType subLinkType, int subLinkId, + Node *testexpr, List *testexpr_paramids, + bool unknownEqFalse); +static List *generate_subquery_params(PlannerInfo *root, List *tlist, + List **paramIds); +static List *generate_subquery_vars(PlannerInfo *root, List *tlist, + Index varno); +static Node *convert_testexpr(PlannerInfo *root, + Node *testexpr, + List *subst_nodes); +static Node *convert_testexpr_mutator(Node *node, + convert_testexpr_context *context); +static bool subplan_is_hashable(Plan *plan); +static bool subpath_is_hashable(Path *path); +static bool testexpr_is_hashable(Node *testexpr, List *param_ids); +static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids); +static bool hash_ok_operator(OpExpr *expr); +static bool contain_dml(Node *node); +static bool contain_dml_walker(Node *node, void *context); +static bool contain_outer_selfref(Node *node); +static bool contain_outer_selfref_walker(Node *node, Index *depth); +static void inline_cte(PlannerInfo *root, CommonTableExpr *cte); +static bool inline_cte_walker(Node *node, inline_cte_walker_context *context); +static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); +static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, + Node **testexpr, List **paramIds); +static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); +static Node *process_sublinks_mutator(Node *node, + process_sublinks_context *context); +static Bitmapset *finalize_plan(PlannerInfo *root, + Plan *plan, + int gather_param, + Bitmapset *valid_params, + Bitmapset *scan_params); +static bool finalize_primnode(Node *node, finalize_primnode_context *context); +static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); + + +/* + * Get the datatype/typmod/collation of the first column of the plan's output. + * + * This information is stored for ARRAY_SUBLINK execution and for + * exprType()/exprTypmod()/exprCollation(), which have no way to get at the + * plan associated with a SubPlan node. We really only need the info for + * EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency we save it + * always. + */ +static void +get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, + Oid *colcollation) +{ + /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */ + if (plan->targetlist) + { + TargetEntry *tent = linitial_node(TargetEntry, plan->targetlist); + + if (!tent->resjunk) + { + *coltype = exprType((Node *) tent->expr); + *coltypmod = exprTypmod((Node *) tent->expr); + *colcollation = exprCollation((Node *) tent->expr); + return; + } + } + *coltype = VOIDOID; + *coltypmod = -1; + *colcollation = InvalidOid; +} + +/* + * Convert a SubLink (as created by the parser) into a SubPlan. + * + * We are given the SubLink's contained query, type, ID, and testexpr. We are + * also told if this expression appears at top level of a WHERE/HAVING qual. + * + * Note: we assume that the testexpr has been AND/OR flattened (actually, + * it's been through eval_const_expressions), but not converted to + * implicit-AND form; and any SubLinks in it should already have been + * converted to SubPlans. The subquery is as yet untouched, however. + * + * The result is whatever we need to substitute in place of the SubLink node + * in the executable expression. If we're going to do the subplan as a + * regular subplan, this will be the constructed SubPlan node. If we're going + * to do the subplan as an InitPlan, the SubPlan node instead goes into + * root->init_plans, and what we return here is an expression tree + * representing the InitPlan's result: usually just a Param node representing + * a single scalar result, but possibly a row comparison tree containing + * multiple Param nodes, or for a MULTIEXPR subquery a simple NULL constant + * (since the real output Params are elsewhere in the tree, and the MULTIEXPR + * subquery itself is in a resjunk tlist entry whose value is uninteresting). + */ +static Node * +make_subplan(PlannerInfo *root, Query *orig_subquery, + SubLinkType subLinkType, int subLinkId, + Node *testexpr, bool isTopQual) +{ + Query *subquery; + bool simple_exists = false; + double tuple_fraction; + PlannerInfo *subroot; + RelOptInfo *final_rel; + Path *best_path; + Plan *plan; + List *plan_params; + Node *result; + + /* + * Copy the source Query node. This is a quick and dirty kluge to resolve + * the fact that the parser can generate trees with multiple links to the + * same sub-Query node, but the planner wants to scribble on the Query. + * Try to clean this up when we do querytree redesign... + */ + subquery = copyObject(orig_subquery); + + /* + * If it's an EXISTS subplan, we might be able to simplify it. + */ + if (subLinkType == EXISTS_SUBLINK) + simple_exists = simplify_EXISTS_query(root, subquery); + + /* + * For an EXISTS subplan, tell lower-level planner to expect that only the + * first tuple will be retrieved. For ALL and ANY subplans, we will be + * able to stop evaluating if the test condition fails or matches, so very + * often not all the tuples will be retrieved; for lack of a better idea, + * specify 50% retrieval. For EXPR, MULTIEXPR, and ROWCOMPARE subplans, + * use default behavior (we're only expecting one row out, anyway). + * + * NOTE: if you change these numbers, also change cost_subplan() in + * path/costsize.c. + * + * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash + * its output. In that case it would've been better to specify full + * retrieval. At present, however, we can only check hashability after + * we've made the subplan :-(. (Determining whether it'll fit in hash_mem + * is the really hard part.) Therefore, we don't want to be too + * optimistic about the percentage of tuples retrieved, for fear of + * selecting a plan that's bad for the materialization case. + */ + if (subLinkType == EXISTS_SUBLINK) + tuple_fraction = 1.0; /* just like a LIMIT 1 */ + else if (subLinkType == ALL_SUBLINK || + subLinkType == ANY_SUBLINK) + tuple_fraction = 0.5; /* 50% */ + else + tuple_fraction = 0.0; /* default behavior */ + + /* plan_params should not be in use in current query level */ + Assert(root->plan_params == NIL); + + /* Generate Paths for the subquery */ + subroot = subquery_planner(root->glob, subquery, + root, + false, tuple_fraction); + + /* Isolate the params needed by this specific subplan */ + plan_params = root->plan_params; + root->plan_params = NIL; + + /* + * Select best Path and turn it into a Plan. At least for now, there + * seems no reason to postpone doing that. + */ + final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + best_path = get_cheapest_fractional_path(final_rel, tuple_fraction); + + plan = create_plan(subroot, best_path); + + /* And convert to SubPlan or InitPlan format. */ + result = build_subplan(root, plan, subroot, plan_params, + subLinkType, subLinkId, + testexpr, NIL, isTopQual); + + /* + * If it's a correlated EXISTS with an unimportant targetlist, we might be + * able to transform it to the equivalent of an IN and then implement it + * by hashing. We don't have enough information yet to tell which way is + * likely to be better (it depends on the expected number of executions of + * the EXISTS qual, and we are much too early in planning the outer query + * to be able to guess that). So we generate both plans, if possible, and + * leave it to setrefs.c to decide which to use. + */ + if (simple_exists && IsA(result, SubPlan)) + { + Node *newtestexpr; + List *paramIds; + + /* Make a second copy of the original subquery */ + subquery = copyObject(orig_subquery); + /* and re-simplify */ + simple_exists = simplify_EXISTS_query(root, subquery); + Assert(simple_exists); + /* See if it can be converted to an ANY query */ + subquery = convert_EXISTS_to_ANY(root, subquery, + &newtestexpr, ¶mIds); + if (subquery) + { + /* Generate Paths for the ANY subquery; we'll need all rows */ + subroot = subquery_planner(root->glob, subquery, + root, + false, 0.0); + + /* Isolate the params needed by this specific subplan */ + plan_params = root->plan_params; + root->plan_params = NIL; + + /* Select best Path */ + final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + best_path = final_rel->cheapest_total_path; + + /* Now we can check if it'll fit in hash_mem */ + if (subpath_is_hashable(best_path)) + { + SubPlan *hashplan; + AlternativeSubPlan *asplan; + + /* OK, finish planning the ANY subquery */ + plan = create_plan(subroot, best_path); + + /* ... and convert to SubPlan format */ + hashplan = castNode(SubPlan, + build_subplan(root, plan, subroot, + plan_params, + ANY_SUBLINK, 0, + newtestexpr, + paramIds, + true)); + /* Check we got what we expected */ + Assert(hashplan->parParam == NIL); + Assert(hashplan->useHashTable); + + /* Leave it to setrefs.c to decide which plan to use */ + asplan = makeNode(AlternativeSubPlan); + asplan->subplans = list_make2(result, hashplan); + result = (Node *) asplan; + root->hasAlternativeSubPlans = true; + } + } + } + + return result; +} + +/* + * Build a SubPlan node given the raw inputs --- subroutine for make_subplan + * + * Returns either the SubPlan, or a replacement expression if we decide to + * make it an InitPlan, as explained in the comments for make_subplan. + */ +static Node * +build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, + List *plan_params, + SubLinkType subLinkType, int subLinkId, + Node *testexpr, List *testexpr_paramids, + bool unknownEqFalse) +{ + Node *result; + SubPlan *splan; + bool isInitPlan; + ListCell *lc; + + /* + * Initialize the SubPlan node. Note plan_id, plan_name, and cost fields + * are set further down. + */ + splan = makeNode(SubPlan); + splan->subLinkType = subLinkType; + splan->testexpr = NULL; + splan->paramIds = NIL; + get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod, + &splan->firstColCollation); + splan->useHashTable = false; + splan->unknownEqFalse = unknownEqFalse; + splan->parallel_safe = plan->parallel_safe; + splan->setParam = NIL; + splan->parParam = NIL; + splan->args = NIL; + + /* + * Make parParam and args lists of param IDs and expressions that current + * query level will pass to this child plan. + */ + foreach(lc, plan_params) + { + PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lc); + Node *arg = pitem->item; + + /* + * The Var, PlaceHolderVar, Aggref or GroupingFunc has already been + * adjusted to have the correct varlevelsup, phlevelsup, or + * agglevelsup. + * + * If it's a PlaceHolderVar, Aggref or GroupingFunc, its arguments + * might contain SubLinks, which have not yet been processed (see the + * comments for SS_replace_correlation_vars). Do that now. + */ + if (IsA(arg, PlaceHolderVar) || + IsA(arg, Aggref) || + IsA(arg, GroupingFunc)) + arg = SS_process_sublinks(root, arg, false); + + splan->parParam = lappend_int(splan->parParam, pitem->paramId); + splan->args = lappend(splan->args, arg); + } + + /* + * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, + * ROWCOMPARE, or MULTIEXPR types can be used as initPlans. For EXISTS, + * EXPR, or ARRAY, we return a Param referring to the result of evaluating + * the initPlan. For ROWCOMPARE, we must modify the testexpr tree to + * contain PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted + * by the parser, and then return that tree. For MULTIEXPR, we return a + * null constant: the resjunk targetlist item containing the SubLink does + * not need to return anything useful, since the referencing Params are + * elsewhere. + */ + if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK) + { + Param *prm; + + Assert(testexpr == NULL); + prm = generate_new_exec_param(root, BOOLOID, -1, InvalidOid); + splan->setParam = list_make1_int(prm->paramid); + isInitPlan = true; + result = (Node *) prm; + } + else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK) + { + TargetEntry *te = linitial(plan->targetlist); + Param *prm; + + Assert(!te->resjunk); + Assert(testexpr == NULL); + prm = generate_new_exec_param(root, + exprType((Node *) te->expr), + exprTypmod((Node *) te->expr), + exprCollation((Node *) te->expr)); + splan->setParam = list_make1_int(prm->paramid); + isInitPlan = true; + result = (Node *) prm; + } + else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK) + { + TargetEntry *te = linitial(plan->targetlist); + Oid arraytype; + Param *prm; + + Assert(!te->resjunk); + Assert(testexpr == NULL); + arraytype = get_promoted_array_type(exprType((Node *) te->expr)); + if (!OidIsValid(arraytype)) + elog(ERROR, "could not find array type for datatype %s", + format_type_be(exprType((Node *) te->expr))); + prm = generate_new_exec_param(root, + arraytype, + exprTypmod((Node *) te->expr), + exprCollation((Node *) te->expr)); + splan->setParam = list_make1_int(prm->paramid); + isInitPlan = true; + result = (Node *) prm; + } + else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK) + { + /* Adjust the Params */ + List *params; + + Assert(testexpr != NULL); + params = generate_subquery_params(root, + plan->targetlist, + &splan->paramIds); + result = convert_testexpr(root, + testexpr, + params); + splan->setParam = list_copy(splan->paramIds); + isInitPlan = true; + + /* + * The executable expression is returned to become part of the outer + * plan's expression tree; it is not kept in the initplan node. + */ + } + else if (subLinkType == MULTIEXPR_SUBLINK) + { + /* + * Whether it's an initplan or not, it needs to set a PARAM_EXEC Param + * for each output column. + */ + List *params; + + Assert(testexpr == NULL); + params = generate_subquery_params(root, + plan->targetlist, + &splan->setParam); + + /* + * Save the list of replacement Params in the n'th cell of + * root->multiexpr_params; setrefs.c will use it to replace + * PARAM_MULTIEXPR Params. + */ + while (list_length(root->multiexpr_params) < subLinkId) + root->multiexpr_params = lappend(root->multiexpr_params, NIL); + lc = list_nth_cell(root->multiexpr_params, subLinkId - 1); + Assert(lfirst(lc) == NIL); + lfirst(lc) = params; + + /* It can be an initplan if there are no parParams. */ + if (splan->parParam == NIL) + { + isInitPlan = true; + result = (Node *) makeNullConst(RECORDOID, -1, InvalidOid); + } + else + { + isInitPlan = false; + result = (Node *) splan; + } + } + else + { + /* + * Adjust the Params in the testexpr, unless caller already took care + * of it (as indicated by passing a list of Param IDs). + */ + if (testexpr && testexpr_paramids == NIL) + { + List *params; + + params = generate_subquery_params(root, + plan->targetlist, + &splan->paramIds); + splan->testexpr = convert_testexpr(root, + testexpr, + params); + } + else + { + splan->testexpr = testexpr; + splan->paramIds = testexpr_paramids; + } + + /* + * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to + * initPlans, even when they are uncorrelated or undirect correlated, + * because we need to scan the output of the subplan for each outer + * tuple. But if it's a not-direct-correlated IN (= ANY) test, we + * might be able to use a hashtable to avoid comparing all the tuples. + */ + if (subLinkType == ANY_SUBLINK && + splan->parParam == NIL && + subplan_is_hashable(plan) && + testexpr_is_hashable(splan->testexpr, splan->paramIds)) + splan->useHashTable = true; + + /* + * Otherwise, we have the option to tack a Material node onto the top + * of the subplan, to reduce the cost of reading it repeatedly. This + * is pointless for a direct-correlated subplan, since we'd have to + * recompute its results each time anyway. For uncorrelated/undirect + * correlated subplans, we add Material unless the subplan's top plan + * node would materialize its output anyway. Also, if enable_material + * is false, then the user does not want us to materialize anything + * unnecessarily, so we don't. + */ + else if (splan->parParam == NIL && enable_material && + !ExecMaterializesOutput(nodeTag(plan))) + plan = materialize_finished_plan(plan); + + result = (Node *) splan; + isInitPlan = false; + } + + /* + * Add the subplan and its PlannerInfo to the global lists. + */ + root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subroots = lappend(root->glob->subroots, subroot); + splan->plan_id = list_length(root->glob->subplans); + + if (isInitPlan) + root->init_plans = lappend(root->init_plans, splan); + + /* + * A parameterless subplan (not initplan) should be prepared to handle + * REWIND efficiently. If it has direct parameters then there's no point + * since it'll be reset on each scan anyway; and if it's an initplan then + * there's no point since it won't get re-run without parameter changes + * anyway. The input of a hashed subplan doesn't need REWIND either. + */ + if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable) + root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs, + splan->plan_id); + + /* Label the subplan for EXPLAIN purposes */ + splan->plan_name = palloc(32 + 12 * list_length(splan->setParam)); + sprintf(splan->plan_name, "%s %d", + isInitPlan ? "InitPlan" : "SubPlan", + splan->plan_id); + if (splan->setParam) + { + char *ptr = splan->plan_name + strlen(splan->plan_name); + + ptr += sprintf(ptr, " (returns "); + foreach(lc, splan->setParam) + { + ptr += sprintf(ptr, "$%d%s", + lfirst_int(lc), + lnext(splan->setParam, lc) ? "," : ")"); + } + } + + /* Lastly, fill in the cost estimates for use later */ + cost_subplan(root, splan, plan); + + return result; +} + +/* + * generate_subquery_params: build a list of Params representing the output + * columns of a sublink's sub-select, given the sub-select's targetlist. + * + * We also return an integer list of the paramids of the Params. + */ +static List * +generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds) +{ + List *result; + List *ids; + ListCell *lc; + + result = ids = NIL; + foreach(lc, tlist) + { + TargetEntry *tent = (TargetEntry *) lfirst(lc); + Param *param; + + if (tent->resjunk) + continue; + + param = generate_new_exec_param(root, + exprType((Node *) tent->expr), + exprTypmod((Node *) tent->expr), + exprCollation((Node *) tent->expr)); + result = lappend(result, param); + ids = lappend_int(ids, param->paramid); + } + + *paramIds = ids; + return result; +} + +/* + * generate_subquery_vars: build a list of Vars representing the output + * columns of a sublink's sub-select, given the sub-select's targetlist. + * The Vars have the specified varno (RTE index). + */ +static List * +generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno) +{ + List *result; + ListCell *lc; + + result = NIL; + foreach(lc, tlist) + { + TargetEntry *tent = (TargetEntry *) lfirst(lc); + Var *var; + + if (tent->resjunk) + continue; + + var = makeVarFromTargetEntry(varno, tent); + result = lappend(result, var); + } + + return result; +} + +/* + * convert_testexpr: convert the testexpr given by the parser into + * actually executable form. This entails replacing PARAM_SUBLINK Params + * with Params or Vars representing the results of the sub-select. The + * nodes to be substituted are passed in as the List result from + * generate_subquery_params or generate_subquery_vars. + */ +static Node * +convert_testexpr(PlannerInfo *root, + Node *testexpr, + List *subst_nodes) +{ + convert_testexpr_context context; + + context.root = root; + context.subst_nodes = subst_nodes; + return convert_testexpr_mutator(testexpr, &context); +} + +static Node * +convert_testexpr_mutator(Node *node, + convert_testexpr_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind == PARAM_SUBLINK) + { + if (param->paramid <= 0 || + param->paramid > list_length(context->subst_nodes)) + elog(ERROR, "unexpected PARAM_SUBLINK ID: %d", param->paramid); + + /* + * We copy the list item to avoid having doubly-linked + * substructure in the modified parse tree. This is probably + * unnecessary when it's a Param, but be safe. + */ + return (Node *) copyObject(list_nth(context->subst_nodes, + param->paramid - 1)); + } + } + if (IsA(node, SubLink)) + { + /* + * If we come across a nested SubLink, it is neither necessary nor + * correct to recurse into it: any PARAM_SUBLINKs we might find inside + * belong to the inner SubLink not the outer. So just return it as-is. + * + * This reasoning depends on the assumption that nothing will pull + * subexpressions into or out of the testexpr field of a SubLink, at + * least not without replacing PARAM_SUBLINKs first. If we did want + * to do that we'd need to rethink the parser-output representation + * altogether, since currently PARAM_SUBLINKs are only unique per + * SubLink not globally across the query. The whole point of + * replacing them with Vars or PARAM_EXEC nodes is to make them + * globally unique before they escape from the SubLink's testexpr. + * + * Note: this can't happen when called during SS_process_sublinks, + * because that recursively processes inner SubLinks first. It can + * happen when called from convert_ANY_sublink_to_join, though. + */ + return node; + } + return expression_tree_mutator(node, + convert_testexpr_mutator, + (void *) context); +} + +/* + * subplan_is_hashable: can we implement an ANY subplan by hashing? + * + * This is not responsible for checking whether the combining testexpr + * is suitable for hashing. We only look at the subquery itself. + */ +static bool +subplan_is_hashable(Plan *plan) +{ + double subquery_size; + + /* + * The estimated size of the subquery result must fit in hash_mem. (Note: + * we use heap tuple overhead here even though the tuples will actually be + * stored as MinimalTuples; this provides some fudge factor for hashtable + * overhead.) + */ + subquery_size = plan->plan_rows * + (MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader)); + if (subquery_size > get_hash_memory_limit()) + return false; + + return true; +} + +/* + * subpath_is_hashable: can we implement an ANY subplan by hashing? + * + * Identical to subplan_is_hashable, but work from a Path for the subplan. + */ +static bool +subpath_is_hashable(Path *path) +{ + double subquery_size; + + /* + * The estimated size of the subquery result must fit in hash_mem. (Note: + * we use heap tuple overhead here even though the tuples will actually be + * stored as MinimalTuples; this provides some fudge factor for hashtable + * overhead.) + */ + subquery_size = path->rows * + (MAXALIGN(path->pathtarget->width) + MAXALIGN(SizeofHeapTupleHeader)); + if (subquery_size > get_hash_memory_limit()) + return false; + + return true; +} + +/* + * testexpr_is_hashable: is an ANY SubLink's test expression hashable? + * + * To identify LHS vs RHS of the hash expression, we must be given the + * list of output Param IDs of the SubLink's subquery. + */ +static bool +testexpr_is_hashable(Node *testexpr, List *param_ids) +{ + /* + * The testexpr must be a single OpExpr, or an AND-clause containing only + * OpExprs, each of which satisfy test_opexpr_is_hashable(). + */ + if (testexpr && IsA(testexpr, OpExpr)) + { + if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids)) + return true; + } + else if (is_andclause(testexpr)) + { + ListCell *l; + + foreach(l, ((BoolExpr *) testexpr)->args) + { + Node *andarg = (Node *) lfirst(l); + + if (!IsA(andarg, OpExpr)) + return false; + if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids)) + return false; + } + return true; + } + + return false; +} + +static bool +test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids) +{ + /* + * The combining operator must be hashable and strict. The need for + * hashability is obvious, since we want to use hashing. Without + * strictness, behavior in the presence of nulls is too unpredictable. We + * actually must assume even more than plain strictness: it can't yield + * NULL for non-null inputs, either (see nodeSubplan.c). However, hash + * indexes and hash joins assume that too. + */ + if (!hash_ok_operator(testexpr)) + return false; + + /* + * The left and right inputs must belong to the outer and inner queries + * respectively; hence Params that will be supplied by the subquery must + * not appear in the LHS, and Vars of the outer query must not appear in + * the RHS. (Ordinarily, this must be true because of the way that the + * parser builds an ANY SubLink's testexpr ... but inlining of functions + * could have changed the expression's structure, so we have to check. + * Such cases do not occur often enough to be worth trying to optimize, so + * we don't worry about trying to commute the clause or anything like + * that; we just need to be sure not to build an invalid plan.) + */ + if (list_length(testexpr->args) != 2) + return false; + if (contain_exec_param((Node *) linitial(testexpr->args), param_ids)) + return false; + if (contain_var_clause((Node *) lsecond(testexpr->args))) + return false; + return true; +} + +/* + * Check expression is hashable + strict + * + * We could use op_hashjoinable() and op_strict(), but do it like this to + * avoid a redundant cache lookup. + */ +static bool +hash_ok_operator(OpExpr *expr) +{ + Oid opid = expr->opno; + + /* quick out if not a binary operator */ + if (list_length(expr->args) != 2) + return false; + if (opid == ARRAY_EQ_OP || + opid == RECORD_EQ_OP) + { + /* these are strict, but must check input type to ensure hashable */ + Node *leftarg = linitial(expr->args); + + return op_hashjoinable(opid, exprType(leftarg)); + } + else + { + /* else must look up the operator properties */ + HeapTuple tup; + Form_pg_operator optup; + + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for operator %u", opid); + optup = (Form_pg_operator) GETSTRUCT(tup); + if (!optup->oprcanhash || !func_strict(optup->oprcode)) + { + ReleaseSysCache(tup); + return false; + } + ReleaseSysCache(tup); + return true; + } +} + + +/* + * SS_process_ctes: process a query's WITH list + * + * Consider each CTE in the WITH list and either ignore it (if it's an + * unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM, + * or convert it to an initplan. + * + * A side effect is to fill in root->cte_plan_ids with a list that + * parallels root->parse->cteList and provides the subplan ID for + * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan. + */ +void +SS_process_ctes(PlannerInfo *root) +{ + ListCell *lc; + + Assert(root->cte_plan_ids == NIL); + + foreach(lc, root->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + CmdType cmdType = ((Query *) cte->ctequery)->commandType; + Query *subquery; + PlannerInfo *subroot; + RelOptInfo *final_rel; + Path *best_path; + Plan *plan; + SubPlan *splan; + int paramid; + + /* + * Ignore SELECT CTEs that are not actually referenced anywhere. + */ + if (cte->cterefcount == 0 && cmdType == CMD_SELECT) + { + /* Make a dummy entry in cte_plan_ids */ + root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); + continue; + } + + /* + * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of + * implementing it as a separately-planned CTE. + * + * We cannot inline if any of these conditions hold: + * + * 1. The user said not to (the CTEMaterializeAlways option). + * + * 2. The CTE is recursive. + * + * 3. The CTE has side-effects; this includes either not being a plain + * SELECT, or containing volatile functions. Inlining might change + * the side-effects, which would be bad. + * + * 4. The CTE is multiply-referenced and contains a self-reference to + * a recursive CTE outside itself. Inlining would result in multiple + * recursive self-references, which we don't support. + * + * Otherwise, we have an option whether to inline or not. That should + * always be a win if there's just a single reference, but if the CTE + * is multiply-referenced then it's unclear: inlining adds duplicate + * computations, but the ability to absorb restrictions from the outer + * query level could outweigh that. We do not have nearly enough + * information at this point to tell whether that's true, so we let + * the user express a preference. Our default behavior is to inline + * only singly-referenced CTEs, but a CTE marked CTEMaterializeNever + * will be inlined even if multiply referenced. + * + * Note: we check for volatile functions last, because that's more + * expensive than the other tests needed. + */ + if ((cte->ctematerialized == CTEMaterializeNever || + (cte->ctematerialized == CTEMaterializeDefault && + cte->cterefcount == 1)) && + !cte->cterecursive && + cmdType == CMD_SELECT && + !contain_dml(cte->ctequery) && + (cte->cterefcount <= 1 || + !contain_outer_selfref(cte->ctequery)) && + !contain_volatile_functions(cte->ctequery)) + { + inline_cte(root, cte); + /* Make a dummy entry in cte_plan_ids */ + root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); + continue; + } + + /* + * Copy the source Query node. Probably not necessary, but let's keep + * this similar to make_subplan. + */ + subquery = (Query *) copyObject(cte->ctequery); + + /* plan_params should not be in use in current query level */ + Assert(root->plan_params == NIL); + + /* + * Generate Paths for the CTE query. Always plan for full retrieval + * --- we don't have enough info to predict otherwise. + */ + subroot = subquery_planner(root->glob, subquery, + root, + cte->cterecursive, 0.0); + + /* + * Since the current query level doesn't yet contain any RTEs, it + * should not be possible for the CTE to have requested parameters of + * this level. + */ + if (root->plan_params) + elog(ERROR, "unexpected outer reference in CTE query"); + + /* + * Select best Path and turn it into a Plan. At least for now, there + * seems no reason to postpone doing that. + */ + final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + best_path = final_rel->cheapest_total_path; + + plan = create_plan(subroot, best_path); + + /* + * Make a SubPlan node for it. This is just enough unlike + * build_subplan that we can't share code. + * + * Note plan_id, plan_name, and cost fields are set further down. + */ + splan = makeNode(SubPlan); + splan->subLinkType = CTE_SUBLINK; + splan->testexpr = NULL; + splan->paramIds = NIL; + get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod, + &splan->firstColCollation); + splan->useHashTable = false; + splan->unknownEqFalse = false; + + /* + * CTE scans are not considered for parallelism (cf + * set_rel_consider_parallel), and even if they were, initPlans aren't + * parallel-safe. + */ + splan->parallel_safe = false; + splan->setParam = NIL; + splan->parParam = NIL; + splan->args = NIL; + + /* + * The node can't have any inputs (since it's an initplan), so the + * parParam and args lists remain empty. (It could contain references + * to earlier CTEs' output param IDs, but CTE outputs are not + * propagated via the args list.) + */ + + /* + * Assign a param ID to represent the CTE's output. No ordinary + * "evaluation" of this param slot ever happens, but we use the param + * ID for setParam/chgParam signaling just as if the CTE plan were + * returning a simple scalar output. (Also, the executor abuses the + * ParamExecData slot for this param ID for communication among + * multiple CteScan nodes that might be scanning this CTE.) + */ + paramid = assign_special_exec_param(root); + splan->setParam = list_make1_int(paramid); + + /* + * Add the subplan and its PlannerInfo to the global lists. + */ + root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subroots = lappend(root->glob->subroots, subroot); + splan->plan_id = list_length(root->glob->subplans); + + root->init_plans = lappend(root->init_plans, splan); + + root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id); + + /* Label the subplan for EXPLAIN purposes */ + splan->plan_name = psprintf("CTE %s", cte->ctename); + + /* Lastly, fill in the cost estimates for use later */ + cost_subplan(root, splan, plan); + } +} + +/* + * contain_dml: is any subquery not a plain SELECT? + * + * We reject SELECT FOR UPDATE/SHARE as well as INSERT etc. + */ +static bool +contain_dml(Node *node) +{ + return contain_dml_walker(node, NULL); +} + +static bool +contain_dml_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + if (query->commandType != CMD_SELECT || + query->rowMarks != NIL) + return true; + + return query_tree_walker(query, contain_dml_walker, context, 0); + } + return expression_tree_walker(node, contain_dml_walker, context); +} + +/* + * contain_outer_selfref: is there an external recursive self-reference? + */ +static bool +contain_outer_selfref(Node *node) +{ + Index depth = 0; + + /* + * We should be starting with a Query, so that depth will be 1 while + * examining its immediate contents. + */ + Assert(IsA(node, Query)); + + return contain_outer_selfref_walker(node, &depth); +} + +static bool +contain_outer_selfref_walker(Node *node, Index *depth) +{ + if (node == NULL) + return false; + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + /* + * Check for a self-reference to a CTE that's above the Query that our + * search started at. + */ + if (rte->rtekind == RTE_CTE && + rte->self_reference && + rte->ctelevelsup >= *depth) + return true; + return false; /* allow range_table_walker to continue */ + } + if (IsA(node, Query)) + { + /* Recurse into subquery, tracking nesting depth properly */ + Query *query = (Query *) node; + bool result; + + (*depth)++; + + result = query_tree_walker(query, contain_outer_selfref_walker, + (void *) depth, QTW_EXAMINE_RTES_BEFORE); + + (*depth)--; + + return result; + } + return expression_tree_walker(node, contain_outer_selfref_walker, + (void *) depth); +} + +/* + * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs + */ +static void +inline_cte(PlannerInfo *root, CommonTableExpr *cte) +{ + struct inline_cte_walker_context context; + + context.ctename = cte->ctename; + /* Start at levelsup = -1 because we'll immediately increment it */ + context.levelsup = -1; + context.ctequery = castNode(Query, cte->ctequery); + + (void) inline_cte_walker((Node *) root->parse, &context); +} + +static bool +inline_cte_walker(Node *node, inline_cte_walker_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + context->levelsup++; + + /* + * Visit the query's RTE nodes after their contents; otherwise + * query_tree_walker would descend into the newly inlined CTE query, + * which we don't want. + */ + (void) query_tree_walker(query, inline_cte_walker, context, + QTW_EXAMINE_RTES_AFTER); + + context->levelsup--; + + return false; + } + else if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + if (rte->rtekind == RTE_CTE && + strcmp(rte->ctename, context->ctename) == 0 && + rte->ctelevelsup == context->levelsup) + { + /* + * Found a reference to replace. Generate a copy of the CTE query + * with appropriate level adjustment for outer references (e.g., + * to other CTEs). + */ + Query *newquery = copyObject(context->ctequery); + + if (context->levelsup > 0) + IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1); + + /* + * Convert the RTE_CTE RTE into a RTE_SUBQUERY. + * + * Historically, a FOR UPDATE clause has been treated as extending + * into views and subqueries, but not into CTEs. We preserve this + * distinction by not trying to push rowmarks into the new + * subquery. + */ + rte->rtekind = RTE_SUBQUERY; + rte->subquery = newquery; + rte->security_barrier = false; + + /* Zero out CTE-specific fields */ + rte->ctename = NULL; + rte->ctelevelsup = 0; + rte->self_reference = false; + rte->coltypes = NIL; + rte->coltypmods = NIL; + rte->colcollations = NIL; + } + + return false; + } + + return expression_tree_walker(node, inline_cte_walker, context); +} + + +/* + * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join + * + * The caller has found an ANY SubLink at the top level of one of the query's + * qual clauses, but has not checked the properties of the SubLink further. + * Decide whether it is appropriate to process this SubLink in join style. + * If so, form a JoinExpr and return it. Return NULL if the SubLink cannot + * be converted to a join. + * + * The only non-obvious input parameter is available_rels: this is the set + * of query rels that can safely be referenced in the sublink expression. + * (We must restrict this to avoid changing the semantics when a sublink + * is present in an outer join's ON qual.) The conversion must fail if + * the converted qual would reference any but these parent-query relids. + * + * On success, the returned JoinExpr has larg = NULL and rarg = the jointree + * item representing the pulled-up subquery. The caller must set larg to + * represent the relation(s) on the lefthand side of the new join, and insert + * the JoinExpr into the upper query's jointree at an appropriate place + * (typically, where the lefthand relation(s) had been). Note that the + * passed-in SubLink must also be removed from its original position in the + * query quals, since the quals of the returned JoinExpr replace it. + * (Notionally, we replace the SubLink with a constant TRUE, then elide the + * redundant constant from the qual.) + * + * On success, the caller is also responsible for recursively applying + * pull_up_sublinks processing to the rarg and quals of the returned JoinExpr. + * (On failure, there is no need to do anything, since pull_up_sublinks will + * be applied when we recursively plan the sub-select.) + * + * Side effects of a successful conversion include adding the SubLink's + * subselect to the query's rangetable, so that it can be referenced in + * the JoinExpr's rarg. + */ +JoinExpr * +convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, + Relids available_rels) +{ + JoinExpr *result; + Query *parse = root->parse; + Query *subselect = (Query *) sublink->subselect; + Relids upper_varnos; + int rtindex; + ParseNamespaceItem *nsitem; + RangeTblEntry *rte; + RangeTblRef *rtr; + List *subquery_vars; + Node *quals; + ParseState *pstate; + + Assert(sublink->subLinkType == ANY_SUBLINK); + + /* + * The sub-select must not refer to any Vars of the parent query. (Vars of + * higher levels should be okay, though.) + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + /* + * The test expression must contain some Vars of the parent query, else + * it's not gonna be a join. (Note that it won't have Vars referring to + * the subquery, rather Params.) + */ + upper_varnos = pull_varnos(root, sublink->testexpr); + if (bms_is_empty(upper_varnos)) + return NULL; + + /* + * However, it can't refer to anything outside available_rels. + */ + if (!bms_is_subset(upper_varnos, available_rels)) + return NULL; + + /* + * The combining operators and left-hand expressions mustn't be volatile. + */ + if (contain_volatile_functions(sublink->testexpr)) + return NULL; + + /* Create a dummy ParseState for addRangeTableEntryForSubquery */ + pstate = make_parsestate(NULL); + + /* + * Okay, pull up the sub-select into upper range table. + * + * We rely here on the assumption that the outer query has no references + * to the inner (necessarily true, other than the Vars that we build + * below). Therefore this is a lot easier than what pull_up_subqueries has + * to go through. + */ + nsitem = addRangeTableEntryForSubquery(pstate, + subselect, + makeAlias("ANY_subquery", NIL), + false, + false); + rte = nsitem->p_rte; + parse->rtable = lappend(parse->rtable, rte); + rtindex = list_length(parse->rtable); + + /* + * Form a RangeTblRef for the pulled-up sub-select. + */ + rtr = makeNode(RangeTblRef); + rtr->rtindex = rtindex; + + /* + * Build a list of Vars representing the subselect outputs. + */ + subquery_vars = generate_subquery_vars(root, + subselect->targetList, + rtindex); + + /* + * Build the new join's qual expression, replacing Params with these Vars. + */ + quals = convert_testexpr(root, sublink->testexpr, subquery_vars); + + /* + * And finally, build the JoinExpr node. + */ + result = makeNode(JoinExpr); + result->jointype = JOIN_SEMI; + result->isNatural = false; + result->larg = NULL; /* caller must fill this in */ + result->rarg = (Node *) rtr; + result->usingClause = NIL; + result->join_using_alias = NULL; + result->quals = quals; + result->alias = NULL; + result->rtindex = 0; /* we don't need an RTE for it */ + + return result; +} + +/* + * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join + * + * The API of this function is identical to convert_ANY_sublink_to_join's, + * except that we also support the case where the caller has found NOT EXISTS, + * so we need an additional input parameter "under_not". + */ +JoinExpr * +convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, + bool under_not, Relids available_rels) +{ + JoinExpr *result; + Query *parse = root->parse; + Query *subselect = (Query *) sublink->subselect; + Node *whereClause; + int rtoffset; + int varno; + Relids clause_varnos; + Relids upper_varnos; + + Assert(sublink->subLinkType == EXISTS_SUBLINK); + + /* + * Can't flatten if it contains WITH. (We could arrange to pull up the + * WITH into the parent query's cteList, but that risks changing the + * semantics, since a WITH ought to be executed once per associated query + * call.) Note that convert_ANY_sublink_to_join doesn't have to reject + * this case, since it just produces a subquery RTE that doesn't have to + * get flattened into the parent query. + */ + if (subselect->cteList) + return NULL; + + /* + * Copy the subquery so we can modify it safely (see comments in + * make_subplan). + */ + subselect = copyObject(subselect); + + /* + * See if the subquery can be simplified based on the knowledge that it's + * being used in EXISTS(). If we aren't able to get rid of its + * targetlist, we have to fail, because the pullup operation leaves us + * with noplace to evaluate the targetlist. + */ + if (!simplify_EXISTS_query(root, subselect)) + return NULL; + + /* + * Separate out the WHERE clause. (We could theoretically also remove + * top-level plain JOIN/ON clauses, but it's probably not worth the + * trouble.) + */ + whereClause = subselect->jointree->quals; + subselect->jointree->quals = NULL; + + /* + * The rest of the sub-select must not refer to any Vars of the parent + * query. (Vars of higher levels should be okay, though.) + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + /* + * On the other hand, the WHERE clause must contain some Vars of the + * parent query, else it's not gonna be a join. + */ + if (!contain_vars_of_level(whereClause, 1)) + return NULL; + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + return NULL; + + /* + * The subquery must have a nonempty jointree, but we can make it so. + */ + replace_empty_jointree(subselect); + + /* + * Prepare to pull up the sub-select into top range table. + * + * We rely here on the assumption that the outer query has no references + * to the inner (necessarily true). Therefore this is a lot easier than + * what pull_up_subqueries has to go through. + * + * In fact, it's even easier than what convert_ANY_sublink_to_join has to + * do. The machinations of simplify_EXISTS_query ensured that there is + * nothing interesting in the subquery except an rtable and jointree, and + * even the jointree FromExpr no longer has quals. So we can just append + * the rtable to our own and use the FromExpr in our jointree. But first, + * adjust all level-zero varnos in the subquery to account for the rtable + * merger. + */ + rtoffset = list_length(parse->rtable); + OffsetVarNodes((Node *) subselect, rtoffset, 0); + OffsetVarNodes(whereClause, rtoffset, 0); + + /* + * Upper-level vars in subquery will now be one level closer to their + * parent than before; in particular, anything that had been level 1 + * becomes level zero. + */ + IncrementVarSublevelsUp((Node *) subselect, -1, 1); + IncrementVarSublevelsUp(whereClause, -1, 1); + + /* + * Now that the WHERE clause is adjusted to match the parent query + * environment, we can easily identify all the level-zero rels it uses. + * The ones <= rtoffset belong to the upper query; the ones > rtoffset do + * not. + */ + clause_varnos = pull_varnos(root, whereClause); + upper_varnos = NULL; + while ((varno = bms_first_member(clause_varnos)) >= 0) + { + if (varno <= rtoffset) + upper_varnos = bms_add_member(upper_varnos, varno); + } + bms_free(clause_varnos); + Assert(!bms_is_empty(upper_varnos)); + + /* + * Now that we've got the set of upper-level varnos, we can make the last + * check: only available_rels can be referenced. + */ + if (!bms_is_subset(upper_varnos, available_rels)) + return NULL; + + /* Now we can attach the modified subquery rtable to the parent */ + parse->rtable = list_concat(parse->rtable, subselect->rtable); + + /* + * And finally, build the JoinExpr node. + */ + result = makeNode(JoinExpr); + result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI; + result->isNatural = false; + result->larg = NULL; /* caller must fill this in */ + /* flatten out the FromExpr node if it's useless */ + if (list_length(subselect->jointree->fromlist) == 1) + result->rarg = (Node *) linitial(subselect->jointree->fromlist); + else + result->rarg = (Node *) subselect->jointree; + result->usingClause = NIL; + result->join_using_alias = NULL; + result->quals = whereClause; + result->alias = NULL; + result->rtindex = 0; /* we don't need an RTE for it */ + + return result; +} + +/* + * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery + * + * The only thing that matters about an EXISTS query is whether it returns + * zero or more than zero rows. Therefore, we can remove certain SQL features + * that won't affect that. The only part that is really likely to matter in + * typical usage is simplifying the targetlist: it's a common habit to write + * "SELECT * FROM" even though there is no need to evaluate any columns. + * + * Note: by suppressing the targetlist we could cause an observable behavioral + * change, namely that any errors that might occur in evaluating the tlist + * won't occur, nor will other side-effects of volatile functions. This seems + * unlikely to bother anyone in practice. + * + * Returns true if was able to discard the targetlist, else false. + */ +static bool +simplify_EXISTS_query(PlannerInfo *root, Query *query) +{ + /* + * We don't try to simplify at all if the query uses set operations, + * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR + * UPDATE/SHARE; none of these seem likely in normal usage and their + * possible effects are complex. (Note: we could ignore an "OFFSET 0" + * clause, but that traditionally is used as an optimization fence, so we + * don't.) + */ + if (query->commandType != CMD_SELECT || + query->setOperations || + query->hasAggs || + query->groupingSets || + query->hasWindowFuncs || + query->hasTargetSRFs || + query->hasModifyingCTE || + query->havingQual || + query->limitOffset || + query->rowMarks) + return false; + + /* + * LIMIT with a constant positive (or NULL) value doesn't affect the + * semantics of EXISTS, so let's ignore such clauses. This is worth doing + * because people accustomed to certain other DBMSes may be in the habit + * of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a + * LIMIT with anything else as argument, though, we can't simplify. + */ + if (query->limitCount) + { + /* + * The LIMIT clause has not yet been through eval_const_expressions, + * so we have to apply that here. It might seem like this is a waste + * of cycles, since the only case plausibly worth worrying about is + * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)", + * so we have to fold constants or we're not going to recognize it. + */ + Node *node = eval_const_expressions(root, query->limitCount); + Const *limit; + + /* Might as well update the query if we simplified the clause. */ + query->limitCount = node; + + if (!IsA(node, Const)) + return false; + + limit = (Const *) node; + Assert(limit->consttype == INT8OID); + if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0) + return false; + + /* Whether or not the targetlist is safe, we can drop the LIMIT. */ + query->limitCount = NULL; + } + + /* + * Otherwise, we can throw away the targetlist, as well as any GROUP, + * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will + * change a nonzero-rows result to zero rows or vice versa. (Furthermore, + * since our parsetree representation of these clauses depends on the + * targetlist, we'd better throw them away if we drop the targetlist.) + */ + query->targetList = NIL; + query->groupClause = NIL; + query->windowClause = NIL; + query->distinctClause = NIL; + query->sortClause = NIL; + query->hasDistinctOn = false; + + return true; +} + +/* + * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink + * + * The subselect is expected to be a fresh copy that we can munge up, + * and to have been successfully passed through simplify_EXISTS_query. + * + * On success, the modified subselect is returned, and we store a suitable + * upper-level test expression at *testexpr, plus a list of the subselect's + * output Params at *paramIds. (The test expression is already Param-ified + * and hence need not go through convert_testexpr, which is why we have to + * deal with the Param IDs specially.) + * + * On failure, returns NULL. + */ +static Query * +convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, + Node **testexpr, List **paramIds) +{ + Node *whereClause; + List *leftargs, + *rightargs, + *opids, + *opcollations, + *newWhere, + *tlist, + *testlist, + *paramids; + ListCell *lc, + *rc, + *oc, + *cc; + AttrNumber resno; + + /* + * Query must not require a targetlist, since we have to insert a new one. + * Caller should have dealt with the case already. + */ + Assert(subselect->targetList == NIL); + + /* + * Separate out the WHERE clause. (We could theoretically also remove + * top-level plain JOIN/ON clauses, but it's probably not worth the + * trouble.) + */ + whereClause = subselect->jointree->quals; + subselect->jointree->quals = NULL; + + /* + * The rest of the sub-select must not refer to any Vars of the parent + * query. (Vars of higher levels should be okay, though.) + * + * Note: we need not check for Aggrefs separately because we know the + * sub-select is as yet unoptimized; any uplevel Aggref must therefore + * contain an uplevel Var reference. This is not the case below ... + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + return NULL; + + /* + * Clean up the WHERE clause by doing const-simplification etc on it. + * Aside from simplifying the processing we're about to do, this is + * important for being able to pull chunks of the WHERE clause up into the + * parent query. Since we are invoked partway through the parent's + * preprocess_expression() work, earlier steps of preprocess_expression() + * wouldn't get applied to the pulled-up stuff unless we do them here. For + * the parts of the WHERE clause that get put back into the child query, + * this work is partially duplicative, but it shouldn't hurt. + * + * Note: we do not run flatten_join_alias_vars. This is OK because any + * parent aliases were flattened already, and we're not going to pull any + * child Vars (of any description) into the parent. + * + * Note: passing the parent's root to eval_const_expressions is + * technically wrong, but we can get away with it since only the + * boundParams (if any) are used, and those would be the same in a + * subroot. + */ + whereClause = eval_const_expressions(root, whereClause); + whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); + whereClause = (Node *) make_ands_implicit((Expr *) whereClause); + + /* + * We now have a flattened implicit-AND list of clauses, which we try to + * break apart into "outervar = innervar" hash clauses. Anything that + * can't be broken apart just goes back into the newWhere list. Note that + * we aren't trying hard yet to ensure that we have only outer or only + * inner on each side; we'll check that if we get to the end. + */ + leftargs = rightargs = opids = opcollations = newWhere = NIL; + foreach(lc, (List *) whereClause) + { + OpExpr *expr = (OpExpr *) lfirst(lc); + + if (IsA(expr, OpExpr) && + hash_ok_operator(expr)) + { + Node *leftarg = (Node *) linitial(expr->args); + Node *rightarg = (Node *) lsecond(expr->args); + + if (contain_vars_of_level(leftarg, 1)) + { + leftargs = lappend(leftargs, leftarg); + rightargs = lappend(rightargs, rightarg); + opids = lappend_oid(opids, expr->opno); + opcollations = lappend_oid(opcollations, expr->inputcollid); + continue; + } + if (contain_vars_of_level(rightarg, 1)) + { + /* + * We must commute the clause to put the outer var on the + * left, because the hashing code in nodeSubplan.c expects + * that. This probably shouldn't ever fail, since hashable + * operators ought to have commutators, but be paranoid. + */ + expr->opno = get_commutator(expr->opno); + if (OidIsValid(expr->opno) && hash_ok_operator(expr)) + { + leftargs = lappend(leftargs, rightarg); + rightargs = lappend(rightargs, leftarg); + opids = lappend_oid(opids, expr->opno); + opcollations = lappend_oid(opcollations, expr->inputcollid); + continue; + } + /* If no commutator, no chance to optimize the WHERE clause */ + return NULL; + } + } + /* Couldn't handle it as a hash clause */ + newWhere = lappend(newWhere, expr); + } + + /* + * If we didn't find anything we could convert, fail. + */ + if (leftargs == NIL) + return NULL; + + /* + * There mustn't be any parent Vars or Aggs in the stuff that we intend to + * put back into the child query. Note: you might think we don't need to + * check for Aggs separately, because an uplevel Agg must contain an + * uplevel Var in its argument. But it is possible that the uplevel Var + * got optimized away by eval_const_expressions. Consider + * + * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END) + */ + if (contain_vars_of_level((Node *) newWhere, 1) || + contain_vars_of_level((Node *) rightargs, 1)) + return NULL; + if (root->parse->hasAggs && + (contain_aggs_of_level((Node *) newWhere, 1) || + contain_aggs_of_level((Node *) rightargs, 1))) + return NULL; + + /* + * And there can't be any child Vars in the stuff we intend to pull up. + * (Note: we'd need to check for child Aggs too, except we know the child + * has no aggs at all because of simplify_EXISTS_query's check. The same + * goes for window functions.) + */ + if (contain_vars_of_level((Node *) leftargs, 0)) + return NULL; + + /* + * Also reject sublinks in the stuff we intend to pull up. (It might be + * possible to support this, but doesn't seem worth the complication.) + */ + if (contain_subplans((Node *) leftargs)) + return NULL; + + /* + * Okay, adjust the sublevelsup in the stuff we're pulling up. + */ + IncrementVarSublevelsUp((Node *) leftargs, -1, 1); + + /* + * Put back any child-level-only WHERE clauses. + */ + if (newWhere) + subselect->jointree->quals = (Node *) make_ands_explicit(newWhere); + + /* + * Build a new targetlist for the child that emits the expressions we + * need. Concurrently, build a testexpr for the parent using Params to + * reference the child outputs. (Since we generate Params directly here, + * there will be no need to convert the testexpr in build_subplan.) + */ + tlist = testlist = paramids = NIL; + resno = 1; + forfour(lc, leftargs, rc, rightargs, oc, opids, cc, opcollations) + { + Node *leftarg = (Node *) lfirst(lc); + Node *rightarg = (Node *) lfirst(rc); + Oid opid = lfirst_oid(oc); + Oid opcollation = lfirst_oid(cc); + Param *param; + + param = generate_new_exec_param(root, + exprType(rightarg), + exprTypmod(rightarg), + exprCollation(rightarg)); + tlist = lappend(tlist, + makeTargetEntry((Expr *) rightarg, + resno++, + NULL, + false)); + testlist = lappend(testlist, + make_opclause(opid, BOOLOID, false, + (Expr *) leftarg, (Expr *) param, + InvalidOid, opcollation)); + paramids = lappend_int(paramids, param->paramid); + } + + /* Put everything where it should go, and we're done */ + subselect->targetList = tlist; + *testexpr = (Node *) make_ands_explicit(testlist); + *paramIds = paramids; + + return subselect; +} + + +/* + * Replace correlation vars (uplevel vars) with Params. + * + * Uplevel PlaceHolderVars and aggregates are replaced, too. + * + * Note: it is critical that this runs immediately after SS_process_sublinks. + * Since we do not recurse into the arguments of uplevel PHVs and aggregates, + * they will get copied to the appropriate subplan args list in the parent + * query with uplevel vars not replaced by Params, but only adjusted in level + * (see replace_outer_placeholdervar and replace_outer_agg). That's exactly + * what we want for the vars of the parent level --- but if a PHV's or + * aggregate's argument contains any further-up variables, they have to be + * replaced with Params in their turn. That will happen when the parent level + * runs SS_replace_correlation_vars. Therefore it must do so after expanding + * its sublinks to subplans. And we don't want any steps in between, else + * those steps would never get applied to the argument expressions, either in + * the parent or the child level. + * + * Another fairly tricky thing going on here is the handling of SubLinks in + * the arguments of uplevel PHVs/aggregates. Those are not touched inside the + * intermediate query level, either. Instead, SS_process_sublinks recurses on + * them after copying the PHV or Aggref expression into the parent plan level + * (this is actually taken care of in build_subplan). + */ +Node * +SS_replace_correlation_vars(PlannerInfo *root, Node *expr) +{ + /* No setup needed for tree walk, so away we go */ + return replace_correlation_vars_mutator(expr, root); +} + +static Node * +replace_correlation_vars_mutator(Node *node, PlannerInfo *root) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + if (((Var *) node)->varlevelsup > 0) + return (Node *) replace_outer_var(root, (Var *) node); + } + if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup > 0) + return (Node *) replace_outer_placeholdervar(root, + (PlaceHolderVar *) node); + } + if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup > 0) + return (Node *) replace_outer_agg(root, (Aggref *) node); + } + if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup > 0) + return (Node *) replace_outer_grouping(root, (GroupingFunc *) node); + } + return expression_tree_mutator(node, + replace_correlation_vars_mutator, + (void *) root); +} + +/* + * Expand SubLinks to SubPlans in the given expression. + * + * The isQual argument tells whether or not this expression is a WHERE/HAVING + * qualifier expression. If it is, any sublinks appearing at top level need + * not distinguish FALSE from UNKNOWN return values. + */ +Node * +SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual) +{ + process_sublinks_context context; + + context.root = root; + context.isTopQual = isQual; + return process_sublinks_mutator(expr, &context); +} + +static Node * +process_sublinks_mutator(Node *node, process_sublinks_context *context) +{ + process_sublinks_context locContext; + + locContext.root = context->root; + + if (node == NULL) + return NULL; + if (IsA(node, SubLink)) + { + SubLink *sublink = (SubLink *) node; + Node *testexpr; + + /* + * First, recursively process the lefthand-side expressions, if any. + * They're not top-level anymore. + */ + locContext.isTopQual = false; + testexpr = process_sublinks_mutator(sublink->testexpr, &locContext); + + /* + * Now build the SubPlan node and make the expr to return. + */ + return make_subplan(context->root, + (Query *) sublink->subselect, + sublink->subLinkType, + sublink->subLinkId, + testexpr, + context->isTopQual); + } + + /* + * Don't recurse into the arguments of an outer PHV, Aggref or + * GroupingFunc here. Any SubLinks in the arguments have to be dealt with + * at the outer query level; they'll be handled when build_subplan + * collects the PHV, Aggref or GroupingFunc into the arguments to be + * passed down to the current subplan. + */ + if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup > 0) + return node; + } + else if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup > 0) + return node; + } + else if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup > 0) + return node; + } + + /* + * We should never see a SubPlan expression in the input (since this is + * the very routine that creates 'em to begin with). We shouldn't find + * ourselves invoked directly on a Query, either. + */ + Assert(!IsA(node, SubPlan)); + Assert(!IsA(node, AlternativeSubPlan)); + Assert(!IsA(node, Query)); + + /* + * Because make_subplan() could return an AND or OR clause, we have to + * take steps to preserve AND/OR flatness of a qual. We assume the input + * has been AND/OR flattened and so we need no recursion here. + * + * (Due to the coding here, we will not get called on the List subnodes of + * an AND; and the input is *not* yet in implicit-AND format. So no check + * is needed for a bare List.) + * + * Anywhere within the top-level AND/OR clause structure, we can tell + * make_subplan() that NULL and FALSE are interchangeable. So isTopQual + * propagates down in both cases. (Note that this is unlike the meaning + * of "top level qual" used in most other places in Postgres.) + */ + if (is_andclause(node)) + { + List *newargs = NIL; + ListCell *l; + + /* Still at qual top-level */ + locContext.isTopQual = context->isTopQual; + + foreach(l, ((BoolExpr *) node)->args) + { + Node *newarg; + + newarg = process_sublinks_mutator(lfirst(l), &locContext); + if (is_andclause(newarg)) + newargs = list_concat(newargs, ((BoolExpr *) newarg)->args); + else + newargs = lappend(newargs, newarg); + } + return (Node *) make_andclause(newargs); + } + + if (is_orclause(node)) + { + List *newargs = NIL; + ListCell *l; + + /* Still at qual top-level */ + locContext.isTopQual = context->isTopQual; + + foreach(l, ((BoolExpr *) node)->args) + { + Node *newarg; + + newarg = process_sublinks_mutator(lfirst(l), &locContext); + if (is_orclause(newarg)) + newargs = list_concat(newargs, ((BoolExpr *) newarg)->args); + else + newargs = lappend(newargs, newarg); + } + return (Node *) make_orclause(newargs); + } + + /* + * If we recurse down through anything other than an AND or OR node, we + * are definitely not at top qual level anymore. + */ + locContext.isTopQual = false; + + return expression_tree_mutator(node, + process_sublinks_mutator, + (void *) &locContext); +} + +/* + * SS_identify_outer_params - identify the Params available from outer levels + * + * This must be run after SS_replace_correlation_vars and SS_process_sublinks + * processing is complete in a given query level as well as all of its + * descendant levels (which means it's most practical to do it at the end of + * processing the query level). We compute the set of paramIds that outer + * levels will make available to this level+descendants, and record it in + * root->outer_params for use while computing extParam/allParam sets in final + * plan cleanup. (We can't just compute it then, because the upper levels' + * plan_params lists are transient and will be gone by then.) + */ +void +SS_identify_outer_params(PlannerInfo *root) +{ + Bitmapset *outer_params; + PlannerInfo *proot; + ListCell *l; + + /* + * If no parameters have been assigned anywhere in the tree, we certainly + * don't need to do anything here. + */ + if (root->glob->paramExecTypes == NIL) + return; + + /* + * Scan all query levels above this one to see which parameters are due to + * be available from them, either because lower query levels have + * requested them (via plan_params) or because they will be available from + * initPlans of those levels. + */ + outer_params = NULL; + for (proot = root->parent_root; proot != NULL; proot = proot->parent_root) + { + /* Include ordinary Var/PHV/Aggref/GroupingFunc params */ + foreach(l, proot->plan_params) + { + PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l); + + outer_params = bms_add_member(outer_params, pitem->paramId); + } + /* Include any outputs of outer-level initPlans */ + foreach(l, proot->init_plans) + { + SubPlan *initsubplan = (SubPlan *) lfirst(l); + ListCell *l2; + + foreach(l2, initsubplan->setParam) + { + outer_params = bms_add_member(outer_params, lfirst_int(l2)); + } + } + /* Include worktable ID, if a recursive query is being planned */ + if (proot->wt_param_id >= 0) + outer_params = bms_add_member(outer_params, proot->wt_param_id); + } + root->outer_params = outer_params; +} + +/* + * SS_charge_for_initplans - account for initplans in Path costs & parallelism + * + * If any initPlans have been created in the current query level, they will + * get attached to the Plan tree created from whichever Path we select from + * the given rel. Increment all that rel's Paths' costs to account for them, + * and make sure the paths get marked as parallel-unsafe, since we can't + * currently transmit initPlans to parallel workers. + * + * This is separate from SS_attach_initplans because we might conditionally + * create more initPlans during create_plan(), depending on which Path we + * select. However, Paths that would generate such initPlans are expected + * to have included their cost and parallel-safety effects already. + */ +void +SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel) +{ + Cost initplan_cost; + ListCell *lc; + + /* Nothing to do if no initPlans */ + if (root->init_plans == NIL) + return; + + /* + * Compute the cost increment just once, since it will be the same for all + * Paths. We assume each initPlan gets run once during top plan startup. + * This is a conservative overestimate, since in fact an initPlan might be + * executed later than plan startup, or even not at all. + */ + initplan_cost = 0; + foreach(lc, root->init_plans) + { + SubPlan *initsubplan = (SubPlan *) lfirst(lc); + + initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost; + } + + /* + * Now adjust the costs and parallel_safe flags. + */ + foreach(lc, final_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + path->startup_cost += initplan_cost; + path->total_cost += initplan_cost; + path->parallel_safe = false; + } + + /* + * Forget about any partial paths and clear consider_parallel, too; + * they're not usable if we attached an initPlan. + */ + final_rel->partial_pathlist = NIL; + final_rel->consider_parallel = false; + + /* We needn't do set_cheapest() here, caller will do it */ +} + +/* + * SS_attach_initplans - attach initplans to topmost plan node + * + * Attach any initplans created in the current query level to the specified + * plan node, which should normally be the topmost node for the query level. + * (In principle the initPlans could go in any node at or above where they're + * referenced; but there seems no reason to put them any lower than the + * topmost node, so we don't bother to track exactly where they came from.) + * + * We do not touch the plan node's cost or parallel_safe flag. The initplans + * must have been accounted for in SS_charge_for_initplans, or by any later + * code that adds initplans via SS_make_initplan_from_plan. + */ +void +SS_attach_initplans(PlannerInfo *root, Plan *plan) +{ + plan->initPlan = root->init_plans; +} + +/* + * SS_finalize_plan - do final parameter processing for a completed Plan. + * + * This recursively computes the extParam and allParam sets for every Plan + * node in the given plan tree. (Oh, and RangeTblFunction.funcparams too.) + * + * We assume that SS_finalize_plan has already been run on any initplans or + * subplans the plan tree could reference. + */ +void +SS_finalize_plan(PlannerInfo *root, Plan *plan) +{ + /* No setup needed, just recurse through plan tree. */ + (void) finalize_plan(root, plan, -1, root->outer_params, NULL); +} + +/* + * Recursive processing of all nodes in the plan tree + * + * gather_param is the rescan_param of an ancestral Gather/GatherMerge, + * or -1 if there is none. + * + * valid_params is the set of param IDs supplied by outer plan levels + * that are valid to reference in this plan node or its children. + * + * scan_params is a set of param IDs to force scan plan nodes to reference. + * This is for EvalPlanQual support, and is always NULL at the top of the + * recursion. + * + * The return value is the computed allParam set for the given Plan node. + * This is just an internal notational convenience: we can add a child + * plan's allParams to the set of param IDs of interest to this level + * in the same statement that recurses to that child. + * + * Do not scribble on caller's values of valid_params or scan_params! + * + * Note: although we attempt to deal with initPlans anywhere in the tree, the + * logic is not really right. The problem is that a plan node might return an + * output Param of its initPlan as a targetlist item, in which case it's valid + * for the parent plan level to reference that same Param; the parent's usage + * will be converted into a Var referencing the child plan node by setrefs.c. + * But this function would see the parent's reference as out of scope and + * complain about it. For now, this does not matter because the planner only + * attaches initPlans to the topmost plan node in a query level, so the case + * doesn't arise. If we ever merge this processing into setrefs.c, maybe it + * can be handled more cleanly. + */ +static Bitmapset * +finalize_plan(PlannerInfo *root, Plan *plan, + int gather_param, + Bitmapset *valid_params, + Bitmapset *scan_params) +{ + finalize_primnode_context context; + int locally_added_param; + Bitmapset *nestloop_params; + Bitmapset *initExtParam; + Bitmapset *initSetParam; + Bitmapset *child_params; + ListCell *l; + + if (plan == NULL) + return NULL; + + context.root = root; + context.paramids = NULL; /* initialize set to empty */ + locally_added_param = -1; /* there isn't one */ + nestloop_params = NULL; /* there aren't any */ + + /* + * Examine any initPlans to determine the set of external params they + * reference and the set of output params they supply. (We assume + * SS_finalize_plan was run on them already.) + */ + initExtParam = initSetParam = NULL; + foreach(l, plan->initPlan) + { + SubPlan *initsubplan = (SubPlan *) lfirst(l); + Plan *initplan = planner_subplan_get_plan(root, initsubplan); + ListCell *l2; + + initExtParam = bms_add_members(initExtParam, initplan->extParam); + foreach(l2, initsubplan->setParam) + { + initSetParam = bms_add_member(initSetParam, lfirst_int(l2)); + } + } + + /* Any setParams are validly referenceable in this node and children */ + if (initSetParam) + valid_params = bms_union(valid_params, initSetParam); + + /* + * When we call finalize_primnode, context.paramids sets are automatically + * merged together. But when recursing to self, we have to do it the hard + * way. We want the paramids set to include params in subplans as well as + * at this level. + */ + + /* Find params in targetlist and qual */ + finalize_primnode((Node *) plan->targetlist, &context); + finalize_primnode((Node *) plan->qual, &context); + + /* + * If it's a parallel-aware scan node, mark it as dependent on the parent + * Gather/GatherMerge's rescan Param. + */ + if (plan->parallel_aware) + { + if (gather_param < 0) + elog(ERROR, "parallel-aware plan node is not below a Gather"); + context.paramids = + bms_add_member(context.paramids, gather_param); + } + + /* Check additional node-type-specific fields */ + switch (nodeTag(plan)) + { + case T_Result: + finalize_primnode(((Result *) plan)->resconstantqual, + &context); + break; + + case T_SeqScan: + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_SampleScan: + finalize_primnode((Node *) ((SampleScan *) plan)->tablesample, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_IndexScan: + finalize_primnode((Node *) ((IndexScan *) plan)->indexqual, + &context); + finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby, + &context); + + /* + * we need not look at indexqualorig, since it will have the same + * param references as indexqual. Likewise, we can ignore + * indexorderbyorig. + */ + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_IndexOnlyScan: + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual, + &context); + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->recheckqual, + &context); + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby, + &context); + + /* + * we need not look at indextlist, since it cannot contain Params. + */ + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_BitmapIndexScan: + finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual, + &context); + + /* + * we need not look at indexqualorig, since it will have the same + * param references as indexqual. + */ + break; + + case T_BitmapHeapScan: + finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_TidScan: + finalize_primnode((Node *) ((TidScan *) plan)->tidquals, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_TidRangeScan: + finalize_primnode((Node *) ((TidRangeScan *) plan)->tidrangequals, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_SubqueryScan: + { + SubqueryScan *sscan = (SubqueryScan *) plan; + RelOptInfo *rel; + Bitmapset *subquery_params; + + /* We must run finalize_plan on the subquery */ + rel = find_base_rel(root, sscan->scan.scanrelid); + subquery_params = rel->subroot->outer_params; + if (gather_param >= 0) + subquery_params = bms_add_member(bms_copy(subquery_params), + gather_param); + finalize_plan(rel->subroot, sscan->subplan, gather_param, + subquery_params, NULL); + + /* Now we can add its extParams to the parent's params */ + context.paramids = bms_add_members(context.paramids, + sscan->subplan->extParam); + /* We need scan_params too, though */ + context.paramids = bms_add_members(context.paramids, + scan_params); + } + break; + + case T_FunctionScan: + { + FunctionScan *fscan = (FunctionScan *) plan; + ListCell *lc; + + /* + * Call finalize_primnode independently on each function + * expression, so that we can record which params are + * referenced in each, in order to decide which need + * re-evaluating during rescan. + */ + foreach(lc, fscan->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + finalize_primnode_context funccontext; + + funccontext = context; + funccontext.paramids = NULL; + + finalize_primnode(rtfunc->funcexpr, &funccontext); + + /* remember results for execution */ + rtfunc->funcparams = funccontext.paramids; + + /* add the function's params to the overall set */ + context.paramids = bms_add_members(context.paramids, + funccontext.paramids); + } + + context.paramids = bms_add_members(context.paramids, + scan_params); + } + break; + + case T_TableFuncScan: + finalize_primnode((Node *) ((TableFuncScan *) plan)->tablefunc, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_ValuesScan: + finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_CteScan: + { + /* + * You might think we should add the node's cteParam to + * paramids, but we shouldn't because that param is just a + * linkage mechanism for multiple CteScan nodes for the same + * CTE; it is never used for changed-param signaling. What we + * have to do instead is to find the referenced CTE plan and + * incorporate its external paramids, so that the correct + * things will happen if the CTE references outer-level + * variables. See test cases for bug #4902. (We assume + * SS_finalize_plan was run on the CTE plan already.) + */ + int plan_id = ((CteScan *) plan)->ctePlanId; + Plan *cteplan; + + /* so, do this ... */ + if (plan_id < 1 || plan_id > list_length(root->glob->subplans)) + elog(ERROR, "could not find plan for CteScan referencing plan ID %d", + plan_id); + cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); + context.paramids = + bms_add_members(context.paramids, cteplan->extParam); + +#ifdef NOT_USED + /* ... but not this */ + context.paramids = + bms_add_member(context.paramids, + ((CteScan *) plan)->cteParam); +#endif + + context.paramids = bms_add_members(context.paramids, + scan_params); + } + break; + + case T_WorkTableScan: + context.paramids = + bms_add_member(context.paramids, + ((WorkTableScan *) plan)->wtParam); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_NamedTuplestoreScan: + context.paramids = bms_add_members(context.paramids, scan_params); + break; + + case T_ForeignScan: + { + ForeignScan *fscan = (ForeignScan *) plan; + + finalize_primnode((Node *) fscan->fdw_exprs, + &context); + finalize_primnode((Node *) fscan->fdw_recheck_quals, + &context); + + /* We assume fdw_scan_tlist cannot contain Params */ + context.paramids = bms_add_members(context.paramids, + scan_params); + } + break; + + case T_CustomScan: + { + CustomScan *cscan = (CustomScan *) plan; + ListCell *lc; + + finalize_primnode((Node *) cscan->custom_exprs, + &context); + /* We assume custom_scan_tlist cannot contain Params */ + context.paramids = + bms_add_members(context.paramids, scan_params); + + /* child nodes if any */ + foreach(lc, cscan->custom_plans) + { + context.paramids = + bms_add_members(context.paramids, + finalize_plan(root, + (Plan *) lfirst(lc), + gather_param, + valid_params, + scan_params)); + } + } + break; + + case T_ModifyTable: + { + ModifyTable *mtplan = (ModifyTable *) plan; + + /* Force descendant scan nodes to reference epqParam */ + locally_added_param = mtplan->epqParam; + valid_params = bms_add_member(bms_copy(valid_params), + locally_added_param); + scan_params = bms_add_member(bms_copy(scan_params), + locally_added_param); + finalize_primnode((Node *) mtplan->returningLists, + &context); + finalize_primnode((Node *) mtplan->onConflictSet, + &context); + finalize_primnode((Node *) mtplan->onConflictWhere, + &context); + /* exclRelTlist contains only Vars, doesn't need examination */ + } + break; + + case T_Append: + { + ListCell *l; + + foreach(l, ((Append *) plan)->appendplans) + { + context.paramids = + bms_add_members(context.paramids, + finalize_plan(root, + (Plan *) lfirst(l), + gather_param, + valid_params, + scan_params)); + } + } + break; + + case T_MergeAppend: + { + ListCell *l; + + foreach(l, ((MergeAppend *) plan)->mergeplans) + { + context.paramids = + bms_add_members(context.paramids, + finalize_plan(root, + (Plan *) lfirst(l), + gather_param, + valid_params, + scan_params)); + } + } + break; + + case T_BitmapAnd: + { + ListCell *l; + + foreach(l, ((BitmapAnd *) plan)->bitmapplans) + { + context.paramids = + bms_add_members(context.paramids, + finalize_plan(root, + (Plan *) lfirst(l), + gather_param, + valid_params, + scan_params)); + } + } + break; + + case T_BitmapOr: + { + ListCell *l; + + foreach(l, ((BitmapOr *) plan)->bitmapplans) + { + context.paramids = + bms_add_members(context.paramids, + finalize_plan(root, + (Plan *) lfirst(l), + gather_param, + valid_params, + scan_params)); + } + } + break; + + case T_NestLoop: + { + ListCell *l; + + finalize_primnode((Node *) ((Join *) plan)->joinqual, + &context); + /* collect set of params that will be passed to right child */ + foreach(l, ((NestLoop *) plan)->nestParams) + { + NestLoopParam *nlp = (NestLoopParam *) lfirst(l); + + nestloop_params = bms_add_member(nestloop_params, + nlp->paramno); + } + } + break; + + case T_MergeJoin: + finalize_primnode((Node *) ((Join *) plan)->joinqual, + &context); + finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses, + &context); + break; + + case T_HashJoin: + finalize_primnode((Node *) ((Join *) plan)->joinqual, + &context); + finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses, + &context); + break; + + case T_Hash: + finalize_primnode((Node *) ((Hash *) plan)->hashkeys, + &context); + break; + + case T_Limit: + finalize_primnode(((Limit *) plan)->limitOffset, + &context); + finalize_primnode(((Limit *) plan)->limitCount, + &context); + break; + + case T_RecursiveUnion: + /* child nodes are allowed to reference wtParam */ + locally_added_param = ((RecursiveUnion *) plan)->wtParam; + valid_params = bms_add_member(bms_copy(valid_params), + locally_added_param); + /* wtParam does *not* get added to scan_params */ + break; + + case T_LockRows: + /* Force descendant scan nodes to reference epqParam */ + locally_added_param = ((LockRows *) plan)->epqParam; + valid_params = bms_add_member(bms_copy(valid_params), + locally_added_param); + scan_params = bms_add_member(bms_copy(scan_params), + locally_added_param); + break; + + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * AGG_HASHED plans need to know which Params are referenced + * in aggregate calls. Do a separate scan to identify them. + */ + if (agg->aggstrategy == AGG_HASHED) + { + finalize_primnode_context aggcontext; + + aggcontext.root = root; + aggcontext.paramids = NULL; + finalize_agg_primnode((Node *) agg->plan.targetlist, + &aggcontext); + finalize_agg_primnode((Node *) agg->plan.qual, + &aggcontext); + agg->aggParams = aggcontext.paramids; + } + } + break; + + case T_WindowAgg: + finalize_primnode(((WindowAgg *) plan)->startOffset, + &context); + finalize_primnode(((WindowAgg *) plan)->endOffset, + &context); + break; + + case T_Gather: + /* child nodes are allowed to reference rescan_param, if any */ + locally_added_param = ((Gather *) plan)->rescan_param; + if (locally_added_param >= 0) + { + valid_params = bms_add_member(bms_copy(valid_params), + locally_added_param); + + /* + * We currently don't support nested Gathers. The issue so + * far as this function is concerned would be how to identify + * which child nodes depend on which Gather. + */ + Assert(gather_param < 0); + /* Pass down rescan_param to child parallel-aware nodes */ + gather_param = locally_added_param; + } + /* rescan_param does *not* get added to scan_params */ + break; + + case T_GatherMerge: + /* child nodes are allowed to reference rescan_param, if any */ + locally_added_param = ((GatherMerge *) plan)->rescan_param; + if (locally_added_param >= 0) + { + valid_params = bms_add_member(bms_copy(valid_params), + locally_added_param); + + /* + * We currently don't support nested Gathers. The issue so + * far as this function is concerned would be how to identify + * which child nodes depend on which Gather. + */ + Assert(gather_param < 0); + /* Pass down rescan_param to child parallel-aware nodes */ + gather_param = locally_added_param; + } + /* rescan_param does *not* get added to scan_params */ + break; + + case T_Memoize: + finalize_primnode((Node *) ((Memoize *) plan)->param_exprs, + &context); + break; + + case T_ProjectSet: + case T_Material: + case T_Sort: + case T_IncrementalSort: + case T_Unique: + case T_SetOp: + case T_Group: + /* no node-type-specific fields need fixing */ + break; + + default: + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(plan)); + } + + /* Process left and right child plans, if any */ + child_params = finalize_plan(root, + plan->lefttree, + gather_param, + valid_params, + scan_params); + context.paramids = bms_add_members(context.paramids, child_params); + + if (nestloop_params) + { + /* right child can reference nestloop_params as well as valid_params */ + child_params = finalize_plan(root, + plan->righttree, + gather_param, + bms_union(nestloop_params, valid_params), + scan_params); + /* ... and they don't count as parameters used at my level */ + child_params = bms_difference(child_params, nestloop_params); + bms_free(nestloop_params); + } + else + { + /* easy case */ + child_params = finalize_plan(root, + plan->righttree, + gather_param, + valid_params, + scan_params); + } + context.paramids = bms_add_members(context.paramids, child_params); + + /* + * Any locally generated parameter doesn't count towards its generating + * plan node's external dependencies. (Note: if we changed valid_params + * and/or scan_params, we leak those bitmapsets; not worth the notational + * trouble to clean them up.) + */ + if (locally_added_param >= 0) + { + context.paramids = bms_del_member(context.paramids, + locally_added_param); + } + + /* Now we have all the paramids referenced in this node and children */ + + if (!bms_is_subset(context.paramids, valid_params)) + elog(ERROR, "plan should not reference subplan's variable"); + + /* + * The plan node's allParam and extParam fields should include all its + * referenced paramids, plus contributions from any child initPlans. + * However, any setParams of the initPlans should not be present in the + * parent node's extParams, only in its allParams. (It's possible that + * some initPlans have extParams that are setParams of other initPlans.) + */ + + /* allParam must include initplans' extParams and setParams */ + plan->allParam = bms_union(context.paramids, initExtParam); + plan->allParam = bms_add_members(plan->allParam, initSetParam); + /* extParam must include any initplan extParams */ + plan->extParam = bms_union(context.paramids, initExtParam); + /* but not any initplan setParams */ + plan->extParam = bms_del_members(plan->extParam, initSetParam); + + /* + * For speed at execution time, make sure extParam/allParam are actually + * NULL if they are empty sets. + */ + if (bms_is_empty(plan->extParam)) + plan->extParam = NULL; + if (bms_is_empty(plan->allParam)) + plan->allParam = NULL; + + return plan->allParam; +} + +/* + * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given + * expression tree to the result set. + */ +static bool +finalize_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) + { + if (((Param *) node)->paramkind == PARAM_EXEC) + { + int paramid = ((Param *) node)->paramid; + + context->paramids = bms_add_member(context->paramids, paramid); + } + return false; /* no more to do here */ + } + if (IsA(node, SubPlan)) + { + SubPlan *subplan = (SubPlan *) node; + Plan *plan = planner_subplan_get_plan(context->root, subplan); + ListCell *lc; + Bitmapset *subparamids; + + /* Recurse into the testexpr, but not into the Plan */ + finalize_primnode(subplan->testexpr, context); + + /* + * Remove any param IDs of output parameters of the subplan that were + * referenced in the testexpr. These are not interesting for + * parameter change signaling since we always re-evaluate the subplan. + * Note that this wouldn't work too well if there might be uses of the + * same param IDs elsewhere in the plan, but that can't happen because + * generate_new_exec_param never tries to merge params. + */ + foreach(lc, subplan->paramIds) + { + context->paramids = bms_del_member(context->paramids, + lfirst_int(lc)); + } + + /* Also examine args list */ + finalize_primnode((Node *) subplan->args, context); + + /* + * Add params needed by the subplan to paramids, but excluding those + * we will pass down to it. (We assume SS_finalize_plan was run on + * the subplan already.) + */ + subparamids = bms_copy(plan->extParam); + foreach(lc, subplan->parParam) + { + subparamids = bms_del_member(subparamids, lfirst_int(lc)); + } + context->paramids = bms_join(context->paramids, subparamids); + + return false; /* no more to do here */ + } + return expression_tree_walker(node, finalize_primnode, + (void *) context); +} + +/* + * finalize_agg_primnode: find all Aggref nodes in the given expression tree, + * and add IDs of all PARAM_EXEC params appearing within their aggregated + * arguments to the result set. + */ +static bool +finalize_agg_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + /* we should not consider the direct arguments, if any */ + finalize_primnode((Node *) agg->args, context); + finalize_primnode((Node *) agg->aggfilter, context); + return false; /* there can't be any Aggrefs below here */ + } + return expression_tree_walker(node, finalize_agg_primnode, + (void *) context); +} + +/* + * SS_make_initplan_output_param - make a Param for an initPlan's output + * + * The plan is expected to return a scalar value of the given type/collation. + * + * Note that in some cases the initplan may not ever appear in the finished + * plan tree. If that happens, we'll have wasted a PARAM_EXEC slot, which + * is no big deal. + */ +Param * +SS_make_initplan_output_param(PlannerInfo *root, + Oid resulttype, int32 resulttypmod, + Oid resultcollation) +{ + return generate_new_exec_param(root, resulttype, + resulttypmod, resultcollation); +} + +/* + * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan + * + * We build an EXPR_SUBLINK SubPlan node and put it into the initplan + * list for the outer query level. A Param that represents the initplan's + * output has already been assigned using SS_make_initplan_output_param. + */ +void +SS_make_initplan_from_plan(PlannerInfo *root, + PlannerInfo *subroot, Plan *plan, + Param *prm) +{ + SubPlan *node; + + /* + * Add the subplan and its PlannerInfo to the global lists. + */ + root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subroots = lappend(root->glob->subroots, subroot); + + /* + * Create a SubPlan node and add it to the outer list of InitPlans. Note + * it has to appear after any other InitPlans it might depend on (see + * comments in ExecReScan). + */ + node = makeNode(SubPlan); + node->subLinkType = EXPR_SUBLINK; + node->plan_id = list_length(root->glob->subplans); + node->plan_name = psprintf("InitPlan %d (returns $%d)", + node->plan_id, prm->paramid); + get_first_col_type(plan, &node->firstColType, &node->firstColTypmod, + &node->firstColCollation); + node->setParam = list_make1_int(prm->paramid); + + root->init_plans = lappend(root->init_plans, node); + + /* + * The node can't have any inputs (since it's an initplan), so the + * parParam and args lists remain empty. + */ + + /* Set costs of SubPlan using info from the plan tree */ + cost_subplan(subroot, node, plan); +} |