Diffstat (limited to 'src/backend/optimizer/path/allpaths.c')
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 4668
1 file changed, 4668 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c new file mode 100644 index 0000000..4e02439 --- /dev/null +++ b/src/backend/optimizer/path/allpaths.c @@ -0,0 +1,4668 @@ +/*------------------------------------------------------------------------- + * + * allpaths.c + * Routines to find possible search paths for processing a query + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/allpaths.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> +#include <math.h> + +#include "access/sysattr.h" +#include "access/tsmapi.h" +#include "catalog/pg_class.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_proc.h" +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#ifdef OPTIMIZER_DEBUG +#include "nodes/print.h" +#endif +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/geqo.h" +#include "optimizer/inherit.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planner.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/tlist.h" +#include "parser/parse_clause.h" +#include "parser/parsetree.h" +#include "partitioning/partbounds.h" +#include "partitioning/partprune.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" + + +/* Bitmask flags for pushdown_safety_info.unsafeFlags */ +#define UNSAFE_HAS_VOLATILE_FUNC (1 << 0) +#define UNSAFE_HAS_SET_FUNC (1 << 1) +#define UNSAFE_NOTIN_DISTINCTON_CLAUSE (1 << 2) +#define UNSAFE_NOTIN_PARTITIONBY_CLAUSE (1 << 3) +#define UNSAFE_TYPE_MISMATCH (1 << 4) + +/* results of subquery_is_pushdown_safe */ +typedef struct pushdown_safety_info +{ + unsigned char *unsafeFlags; /* bitmask of reasons why this target list + * column is unsafe for qual pushdown, or 0 if + * no reason. 
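+ * The array is indexed by targetlist resno (entry 0 is unused), so each
+ * output column of the subquery carries its own set of reasons.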
*/ + bool unsafeVolatile; /* don't push down volatile quals */ + bool unsafeLeaky; /* don't push down leaky quals */ +} pushdown_safety_info; + +/* Return type for qual_is_pushdown_safe */ +typedef enum pushdown_safe_type +{ + PUSHDOWN_UNSAFE, /* unsafe to push qual into subquery */ + PUSHDOWN_SAFE, /* safe to push qual into subquery */ + PUSHDOWN_WINDOWCLAUSE_RUNCOND /* unsafe, but may work as WindowClause + * run condition */ +} pushdown_safe_type; + +/* These parameters are set by GUC */ +bool enable_geqo = false; /* just in case GUC doesn't set it */ +int geqo_threshold; +int min_parallel_table_scan_size; +int min_parallel_index_scan_size; + +/* Hook for plugins to get control in set_rel_pathlist() */ +set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; + +/* Hook for plugins to replace standard_join_search() */ +join_search_hook_type join_search_hook = NULL; + + +static void set_base_rel_consider_startup(PlannerInfo *root); +static void set_base_rel_sizes(PlannerInfo *root); +static void set_base_rel_pathlists(PlannerInfo *root); +static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel); +static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys); +static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, + RelOptInfo *rel, + Relids required_outer); +static void accumulate_append_subpath(Path *path, + List **subpaths, + List **special_subpaths); +static Path *get_singleton_append_subpath(Path *path); +static void set_dummy_rel_pathlist(RelOptInfo *rel); +static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); +static bool subquery_is_pushdown_safe(Query 
*subquery, Query *topquery, + pushdown_safety_info *safetyInfo); +static bool recurse_pushdown_safe(Node *setOp, Query *topquery, + pushdown_safety_info *safetyInfo); +static void check_output_expressions(Query *subquery, + pushdown_safety_info *safetyInfo); +static void compare_tlist_datatypes(List *tlist, List *colTypes, + pushdown_safety_info *safetyInfo); +static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query); +static pushdown_safe_type qual_is_pushdown_safe(Query *subquery, Index rti, + RestrictInfo *rinfo, + pushdown_safety_info *safetyInfo); +static void subquery_push_qual(Query *subquery, + RangeTblEntry *rte, Index rti, Node *qual); +static void recurse_push_qual(Node *setOp, Query *topquery, + RangeTblEntry *rte, Index rti, Node *qual); +static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel, + Bitmapset *extra_used_attrs); + + +/* + * make_one_rel + * Finds all possible access paths for executing a query, returning a + * single rel that represents the join of all base rels in the query. + */ +RelOptInfo * +make_one_rel(PlannerInfo *root, List *joinlist) +{ + RelOptInfo *rel; + Index rti; + double total_pages; + + /* + * Construct the all_baserels Relids set. + */ + root->all_baserels = NULL; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + root->all_baserels = bms_add_member(root->all_baserels, brel->relid); + } + + /* Mark base rels as to whether we care about fast-start plans */ + set_base_rel_consider_startup(root); + + /* + * Compute size estimates and consider_parallel flags for each base rel. + */ + set_base_rel_sizes(root); + + /* + * We should now have size estimates for every actual table involved in + * the query, and we also know which if any have been deleted from the + * query by join removal, pruned by partition pruning, or eliminated by + * constraint exclusion. So we can now compute total_table_pages. + * + * Note that appendrels are not double-counted here, even though we don't + * bother to distinguish RelOptInfos for appendrel parents, because the + * parents will have pages = 0. + * + * XXX if a table is self-joined, we will count it once per appearance, + * which perhaps is the wrong thing ... but that's not completely clear, + * and detecting self-joins here is difficult, so ignore it for now. + */ + total_pages = 0; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + if (IS_DUMMY_REL(brel)) + continue; + + if (IS_SIMPLE_REL(brel)) + total_pages += (double) brel->pages; + } + root->total_table_pages = total_pages; + + /* + * Generate access paths for each base rel. + */ + set_base_rel_pathlists(root); + + /* + * Generate access paths for the entire join tree. + */ + rel = make_rel_from_joinlist(root, joinlist); + + /* + * The result should join all and only the query's base rels. + */ + Assert(bms_equal(rel->relids, root->all_baserels)); + + return rel; +} + +/* + * set_base_rel_consider_startup + * Set the consider_[param_]startup flags for each base-relation entry. 
+ * + * For the moment, we only deal with consider_param_startup here; because the + * logic for consider_startup is pretty trivial and is the same for every base + * relation, we just let build_simple_rel() initialize that flag correctly to + * start with. If that logic ever gets more complicated it would probably + * be better to move it here. + */ +static void +set_base_rel_consider_startup(PlannerInfo *root) +{ + /* + * Since parameterized paths can only be used on the inside of a nestloop + * join plan, there is usually little value in considering fast-start + * plans for them. However, for relations that are on the RHS of a SEMI + * or ANTI join, a fast-start plan can be useful because we're only going + * to care about fetching one tuple anyway. + * + * To minimize growth of planning time, we currently restrict this to + * cases where the RHS is a single base relation, not a join; there is no + * provision for consider_param_startup to get set at all on joinrels. + * Also we don't worry about appendrels. costsize.c's costing rules for + * nestloop semi/antijoins don't consider such cases either. + */ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int varno; + + if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) && + bms_get_singleton_member(sjinfo->syn_righthand, &varno)) + { + RelOptInfo *rel = find_base_rel(root, varno); + + rel->consider_param_startup = true; + } + } +} + +/* + * set_base_rel_sizes + * Set the size estimates (rows and widths) for each base-relation entry. + * Also determine whether to consider parallel paths for base relations. + * + * We do this in a separate pass over the base rels so that rowcount + * estimates are available for parameterized path generation, and also so + * that each rel's consider_parallel flag is set correctly before we begin to + * generate paths. + */ +static void +set_base_rel_sizes(PlannerInfo *root) +{ + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + RangeTblEntry *rte; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + rte = root->simple_rte_array[rti]; + + /* + * If parallelism is allowable for this query in general, see whether + * it's allowable for this rel in particular. We have to do this + * before set_rel_size(), because (a) if this rel is an inheritance + * parent, set_append_rel_size() will use and perhaps change the rel's + * consider_parallel flag, and (b) for some RTE types, set_rel_size() + * goes ahead and makes paths immediately. + */ + if (root->glob->parallelModeOK) + set_rel_consider_parallel(root, rel, rte); + + set_rel_size(root, rel, rti, rte); + } +} + +/* + * set_base_rel_pathlists + * Finds all paths available for scanning each base-relation entry. + * Sequential scan and any available indices are considered. + * Each useful path is attached to its relation's 'pathlist' field. 
+ */ +static void +set_base_rel_pathlists(PlannerInfo *root) +{ + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); + } +} + +/* + * set_rel_size + * Set size estimates for a base relation + */ +static void +set_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if (rel->reloptkind == RELOPT_BASEREL && + relation_excluded_by_constraints(root, rel, rte)) + { + /* + * We proved we don't need to scan the rel via constraint exclusion, + * so set up a single dummy path for it. Here we only check this for + * regular baserels; if it's an otherrel, CE was already checked in + * set_append_rel_size(). + * + * In this case, we go ahead and set up the relation's path right away + * instead of leaving it for set_rel_pathlist to do. This is because + * we don't have a convention for marking a rel as dummy except by + * assigning a dummy path to it. + */ + set_dummy_rel_pathlist(rel); + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_size(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_size(root, rel, rte); + } + else if (rte->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * We could get here if asked to scan a partitioned table + * with ONLY. In that case we shouldn't scan any of the + * partitions, so mark it as a dummy rel. + */ + set_dummy_rel_pathlist(rel); + } + else if (rte->tablesample != NULL) + { + /* Sampled relation */ + set_tablesample_rel_size(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_size(root, rel, rte); + } + break; + case RTE_SUBQUERY: + + /* + * Subqueries don't support making a choice between + * parameterized and unparameterized paths, so just go ahead + * and build their paths immediately. + */ + set_subquery_pathlist(root, rel, rti, rte); + break; + case RTE_FUNCTION: + set_function_size_estimates(root, rel); + break; + case RTE_TABLEFUNC: + set_tablefunc_size_estimates(root, rel); + break; + case RTE_VALUES: + set_values_size_estimates(root, rel); + break; + case RTE_CTE: + + /* + * CTEs don't support making a choice between parameterized + * and unparameterized paths, so just go ahead and build their + * paths immediately. + */ + if (rte->self_reference) + set_worktable_pathlist(root, rel, rte); + else + set_cte_pathlist(root, rel, rte); + break; + case RTE_NAMEDTUPLESTORE: + /* Might as well just build the path immediately */ + set_namedtuplestore_pathlist(root, rel, rte); + break; + case RTE_RESULT: + /* Might as well just build the path immediately */ + set_result_pathlist(root, rel, rte); + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } + + /* + * We insist that all non-dummy rels have a nonzero rowcount estimate. 
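+ * (The size-estimation routines clamp their results with clamp_row_est(),
+ * which never returns less than one row, so this holds for any rel that
+ * was actually sized.)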
+ */ + Assert(rel->rows > 0 || IS_DUMMY_REL(rel)); +} + +/* + * set_rel_pathlist + * Build access paths for a base relation + */ +static void +set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if (IS_DUMMY_REL(rel)) + { + /* We already proved the relation empty, so nothing more to do */ + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_pathlist(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_pathlist(root, rel, rte); + } + else if (rte->tablesample != NULL) + { + /* Sampled relation */ + set_tablesample_rel_pathlist(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_pathlist(root, rel, rte); + } + break; + case RTE_SUBQUERY: + /* Subquery --- fully handled during set_rel_size */ + break; + case RTE_FUNCTION: + /* RangeFunction */ + set_function_pathlist(root, rel, rte); + break; + case RTE_TABLEFUNC: + /* Table Function */ + set_tablefunc_pathlist(root, rel, rte); + break; + case RTE_VALUES: + /* Values list */ + set_values_pathlist(root, rel, rte); + break; + case RTE_CTE: + /* CTE reference --- fully handled during set_rel_size */ + break; + case RTE_NAMEDTUPLESTORE: + /* tuplestore reference --- fully handled during set_rel_size */ + break; + case RTE_RESULT: + /* simple Result --- fully handled during set_rel_size */ + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } + + /* + * Allow a plugin to editorialize on the set of Paths for this base + * relation. It could add new paths (such as CustomPaths) by calling + * add_path(), or add_partial_path() if parallel aware. It could also + * delete or modify paths added by the core code. + */ + if (set_rel_pathlist_hook) + (*set_rel_pathlist_hook) (root, rel, rti, rte); + + /* + * If this is a baserel, we should normally consider gathering any partial + * paths we may have created for it. We have to do this after calling the + * set_rel_pathlist_hook, else it cannot add partial paths to be included + * here. + * + * However, if this is an inheritance child, skip it. Otherwise, we could + * end up with a very large number of gather nodes, each trying to grab + * its own pool of workers. Instead, we'll consider gathering partial + * paths for the parent appendrel. + * + * Also, if this is the topmost scan/join rel, we postpone gathering until + * the final scan/join targetlist is available (see grouping_planner). + */ + if (rel->reloptkind == RELOPT_BASEREL && + !bms_equal(rel->relids, root->all_baserels)) + generate_useful_gather_paths(root, rel, false); + + /* Now find the cheapest of the paths for this rel */ + set_cheapest(rel); + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, rel); +#endif +} + +/* + * set_plain_rel_size + * Set size estimates for a plain relation (no subquery, no inheritance) + */ +static void +set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_index_predicates(root, rel); + + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * If this relation could possibly be scanned from within a worker, then set + * its consider_parallel flag. 
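+ *
+ * The flag only records that the rel itself is safe to scan inside a
+ * worker; whether partial paths actually get built is decided later,
+ * e.g. in create_plain_partial_paths().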
+ */ +static void +set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + /* + * The flag has previously been initialized to false, so we can just + * return if it becomes clear that we can't safely set it. + */ + Assert(!rel->consider_parallel); + + /* Don't call this if parallelism is disallowed for the entire query. */ + Assert(root->glob->parallelModeOK); + + /* This should only be called for baserels and appendrel children. */ + Assert(IS_SIMPLE_REL(rel)); + + /* Assorted checks based on rtekind. */ + switch (rte->rtekind) + { + case RTE_RELATION: + + /* + * Currently, parallel workers can't access the leader's temporary + * tables. We could possibly relax this if we wrote all of its + * local buffers at the start of the query and made no changes + * thereafter (maybe we could allow hint bit changes), and if we + * taught the workers to read them. Writing a large number of + * temporary buffers could be expensive, though, and we don't have + * the rest of the necessary infrastructure right now anyway. So + * for now, bail out if we see a temporary table. + */ + if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP) + return; + + /* + * Table sampling can be pushed down to workers if the sample + * function and its arguments are safe. + */ + if (rte->tablesample != NULL) + { + char proparallel = func_parallel(rte->tablesample->tsmhandler); + + if (proparallel != PROPARALLEL_SAFE) + return; + if (!is_parallel_safe(root, (Node *) rte->tablesample->args)) + return; + } + + /* + * Ask FDWs whether they can support performing a ForeignScan + * within a worker. Most often, the answer will be no. For + * example, if the nature of the FDW is such that it opens a TCP + * connection with a remote server, each parallel worker would end + * up with a separate connection, and these connections might not + * be appropriately coordinated between workers and the leader. + */ + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + Assert(rel->fdwroutine); + if (!rel->fdwroutine->IsForeignScanParallelSafe) + return; + if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte)) + return; + } + + /* + * There are additional considerations for appendrels, which we'll + * deal with in set_append_rel_size and set_append_rel_pathlist. + * For now, just set consider_parallel based on the rel's own + * quals and targetlist. + */ + break; + + case RTE_SUBQUERY: + + /* + * There's no intrinsic problem with scanning a subquery-in-FROM + * (as distinct from a SubPlan or InitPlan) in a parallel worker. + * If the subquery doesn't happen to have any parallel-safe paths, + * then flagging it as consider_parallel won't change anything, + * but that's true for plain tables, too. We must set + * consider_parallel based on the rel's own quals and targetlist, + * so that if a subquery path is parallel-safe but the quals and + * projection we're sticking onto it are not, we correctly mark + * the SubqueryScanPath as not parallel-safe. (Note that + * set_subquery_pathlist() might push some of these quals down + * into the subquery itself, but that doesn't change anything.) + * + * We can't push sub-select containing LIMIT/OFFSET to workers as + * there is no guarantee that the row order will be fully + * deterministic, and applying LIMIT/OFFSET will lead to + * inconsistent results at the top-level. (In some cases, where + * the result is ordered, we could relax this restriction. But it + * doesn't currently seem worth expending extra effort to do so.) 
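+ *
+ * For example, in SELECT * FROM (SELECT * FROM t LIMIT 10) ss, each
+ * worker would scan a different subset of t and so apply the LIMIT to
+ * a different set of rows.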
+ */ + { + Query *subquery = castNode(Query, rte->subquery); + + if (limit_needed(subquery)) + return; + } + break; + + case RTE_JOIN: + /* Shouldn't happen; we're only considering baserels here. */ + Assert(false); + return; + + case RTE_FUNCTION: + /* Check for parallel-restricted functions. */ + if (!is_parallel_safe(root, (Node *) rte->functions)) + return; + break; + + case RTE_TABLEFUNC: + /* not parallel safe */ + return; + + case RTE_VALUES: + /* Check for parallel-restricted functions. */ + if (!is_parallel_safe(root, (Node *) rte->values_lists)) + return; + break; + + case RTE_CTE: + + /* + * CTE tuplestores aren't shared among parallel workers, so we + * force all CTE scans to happen in the leader. Also, populating + * the CTE would require executing a subplan that's not available + * in the worker, might be parallel-restricted, and must get + * executed only once. + */ + return; + + case RTE_NAMEDTUPLESTORE: + + /* + * tuplestore cannot be shared, at least without more + * infrastructure to support that. + */ + return; + + case RTE_RESULT: + /* RESULT RTEs, in themselves, are no problem. */ + break; + } + + /* + * If there's anything in baserestrictinfo that's parallel-restricted, we + * give up on parallelizing access to this relation. We could consider + * instead postponing application of the restricted quals until we're + * above all the parallelism in the plan tree, but it's not clear that + * that would be a win in very many cases, and it might be tricky to make + * outer join clauses work correctly. It would likely break equivalence + * classes, too. + */ + if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo)) + return; + + /* + * Likewise, if the relation's outputs are not parallel-safe, give up. + * (Usually, they're just Vars, but sometimes they're not.) + */ + if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs)) + return; + + /* We have a winner. */ + rel->consider_parallel = true; +} + +/* + * set_plain_rel_pathlist + * Build access paths for a plain relation (no subquery, no inheritance) + */ +static void +set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a seqscan, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. + */ + required_outer = rel->lateral_relids; + + /* Consider sequential scan */ + add_path(rel, create_seqscan_path(root, rel, required_outer, 0)); + + /* If appropriate, consider parallel sequential scan */ + if (rel->consider_parallel && required_outer == NULL) + create_plain_partial_paths(root, rel); + + /* Consider index scans */ + create_index_paths(root, rel); + + /* Consider TID scans */ + create_tidscan_paths(root, rel); +} + +/* + * create_plain_partial_paths + * Build partial access paths for parallel scan of a plain relation + */ +static void +create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel) +{ + int parallel_workers; + + parallel_workers = compute_parallel_worker(rel, rel->pages, -1, + max_parallel_workers_per_gather); + + /* If any limit was set to zero, the user doesn't want a parallel scan. */ + if (parallel_workers <= 0) + return; + + /* Add an unordered partial path based on a parallel sequential scan. 
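+ * (compute_parallel_worker() scales the count logarithmically, adding
+ * roughly one worker each time rel->pages triples relative to
+ * min_parallel_table_scan_size, capped by the limit passed above.)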
*/ + add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers)); +} + +/* + * set_tablesample_rel_size + * Set size estimates for a sampled relation + */ +static void +set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + TableSampleClause *tsc = rte->tablesample; + TsmRoutine *tsm; + BlockNumber pages; + double tuples; + + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_index_predicates(root, rel); + + /* + * Call the sampling method's estimation function to estimate the number + * of pages it will read and the number of tuples it will return. (Note: + * we assume the function returns sane values.) + */ + tsm = GetTsmRoutine(tsc->tsmhandler); + tsm->SampleScanGetSampleSize(root, rel, tsc->args, + &pages, &tuples); + + /* + * For the moment, because we will only consider a SampleScan path for the + * rel, it's okay to just overwrite the pages and tuples estimates for the + * whole relation. If we ever consider multiple path types for sampled + * rels, we'll need more complication. + */ + rel->pages = pages; + rel->tuples = tuples; + + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * set_tablesample_rel_pathlist + * Build access paths for a sampled relation + */ +static void +set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + Path *path; + + /* + * We don't support pushing join clauses into the quals of a samplescan, + * but it could still have required parameterization due to LATERAL refs + * in its tlist or TABLESAMPLE arguments. + */ + required_outer = rel->lateral_relids; + + /* Consider sampled scan */ + path = create_samplescan_path(root, rel, required_outer); + + /* + * If the sampling method does not support repeatable scans, we must avoid + * plans that would scan the rel multiple times. Ideally, we'd simply + * avoid putting the rel on the inside of a nestloop join; but adding such + * a consideration to the planner seems like a great deal of complication + * to support an uncommon usage of second-rate sampling methods. Instead, + * if there is a risk that the query might perform an unsafe join, just + * wrap the SampleScan in a Materialize node. We can check for joins by + * counting the membership of all_baserels (note that this correctly + * counts inheritance trees as single rels). If we're inside a subquery, + * we can't easily check whether a join might occur in the outer query, so + * just assume one is possible. + * + * GetTsmRoutine is relatively expensive compared to the other tests here, + * so check repeatable_across_scans last, even though that's a bit odd. + */ + if ((root->query_level > 1 || + bms_membership(root->all_baserels) != BMS_SINGLETON) && + !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans)) + { + path = (Path *) create_material_path(rel, path); + } + + add_path(rel, path); + + /* For the moment, at least, there are no other paths to consider */ +} + +/* + * set_foreign_size + * Set size estimates for a foreign table RTE + */ +static void +set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Mark rel with estimated output rows, width, etc */ + set_foreign_size_estimates(root, rel); + + /* Let FDW adjust the size estimates, if it can */ + rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); + + /* ... 
but do not let it set the rows estimate to zero */ + rel->rows = clamp_row_est(rel->rows); + + /* + * Also, make sure rel->tuples is not insane relative to rel->rows. + * Notably, this ensures sanity if pg_class.reltuples contains -1 and the + * FDW doesn't do anything to replace that. + */ + rel->tuples = Max(rel->tuples, rel->rows); +} + +/* + * set_foreign_pathlist + * Build access paths for a foreign table RTE + */ +static void +set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Call the FDW's GetForeignPaths function to generate path(s) */ + rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); +} + +/* + * set_append_rel_size + * Set size estimates for a simple "append relation" + * + * The passed-in rel and RTE represent the entire append relation. The + * relation's contents are computed by appending together the output of the + * individual member relations. Note that in the non-partitioned inheritance + * case, the first member relation is actually the same table as is mentioned + * in the parent RTE ... but it has a different RTE and RelOptInfo. This is + * a good thing because their outputs are not the same size. + */ +static void +set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + int parentRTindex = rti; + bool has_live_children; + double parent_rows; + double parent_size; + double *parent_attrsizes; + int nattrs; + ListCell *l; + + /* Guard against stack overflow due to overly deep inheritance tree. */ + check_stack_depth(); + + Assert(IS_SIMPLE_REL(rel)); + + /* + * If this is a partitioned baserel, set the consider_partitionwise_join + * flag; currently, we only consider partitionwise joins with the baserel + * if its targetlist doesn't contain a whole-row Var. + */ + if (enable_partitionwise_join && + rel->reloptkind == RELOPT_BASEREL && + rte->relkind == RELKIND_PARTITIONED_TABLE && + rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL) + rel->consider_partitionwise_join = true; + + /* + * Initialize to compute size estimates for whole append relation. + * + * We handle width estimates by weighting the widths of different child + * rels proportionally to their number of rows. This is sensible because + * the use of width estimates is mainly to compute the total relation + * "footprint" if we have to sort or hash it. To do this, we sum the + * total equivalent size (in "double" arithmetic) and then divide by the + * total rowcount estimate. This is done separately for the total rel + * width and each attribute. + * + * Note: if you consider changing this logic, beware that child rels could + * have zero rows and/or width, if they were excluded by constraints. + */ + has_live_children = false; + parent_rows = 0; + parent_size = 0; + nattrs = rel->max_attr - rel->min_attr + 1; + parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); + + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + ListCell *parentvars; + ListCell *childvars; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + + /* + * The child rel's RelOptInfo was already created during + * add_other_rels_to_query. 
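+ * (find_base_rel() will elog(ERROR) if it is somehow missing, so no
+ * separate existence check is needed here.)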
+ */ + childrel = find_base_rel(root, childRTindex); + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + /* We may have already proven the child to be dummy. */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * We have to copy the parent's targetlist and quals to the child, + * with appropriate substitution of variables. However, the + * baserestrictinfo quals were already copied/substituted when the + * child RelOptInfo was built. So we don't need any additional setup + * before applying constraint exclusion. + */ + if (relation_excluded_by_constraints(root, childrel, childRTE)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + + /* + * Constraint exclusion failed, so copy the parent's join quals and + * targetlist to the child, with appropriate variable substitutions. + * + * NB: the resulting childrel->reltarget->exprs may contain arbitrary + * expressions, which otherwise would not occur in a rel's targetlist. + * Code that might be looking at an appendrel child must cope with + * such. (Normally, a rel's targetlist would only include Vars and + * PlaceHolderVars.) XXX we do not bother to update the cost or width + * fields of childrel->reltarget; not clear if that would be useful. + */ + childrel->joininfo = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->joininfo, + 1, &appinfo); + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltarget->exprs, + 1, &appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). + */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; + + /* + * Note: we could compute appropriate attr_needed data for the child's + * variables, by transforming the parent's attr_needed through the + * translated_vars mapping. However, currently there's no need + * because attr_needed is only examined for base relations not + * otherrels. So we just leave the child's attr_needed empty. + */ + + /* + * If we consider partitionwise joins with the parent rel, do the same + * for partitioned child rels. + * + * Note: here we abuse the consider_partitionwise_join flag by setting + * it for child rels that are not themselves partitioned. We do so to + * tell try_partitionwise_join() that the child rel is sufficiently + * valid to be used as a per-partition input, even if it later gets + * proven to be dummy. (It's not usable until we've set up the + * reltarget and EC entries, which we just did.) + */ + if (rel->consider_partitionwise_join) + childrel->consider_partitionwise_join = true; + + /* + * If parallelism is allowable for this query in general, see whether + * it's allowable for this childrel in particular. But if we've + * already decided the appendrel is not parallel-safe as a whole, + * there's no point in considering parallelism for this child. For + * consistency, do this before calling set_rel_size() for the child. 
+ */ + if (root->glob->parallelModeOK && rel->consider_parallel) + set_rel_consider_parallel(root, childrel, childRTE); + + /* + * Compute the child's size. + */ + set_rel_size(root, childrel, childRTindex, childRTE); + + /* + * It is possible that constraint exclusion detected a contradiction + * within a child subquery, even though we didn't prove one above. If + * so, we can skip this child. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* We have at least one live child. */ + has_live_children = true; + + /* + * If any live child is not parallel-safe, treat the whole appendrel + * as not parallel-safe. In future we might be able to generate plans + * in which some children are farmed out to workers while others are + * not; but we don't have that today, so it's a waste to consider + * partial paths anywhere in the appendrel unless it's all safe. + * (Child rels visited before this one will be unmarked in + * set_append_rel_pathlist().) + */ + if (!childrel->consider_parallel) + rel->consider_parallel = false; + + /* + * Accumulate size information from each live child. + */ + Assert(childrel->rows > 0); + + parent_rows += childrel->rows; + parent_size += childrel->reltarget->width * childrel->rows; + + /* + * Accumulate per-column estimates too. We need not do anything for + * PlaceHolderVars in the parent list. If child expression isn't a + * Var, or we didn't record a width estimate for it, we have to fall + * back on a datatype-based estimate. + * + * By construction, child's targetlist is 1-to-1 with parent's. + */ + forboth(parentvars, rel->reltarget->exprs, + childvars, childrel->reltarget->exprs) + { + Var *parentvar = (Var *) lfirst(parentvars); + Node *childvar = (Node *) lfirst(childvars); + + if (IsA(parentvar, Var) && parentvar->varno == parentRTindex) + { + int pndx = parentvar->varattno - rel->min_attr; + int32 child_width = 0; + + if (IsA(childvar, Var) && + ((Var *) childvar)->varno == childrel->relid) + { + int cndx = ((Var *) childvar)->varattno - childrel->min_attr; + + child_width = childrel->attr_widths[cndx]; + } + if (child_width <= 0) + child_width = get_typavgwidth(exprType(childvar), + exprTypmod(childvar)); + Assert(child_width > 0); + parent_attrsizes[pndx] += child_width * childrel->rows; + } + } + } + + if (has_live_children) + { + /* + * Save the finished size estimates. + */ + int i; + + Assert(parent_rows > 0); + rel->rows = parent_rows; + rel->reltarget->width = rint(parent_size / parent_rows); + for (i = 0; i < nattrs; i++) + rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); + + /* + * Set "raw tuples" count equal to "rows" for the appendrel; needed + * because some places assume rel->tuples is valid for any baserel. + */ + rel->tuples = parent_rows; + + /* + * Note that we leave rel->pages as zero; this is important to avoid + * double-counting the appendrel tree in total_table_pages. + */ + } + else + { + /* + * All children were excluded by constraints, so mark the whole + * appendrel dummy. We must do this in this phase so that the rel's + * dummy-ness is visible when we generate paths for other rels. 
+ */ + set_dummy_rel_pathlist(rel); + } + + pfree(parent_attrsizes); +} + +/* + * set_append_rel_pathlist + * Build access paths for an "append relation" + */ +static void +set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + int parentRTindex = rti; + List *live_childrels = NIL; + ListCell *l; + + /* + * Generate access paths for each member relation, and remember the + * non-dummy children. + */ + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + /* Re-locate the child RTE and RelOptInfo */ + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + childrel = root->simple_rel_array[childRTindex]; + + /* + * If set_append_rel_size() decided the parent appendrel was + * parallel-unsafe at some point after visiting this child rel, we + * need to propagate the unsafety marking down to the child, so that + * we don't generate useless partial paths for it. + */ + if (!rel->consider_parallel) + childrel->consider_parallel = false; + + /* + * Compute the child's access paths. + */ + set_rel_pathlist(root, childrel, childRTindex, childRTE); + + /* + * If child is dummy, ignore it. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Child is live, so add it to the live_childrels list for use below. + */ + live_childrels = lappend(live_childrels, childrel); + } + + /* Add paths to the append relation. */ + add_paths_to_append_rel(root, rel, live_childrels); +} + + +/* + * add_paths_to_append_rel + * Generate paths for the given append relation given the set of non-dummy + * child rels. + * + * The function collects all parameterizations and orderings supported by the + * non-dummy children. For every such parameterization or ordering, it creates + * an append path collecting one path from each non-dummy child with given + * parameterization or ordering. Similarly it collects partial paths from + * non-dummy children to create partial append paths. + */ +void +add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels) +{ + List *subpaths = NIL; + bool subpaths_valid = true; + List *partial_subpaths = NIL; + List *pa_partial_subpaths = NIL; + List *pa_nonpartial_subpaths = NIL; + bool partial_subpaths_valid = true; + bool pa_subpaths_valid; + List *all_child_pathkeys = NIL; + List *all_child_outers = NIL; + ListCell *l; + double partial_rows = -1; + + /* If appropriate, consider parallel append */ + pa_subpaths_valid = enable_parallel_append && rel->consider_parallel; + + /* + * For every non-dummy child, remember the cheapest path. Also, identify + * all pathkeys (orderings) and parameterizations (required_outer sets) + * available for the non-dummy member relations. + */ + foreach(l, live_childrels) + { + RelOptInfo *childrel = lfirst(l); + ListCell *lcp; + Path *cheapest_partial_path = NULL; + + /* + * If child has an unparameterized cheapest-total path, add that to + * the unparameterized Append path we are constructing for the parent. + * If not, there's no workable unparameterized path. + * + * With partitionwise aggregates, the child rel's pathlist may be + * empty, so don't assume that a path exists here. 
+ */ + if (childrel->pathlist != NIL && + childrel->cheapest_total_path->param_info == NULL) + accumulate_append_subpath(childrel->cheapest_total_path, + &subpaths, NULL); + else + subpaths_valid = false; + + /* Same idea, but for a partial plan. */ + if (childrel->partial_pathlist != NIL) + { + cheapest_partial_path = linitial(childrel->partial_pathlist); + accumulate_append_subpath(cheapest_partial_path, + &partial_subpaths, NULL); + } + else + partial_subpaths_valid = false; + + /* + * Same idea, but for a parallel append mixing partial and non-partial + * paths. + */ + if (pa_subpaths_valid) + { + Path *nppath = NULL; + + nppath = + get_cheapest_parallel_safe_total_inner(childrel->pathlist); + + if (cheapest_partial_path == NULL && nppath == NULL) + { + /* Neither a partial nor a parallel-safe path? Forget it. */ + pa_subpaths_valid = false; + } + else if (nppath == NULL || + (cheapest_partial_path != NULL && + cheapest_partial_path->total_cost < nppath->total_cost)) + { + /* Partial path is cheaper or the only option. */ + Assert(cheapest_partial_path != NULL); + accumulate_append_subpath(cheapest_partial_path, + &pa_partial_subpaths, + &pa_nonpartial_subpaths); + } + else + { + /* + * Either we've got only a non-partial path, or we think that + * a single backend can execute the best non-partial path + * faster than all the parallel backends working together can + * execute the best partial path. + * + * It might make sense to be more aggressive here. Even if + * the best non-partial path is more expensive than the best + * partial path, it could still be better to choose the + * non-partial path if there are several such paths that can + * be given to different workers. For now, we don't try to + * figure that out. + */ + accumulate_append_subpath(nppath, + &pa_nonpartial_subpaths, + NULL); + } + } + + /* + * Collect lists of all the available path orderings and + * parameterizations for all the children. We use these as a + * heuristic to indicate which sort orderings and parameterizations we + * should build Append and MergeAppend paths for. + */ + foreach(lcp, childrel->pathlist) + { + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + Relids childouter = PATH_REQ_OUTER(childpath); + + /* Unsorted paths don't contribute to pathkey list */ + if (childkeys != NIL) + { + ListCell *lpk; + bool found = false; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, + childkeys); + } + } + + /* Unparameterized paths don't contribute to param-set list */ + if (childouter) + { + ListCell *lco; + bool found = false; + + /* Have we already seen this param set? */ + foreach(lco, all_child_outers) + { + Relids existing_outers = (Relids) lfirst(lco); + + if (bms_equal(existing_outers, childouter)) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_outers */ + all_child_outers = lappend(all_child_outers, + childouter); + } + } + } + } + + /* + * If we found unparameterized paths for all children, build an unordered, + * unparameterized Append path for the rel. (Note: this is correct even + * if we have zero or one live subpath due to constraint exclusion.) 
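+ * An Append with no subpaths is in fact the planner's canonical
+ * representation of a dummy rel; see set_dummy_rel_pathlist().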
+ */ + if (subpaths_valid) + add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, + NIL, NULL, 0, false, + -1)); + + /* + * Consider an append of unordered, unparameterized partial paths. Make + * it parallel-aware if possible. + */ + if (partial_subpaths_valid && partial_subpaths != NIL) + { + AppendPath *appendpath; + ListCell *lc; + int parallel_workers = 0; + + /* Find the highest number of workers requested for any subpath. */ + foreach(lc, partial_subpaths) + { + Path *path = lfirst(lc); + + parallel_workers = Max(parallel_workers, path->parallel_workers); + } + Assert(parallel_workers > 0); + + /* + * If the use of parallel append is permitted, always request at least + * log2(# of children) workers. We assume it can be useful to have + * extra workers in this case because they will be spread out across + * the children. The precise formula is just a guess, but we don't + * want to end up with a radically different answer for a table with N + * partitions vs. an unpartitioned table with the same data, so the + * use of some kind of log-scaling here seems to make some sense. + */ + if (enable_parallel_append) + { + parallel_workers = Max(parallel_workers, + fls(list_length(live_childrels))); + parallel_workers = Min(parallel_workers, + max_parallel_workers_per_gather); + } + Assert(parallel_workers > 0); + + /* Generate a partial append path. */ + appendpath = create_append_path(root, rel, NIL, partial_subpaths, + NIL, NULL, parallel_workers, + enable_parallel_append, + -1); + + /* + * Make sure any subsequent partial paths use the same row count + * estimate. + */ + partial_rows = appendpath->path.rows; + + /* Add the path. */ + add_partial_path(rel, (Path *) appendpath); + } + + /* + * Consider a parallel-aware append using a mix of partial and non-partial + * paths. (This only makes sense if there's at least one child which has + * a non-partial path that is substantially cheaper than any partial path; + * otherwise, we should use the append path added in the previous step.) + */ + if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL) + { + AppendPath *appendpath; + ListCell *lc; + int parallel_workers = 0; + + /* + * Find the highest number of workers requested for any partial + * subpath. + */ + foreach(lc, pa_partial_subpaths) + { + Path *path = lfirst(lc); + + parallel_workers = Max(parallel_workers, path->parallel_workers); + } + + /* + * Same formula here as above. It's even more important in this + * instance because the non-partial paths won't contribute anything to + * the planned number of parallel workers. + */ + parallel_workers = Max(parallel_workers, + fls(list_length(live_childrels))); + parallel_workers = Min(parallel_workers, + max_parallel_workers_per_gather); + Assert(parallel_workers > 0); + + appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, + pa_partial_subpaths, + NIL, NULL, parallel_workers, true, + partial_rows); + add_partial_path(rel, (Path *) appendpath); + } + + /* + * Also build unparameterized ordered append paths based on the collected + * list of child pathkeys. + */ + if (subpaths_valid) + generate_orderedappend_paths(root, rel, live_childrels, + all_child_pathkeys); + + /* + * Build Append paths for each parameterization seen among the child rels. + * (This may look pretty expensive, but in most cases of practical + * interest, the child rels will expose mostly the same parameterizations, + * so that not that many cases actually get considered here.) 
+ * + * The Append node itself cannot enforce quals, so all qual checking must + * be done in the child paths. This means that to have a parameterized + * Append path, we must have the exact same parameterization for each + * child path; otherwise some children might be failing to check the + * moved-down quals. To make them match up, we can try to increase the + * parameterization of lesser-parameterized paths. + */ + foreach(l, all_child_outers) + { + Relids required_outer = (Relids) lfirst(l); + ListCell *lcr; + + /* Select the child paths for an Append with this parameterization */ + subpaths = NIL; + subpaths_valid = true; + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *subpath; + + if (childrel->pathlist == NIL) + { + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; + } + + subpath = get_cheapest_parameterized_child_path(root, + childrel, + required_outer); + if (subpath == NULL) + { + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; + } + accumulate_append_subpath(subpath, &subpaths, NULL); + } + + if (subpaths_valid) + add_path(rel, (Path *) + create_append_path(root, rel, subpaths, NIL, + NIL, required_outer, 0, false, + -1)); + } + + /* + * When there is only a single child relation, the Append path can inherit + * any ordering available for the child rel's path, so that it's useful to + * consider ordered partial paths. Above we only considered the cheapest + * partial path for each child, but let's also make paths using any + * partial paths that have pathkeys. + */ + if (list_length(live_childrels) == 1) + { + RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels); + + /* skip the cheapest partial path, since we already used that above */ + for_each_from(l, childrel->partial_pathlist, 1) + { + Path *path = (Path *) lfirst(l); + AppendPath *appendpath; + + /* skip paths with no pathkeys. */ + if (path->pathkeys == NIL) + continue; + + appendpath = create_append_path(root, rel, NIL, list_make1(path), + NIL, NULL, + path->parallel_workers, true, + partial_rows); + add_partial_path(rel, (Path *) appendpath); + } + } +} + +/* + * generate_orderedappend_paths + * Generate ordered append paths for an append relation + * + * Usually we generate MergeAppend paths here, but there are some special + * cases where we can generate simple Append paths, because the subpaths + * can provide tuples in the required order already. + * + * We generate a path for each ordering (pathkey list) appearing in + * all_child_pathkeys. + * + * We consider both cheapest-startup and cheapest-total cases, ie, for each + * interesting ordering, collect all the cheapest startup subpaths and all the + * cheapest total paths, and build a suitable path for each case. + * + * We don't currently generate any parameterized ordered paths here. While + * it would not take much more code here to do so, it's very unclear that it + * is worth the planning cycles to investigate such paths: there's little + * use for an ordered path on the inside of a nestloop. In fact, it's likely + * that the current coding of add_path would reject such paths out of hand, + * because add_path gives no credit for sort ordering of parameterized paths, + * and a parameterized MergeAppend is going to be more expensive than the + * corresponding parameterized Append path. 
If we ever try harder to support + * parameterized mergejoin plans, it might be worth adding support for + * parameterized paths here to feed such joins. (See notes in + * optimizer/README for why that might not ever happen, though.) + */ +static void +generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys) +{ + ListCell *lcp; + List *partition_pathkeys = NIL; + List *partition_pathkeys_desc = NIL; + bool partition_pathkeys_partial = true; + bool partition_pathkeys_desc_partial = true; + + /* + * Some partitioned table setups may allow us to use an Append node + * instead of a MergeAppend. This is possible in cases such as RANGE + * partitioned tables where it's guaranteed that an earlier partition must + * contain rows which come earlier in the sort order. To detect whether + * this is relevant, build pathkey descriptions of the partition ordering, + * for both forward and reverse scans. + */ + if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) && + partitions_are_ordered(rel->boundinfo, rel->live_parts)) + { + partition_pathkeys = build_partition_pathkeys(root, rel, + ForwardScanDirection, + &partition_pathkeys_partial); + + partition_pathkeys_desc = build_partition_pathkeys(root, rel, + BackwardScanDirection, + &partition_pathkeys_desc_partial); + + /* + * You might think we should truncate_useless_pathkeys here, but + * allowing partition keys which are a subset of the query's pathkeys + * can often be useful. For example, consider a table partitioned by + * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child + * paths that can produce the a, b, c ordering (perhaps via indexes on + * (a, b, c)) then it works to consider the appendrel output as + * ordered by a, b, c. + */ + } + + /* Now consider each interesting sort ordering */ + foreach(lcp, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(lcp); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + List *fractional_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + bool match_partition_order; + bool match_partition_order_desc; + + /* + * Determine if this sort ordering matches any partition pathkeys we + * have, for both ascending and descending partition order. If the + * partition pathkeys happen to be contained in pathkeys then it still + * works, as described above, providing that the partition pathkeys + * are complete and not just a prefix of the partition keys. (In such + * cases we'll be relying on the child paths to have sorted the + * lower-order columns of the required pathkeys.) + */ + match_partition_order = + pathkeys_contained_in(pathkeys, partition_pathkeys) || + (!partition_pathkeys_partial && + pathkeys_contained_in(partition_pathkeys, pathkeys)); + + match_partition_order_desc = !match_partition_order && + (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) || + (!partition_pathkeys_desc_partial && + pathkeys_contained_in(partition_pathkeys_desc, pathkeys))); + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total, + *cheapest_fractional = NULL; + + /* Locate the right paths, if they are available. 
*/ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + STARTUP_COST, + false); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + TOTAL_COST, + false); + + /* + * If we can't find any paths with the right order just use the + * cheapest-total path; we'll have to sort it later. + */ + if (cheapest_startup == NULL || cheapest_total == NULL) + { + cheapest_startup = cheapest_total = + childrel->cheapest_total_path; + /* Assert we do have an unparameterized path for this child */ + Assert(cheapest_total->param_info == NULL); + } + + /* + * When building a fractional path, determine a cheapest + * fractional path for each child relation too. Looking at startup + * and total costs is not enough, because the cheapest fractional + * path may be dominated by two separate paths (one for startup, + * one for total). + * + * When needed (building fractional path), determine the cheapest + * fractional path too. + */ + if (root->tuple_fraction > 0) + { + double path_fraction = (1.0 / root->tuple_fraction); + + cheapest_fractional = + get_cheapest_fractional_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + path_fraction); + + /* + * If we found no path with matching pathkeys, use the + * cheapest total path instead. + * + * XXX We might consider partially sorted paths too (with an + * incremental sort on top). But we'd have to build all the + * incremental paths, do the costing etc. + */ + if (!cheapest_fractional) + cheapest_fractional = cheapest_total; + } + + /* + * Notice whether we actually have different paths for the + * "cheapest" and "total" cases; frequently there will be no point + * in two create_merge_append_path() calls. + */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + /* + * Collect the appropriate child paths. The required logic varies + * for the Append and MergeAppend cases. + */ + if (match_partition_order) + { + /* + * We're going to make a plain Append path. We don't need + * most of what accumulate_append_subpath would do, but we do + * want to cut out child Appends or MergeAppends if they have + * just a single subpath (and hence aren't doing anything + * useful). + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lappend(startup_subpaths, cheapest_startup); + total_subpaths = lappend(total_subpaths, cheapest_total); + + if (cheapest_fractional) + { + cheapest_fractional = get_singleton_append_subpath(cheapest_fractional); + fractional_subpaths = lappend(fractional_subpaths, cheapest_fractional); + } + } + else if (match_partition_order_desc) + { + /* + * As above, but we need to reverse the order of the children, + * because nodeAppend.c doesn't know anything about reverse + * ordering and will scan the children in the order presented. + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lcons(cheapest_startup, startup_subpaths); + total_subpaths = lcons(cheapest_total, total_subpaths); + + if (cheapest_fractional) + { + cheapest_fractional = get_singleton_append_subpath(cheapest_fractional); + fractional_subpaths = lcons(cheapest_fractional, fractional_subpaths); + } + } + else + { + /* + * Otherwise, rely on accumulate_append_subpath to collect the + * child paths for the MergeAppend. 
+ */ + accumulate_append_subpath(cheapest_startup, + &startup_subpaths, NULL); + accumulate_append_subpath(cheapest_total, + &total_subpaths, NULL); + + if (cheapest_fractional) + accumulate_append_subpath(cheapest_fractional, + &fractional_subpaths, NULL); + } + } + + /* ... and build the Append or MergeAppend paths */ + if (match_partition_order || match_partition_order_desc) + { + /* We only need Append */ + add_path(rel, (Path *) create_append_path(root, + rel, + startup_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + -1)); + if (startup_neq_total) + add_path(rel, (Path *) create_append_path(root, + rel, + total_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + -1)); + + if (fractional_subpaths) + add_path(rel, (Path *) create_append_path(root, + rel, + fractional_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + -1)); + } + else + { + /* We need MergeAppend */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys, + NULL)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL)); + + if (fractional_subpaths) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + fractional_subpaths, + pathkeys, + NULL)); + } + } +} + +/* + * get_cheapest_parameterized_child_path + * Get cheapest path for this relation that has exactly the requested + * parameterization. + * + * Returns NULL if unable to create such a path. + */ +static Path * +get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *cheapest; + ListCell *lc; + + /* + * Look up the cheapest existing path with no more than the needed + * parameterization. If it has exactly the needed parameterization, we're + * done. + */ + cheapest = get_cheapest_path_for_pathkeys(rel->pathlist, + NIL, + required_outer, + TOTAL_COST, + false); + Assert(cheapest != NULL); + if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer)) + return cheapest; + + /* + * Otherwise, we can "reparameterize" an existing path to match the given + * parameterization, which effectively means pushing down additional + * joinquals to be checked within the path's scan. However, some existing + * paths might check the available joinquals already while others don't; + * therefore, it's not clear which existing path will be cheapest after + * reparameterization. We have to go through them all and find out. + */ + cheapest = NULL; + foreach(lc, rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* Can't use it if it needs more than requested parameterization */ + if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + continue; + + /* + * Reparameterization can only increase the path's cost, so if it's + * already more expensive than the current cheapest, forget it. 
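+ * (We therefore compare costs twice: once here, as a cheap filter,
+ * and again below after reparameterizing, once the extra join quals
+ * have been charged to the path.)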
+ */
+ if (cheapest != NULL &&
+ compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+ continue;
+
+ /* Reparameterize if needed, then recheck cost */
+ if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
+ {
+ path = reparameterize_path(root, path, required_outer, 1.0);
+ if (path == NULL)
+ continue; /* failed to reparameterize this one */
+ Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
+
+ if (cheapest != NULL &&
+ compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+ continue;
+ }
+
+ /* We have a new best path */
+ cheapest = path;
+ }
+
+ /* Return the best path, or NULL if we found no suitable candidate */
+ return cheapest;
+}
+
+/*
+ * accumulate_append_subpath
+ * Add a subpath to the list being built for an Append or MergeAppend.
+ *
+ * It's possible that the child is itself an Append or MergeAppend path, in
+ * which case we can "cut out the middleman" and just add its child paths to
+ * our own list. (We don't try to do this earlier because we need to apply
+ * both levels of transformation to the quals.)
+ *
+ * Note that if we omit a child MergeAppend in this way, we are effectively
+ * omitting a sort step, which seems fine: if the parent is to be an Append,
+ * its result would be unsorted anyway, while if the parent is to be a
+ * MergeAppend, there's no point in a separate sort on a child.
+ *
+ * Normally, either path is a partial path and subpaths is a list of partial
+ * paths, or else path is a non-partial path and subpaths is a list of those.
+ * However, if path is a parallel-aware Append, then we add its partial path
+ * children to subpaths and the rest to special_subpaths. If the latter is
+ * NULL, we don't flatten the path at all (unless it contains only partial
+ * paths).
+ */
+static void
+accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
+{
+ if (IsA(path, AppendPath))
+ {
+ AppendPath *apath = (AppendPath *) path;
+
+ if (!apath->path.parallel_aware || apath->first_partial_path == 0)
+ {
+ *subpaths = list_concat(*subpaths, apath->subpaths);
+ return;
+ }
+ else if (special_subpaths != NULL)
+ {
+ List *new_special_subpaths;
+
+ /* Split Parallel Append into partial and non-partial subpaths */
+ *subpaths = list_concat(*subpaths,
+ list_copy_tail(apath->subpaths,
+ apath->first_partial_path));
+ new_special_subpaths =
+ list_truncate(list_copy(apath->subpaths),
+ apath->first_partial_path);
+ *special_subpaths = list_concat(*special_subpaths,
+ new_special_subpaths);
+ return;
+ }
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+ *subpaths = list_concat(*subpaths, mpath->subpaths);
+ return;
+ }
+
+ *subpaths = lappend(*subpaths, path);
+}
+
+/*
+ * get_singleton_append_subpath
+ * Returns the single subpath of an Append/MergeAppend, or just
+ * returns 'path' if it's not a single-subpath Append/MergeAppend.
+ *
+ * Note: 'path' must not be a parallel-aware path. 
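+ *
+ * For example (illustrative), an Append whose subpaths list has shrunk to
+ * one element after partition pruning is equivalent to that lone child, so
+ * callers can substitute the child path and avoid a useless Append node.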
+ */ +static Path * +get_singleton_append_subpath(Path *path) +{ + Assert(!path->parallel_aware); + + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + if (list_length(apath->subpaths) == 1) + return (Path *) linitial(apath->subpaths); + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + + if (list_length(mpath->subpaths) == 1) + return (Path *) linitial(mpath->subpaths); + } + + return path; +} + +/* + * set_dummy_rel_pathlist + * Build a dummy path for a relation that's been excluded by constraints + * + * Rather than inventing a special "dummy" path type, we represent this as an + * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros). + * + * (See also mark_dummy_rel, which does basically the same thing, but is + * typically used to change a rel into dummy state after we already made + * paths for it.) + */ +static void +set_dummy_rel_pathlist(RelOptInfo *rel) +{ + /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ + rel->rows = 0; + rel->reltarget->width = 0; + + /* Discard any pre-existing paths; no further need for them */ + rel->pathlist = NIL; + rel->partial_pathlist = NIL; + + /* Set up the dummy path */ + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, + NIL, rel->lateral_relids, + 0, false, -1)); + + /* + * We set the cheapest-path fields immediately, just in case they were + * pointing at some discarded path. This is redundant when we're called + * from set_rel_size(), but not when called from elsewhere, and doing it + * twice is harmless anyway. + */ + set_cheapest(rel); +} + +/* quick-and-dirty test to see if any joining is needed */ +static bool +has_multiple_baserels(PlannerInfo *root) +{ + int num_base_rels = 0; + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind == RELOPT_BASEREL) + if (++num_base_rels > 1) + return true; + } + return false; +} + +/* + * find_window_run_conditions + * Determine if 'wfunc' is really a WindowFunc and call its prosupport + * function to determine the function's monotonic properties. We then + * see if 'opexpr' can be used to short-circuit execution. + * + * For example row_number() over (order by ...) always produces a value one + * higher than the previous. If someone has a window function in a subquery + * and has a WHERE clause in the outer query to filter rows <= 10, then we may + * as well stop processing the windowagg once the row number reaches 11. Here + * we check if 'opexpr' might help us to stop doing needless extra processing + * in WindowAgg nodes. + * + * '*keep_original' is set to true if the caller should also use 'opexpr' for + * its original purpose. This is set to false if the caller can assume that + * the run condition will handle all of the required filtering. + * + * Returns true if 'opexpr' was found to be useful and was added to the + * WindowClauses runCondition. We also set *keep_original accordingly and add + * 'attno' to *run_cond_attrs offset by FirstLowInvalidHeapAttributeNumber. + * If the 'opexpr' cannot be used then we set *keep_original to true and + * return false. 
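+ *
+ * As an illustrative sketch (hypothetical table names), a query such as
+ *
+ *   SELECT * FROM (SELECT x, row_number() OVER (ORDER BY x) rn
+ *                  FROM tab) t
+ *   WHERE rn <= 10;
+ *
+ * has a monotonically increasing window function, so "rn <= 10" can serve
+ * as a run condition: once it becomes false, the WindowAgg can stop
+ * producing rows.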
+ */ +static bool +find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti, + AttrNumber attno, WindowFunc *wfunc, OpExpr *opexpr, + bool wfunc_left, bool *keep_original, + Bitmapset **run_cond_attrs) +{ + Oid prosupport; + Expr *otherexpr; + SupportRequestWFuncMonotonic req; + SupportRequestWFuncMonotonic *res; + WindowClause *wclause; + List *opinfos; + OpExpr *runopexpr; + Oid runoperator; + ListCell *lc; + + *keep_original = true; + + while (IsA(wfunc, RelabelType)) + wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg; + + /* we can only work with window functions */ + if (!IsA(wfunc, WindowFunc)) + return false; + + /* can't use it if there are subplans in the WindowFunc */ + if (contain_subplans((Node *) wfunc)) + return false; + + prosupport = get_func_support(wfunc->winfnoid); + + /* Check if there's a support function for 'wfunc' */ + if (!OidIsValid(prosupport)) + return false; + + /* get the Expr from the other side of the OpExpr */ + if (wfunc_left) + otherexpr = lsecond(opexpr->args); + else + otherexpr = linitial(opexpr->args); + + /* + * The value being compared must not change during the evaluation of the + * window partition. + */ + if (!is_pseudo_constant_clause((Node *) otherexpr)) + return false; + + /* find the window clause belonging to the window function */ + wclause = (WindowClause *) list_nth(subquery->windowClause, + wfunc->winref - 1); + + req.type = T_SupportRequestWFuncMonotonic; + req.window_func = wfunc; + req.window_clause = wclause; + + /* call the support function */ + res = (SupportRequestWFuncMonotonic *) + DatumGetPointer(OidFunctionCall1(prosupport, + PointerGetDatum(&req))); + + /* + * Nothing to do if the function is neither monotonically increasing nor + * monotonically decreasing. + */ + if (res == NULL || res->monotonic == MONOTONICFUNC_NONE) + return false; + + runopexpr = NULL; + runoperator = InvalidOid; + opinfos = get_op_btree_interpretation(opexpr->opno); + + foreach(lc, opinfos) + { + OpBtreeInterpretation *opinfo = (OpBtreeInterpretation *) lfirst(lc); + int strategy = opinfo->strategy; + + /* handle < / <= */ + if (strategy == BTLessStrategyNumber || + strategy == BTLessEqualStrategyNumber) + { + /* + * < / <= is supported for monotonically increasing functions in + * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc> + * for monotonically decreasing functions. + */ + if ((wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)) || + (!wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING))) + { + *keep_original = false; + runopexpr = opexpr; + runoperator = opexpr->opno; + } + break; + } + /* handle > / >= */ + else if (strategy == BTGreaterStrategyNumber || + strategy == BTGreaterEqualStrategyNumber) + { + /* + * > / >= is supported for monotonically decreasing functions in + * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc> + * for monotonically increasing functions. + */ + if ((wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)) || + (!wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING))) + { + *keep_original = false; + runopexpr = opexpr; + runoperator = opexpr->opno; + } + break; + } + /* handle = */ + else if (strategy == BTEqualStrategyNumber) + { + int16 newstrategy; + + /* + * When both monotonically increasing and decreasing then the + * return value of the window function will be the same each time. + * We can simply use 'opexpr' as the run condition without + * modifying it. 
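+ * (For instance, a window function that returns the same value for
+ * every row of its partition, such as count(*) with no ORDER BY in
+ * its window, may report itself as monotonic in both directions.)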
+ */ + if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH) + { + *keep_original = false; + runopexpr = opexpr; + runoperator = opexpr->opno; + break; + } + + /* + * When monotonically increasing we make a qual with <wfunc> <= + * <value> or <value> >= <wfunc> in order to filter out values + * which are above the value in the equality condition. For + * monotonically decreasing functions we want to filter values + * below the value in the equality condition. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + newstrategy = wfunc_left ? BTLessEqualStrategyNumber : BTGreaterEqualStrategyNumber; + else + newstrategy = wfunc_left ? BTGreaterEqualStrategyNumber : BTLessEqualStrategyNumber; + + /* We must keep the original equality qual */ + *keep_original = true; + runopexpr = opexpr; + + /* determine the operator to use for the runCondition qual */ + runoperator = get_opfamily_member(opinfo->opfamily_id, + opinfo->oplefttype, + opinfo->oprighttype, + newstrategy); + break; + } + } + + if (runopexpr != NULL) + { + Expr *newexpr; + + /* + * Build the qual required for the run condition keeping the + * WindowFunc on the same side as it was originally. + */ + if (wfunc_left) + newexpr = make_opclause(runoperator, + runopexpr->opresulttype, + runopexpr->opretset, (Expr *) wfunc, + otherexpr, runopexpr->opcollid, + runopexpr->inputcollid); + else + newexpr = make_opclause(runoperator, + runopexpr->opresulttype, + runopexpr->opretset, + otherexpr, (Expr *) wfunc, + runopexpr->opcollid, + runopexpr->inputcollid); + + wclause->runCondition = lappend(wclause->runCondition, newexpr); + + /* record that this attno was used in a run condition */ + *run_cond_attrs = bms_add_member(*run_cond_attrs, + attno - FirstLowInvalidHeapAttributeNumber); + return true; + } + + /* unsupported OpExpr */ + return false; +} + +/* + * check_and_push_window_quals + * Check if 'clause' is a qual that can be pushed into a WindowFunc's + * WindowClause as a 'runCondition' qual. These, when present, allow + * some unnecessary work to be skipped during execution. + * + * 'run_cond_attrs' will be populated with all targetlist resnos of subquery + * targets (offset by FirstLowInvalidHeapAttributeNumber) that we pushed + * window quals for. + * + * Returns true if the caller still must keep the original qual or false if + * the caller can safely ignore the original qual because the WindowAgg node + * will use the runCondition to stop returning tuples. + */ +static bool +check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti, + Node *clause, Bitmapset **run_cond_attrs) +{ + OpExpr *opexpr = (OpExpr *) clause; + bool keep_original = true; + Var *var1; + Var *var2; + + /* We're only able to use OpExprs with 2 operands */ + if (!IsA(opexpr, OpExpr)) + return true; + + if (list_length(opexpr->args) != 2) + return true; + + /* + * Currently, we restrict this optimization to strict OpExprs. The reason + * for this is that during execution, once the runcondition becomes false, + * we stop evaluating WindowFuncs. To avoid leaving around stale window + * function result values, we set them to NULL. Having only strict + * OpExprs here ensures that we properly filter out the tuples with NULLs + * in the top-level WindowAgg. + */ + set_opfuncid(opexpr); + if (!func_strict(opexpr->opfuncid)) + return true; + + /* + * Check for plain Vars that reference window functions in the subquery. + * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can + * be used as part of the run condition. 
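+ * (Both argument orders are tried below, so e.g. "rn <= 10" and
+ * "10 >= rn" are handled alike; 'wfunc_left' tells
+ * find_window_run_conditions() which side the window function is on.)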
+ */ + + /* Check the left side of the OpExpr */ + var1 = linitial(opexpr->args); + if (IsA(var1, Var) && var1->varattno > 0) + { + TargetEntry *tle = list_nth(subquery->targetList, var1->varattno - 1); + WindowFunc *wfunc = (WindowFunc *) tle->expr; + + if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc, + opexpr, true, &keep_original, + run_cond_attrs)) + return keep_original; + } + + /* and check the right side */ + var2 = lsecond(opexpr->args); + if (IsA(var2, Var) && var2->varattno > 0) + { + TargetEntry *tle = list_nth(subquery->targetList, var2->varattno - 1); + WindowFunc *wfunc = (WindowFunc *) tle->expr; + + if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc, + opexpr, false, &keep_original, + run_cond_attrs)) + return keep_original; + } + + return true; +} + +/* + * set_subquery_pathlist + * Generate SubqueryScan access paths for a subquery RTE + * + * We don't currently support generating parameterized paths for subqueries + * by pushing join clauses down into them; it seems too expensive to re-plan + * the subquery multiple times to consider different alternatives. + * (XXX that could stand to be reconsidered, now that we use Paths.) + * So the paths made here will be parameterized if the subquery contains + * LATERAL references, otherwise not. As long as that's true, there's no need + * for a separate set_subquery_size phase: just make the paths right away. + */ +static void +set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + Query *parse = root->parse; + Query *subquery = rte->subquery; + Relids required_outer; + pushdown_safety_info safetyInfo; + double tuple_fraction; + RelOptInfo *sub_final_rel; + Bitmapset *run_cond_attrs = NULL; + ListCell *lc; + + /* + * Must copy the Query so that planning doesn't mess up the RTE contents + * (really really need to fix the planner to not scribble on its input, + * someday ... but see remove_unused_subquery_outputs to start with). + */ + subquery = copyObject(subquery); + + /* + * If it's a LATERAL subquery, it might contain some Vars of the current + * query level, requiring it to be treated as parameterized, even though + * we don't support pushing down join quals into subqueries. + */ + required_outer = rel->lateral_relids; + + /* + * Zero out result area for subquery_is_pushdown_safe, so that it can set + * flags as needed while recursing. In particular, we need a workspace + * for keeping track of the reasons why columns are unsafe to reference. + * These reasons are stored in the bits inside unsafeFlags[i] when we + * discover reasons that column i of the subquery is unsafe to be used in + * a pushed-down qual. + */ + memset(&safetyInfo, 0, sizeof(safetyInfo)); + safetyInfo.unsafeFlags = (unsigned char *) + palloc0((list_length(subquery->targetList) + 1) * sizeof(unsigned char)); + + /* + * If the subquery has the "security_barrier" flag, it means the subquery + * originated from a view that must enforce row-level security. Then we + * must not push down quals that contain leaky functions. (Ideally this + * would be checked inside subquery_is_pushdown_safe, but since we don't + * currently pass the RTE to that function, we must do it here.) + */ + safetyInfo.unsafeLeaky = rte->security_barrier; + + /* + * If there are any restriction clauses that have been attached to the + * subquery relation, consider pushing them down to become WHERE or HAVING + * quals of the subquery itself. 
This transformation is useful because it + * may allow us to generate a better plan for the subquery than evaluating + * all the subquery output rows and then filtering them. + * + * There are several cases where we cannot push down clauses. Restrictions + * involving the subquery are checked by subquery_is_pushdown_safe(). + * Restrictions on individual clauses are checked by + * qual_is_pushdown_safe(). Also, we don't want to push down + * pseudoconstant clauses; better to have the gating node above the + * subquery. + * + * Non-pushed-down clauses will get evaluated as qpquals of the + * SubqueryScan node. + * + * XXX Are there any cases where we want to make a policy decision not to + * push down a pushable qual, because it'd result in a worse plan? + */ + if (rel->baserestrictinfo != NIL && + subquery_is_pushdown_safe(subquery, subquery, &safetyInfo)) + { + /* OK to consider pushing down individual quals */ + List *upperrestrictlist = NIL; + ListCell *l; + + foreach(l, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; + + if (rinfo->pseudoconstant) + { + upperrestrictlist = lappend(upperrestrictlist, rinfo); + continue; + } + + switch (qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo)) + { + case PUSHDOWN_SAFE: + /* Push it down */ + subquery_push_qual(subquery, rte, rti, clause); + break; + + case PUSHDOWN_WINDOWCLAUSE_RUNCOND: + + /* + * Since we can't push the qual down into the subquery, + * check if it happens to reference a window function. If + * so then it might be useful to use for the WindowAgg's + * runCondition. + */ + if (!subquery->hasWindowFuncs || + check_and_push_window_quals(subquery, rte, rti, clause, + &run_cond_attrs)) + { + /* + * subquery has no window funcs or the clause is not a + * suitable window run condition qual or it is, but + * the original must also be kept in the upper query. + */ + upperrestrictlist = lappend(upperrestrictlist, rinfo); + } + break; + + case PUSHDOWN_UNSAFE: + upperrestrictlist = lappend(upperrestrictlist, rinfo); + break; + } + } + rel->baserestrictinfo = upperrestrictlist; + /* We don't bother recomputing baserestrict_min_security */ + } + + pfree(safetyInfo.unsafeFlags); + + /* + * The upper query might not use all the subquery's output columns; if + * not, we can simplify. Pass the attributes that were pushed down into + * WindowAgg run conditions to ensure we don't accidentally think those + * are unused. + */ + remove_unused_subquery_outputs(subquery, rel, run_cond_attrs); + + /* + * We can safely pass the outer tuple_fraction down to the subquery if the + * outer level has no joining, aggregation, or sorting to do. Otherwise + * we'd better tell the subquery to plan for full retrieval. (XXX This + * could probably be made more intelligent ...) 
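+ * (Illustrative example: for SELECT * FROM (...) ss LIMIT 10 with no
+ * sorting or grouping at this level, the subquery may as well be planned
+ * on the assumption that only about ten rows will ever be fetched.)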
+ */ + if (parse->hasAggs || + parse->groupClause || + parse->groupingSets || + parse->havingQual || + parse->distinctClause || + parse->sortClause || + has_multiple_baserels(root)) + tuple_fraction = 0.0; /* default case */ + else + tuple_fraction = root->tuple_fraction; + + /* plan_params should not be in use in current query level */ + Assert(root->plan_params == NIL); + + /* Generate a subroot and Paths for the subquery */ + rel->subroot = subquery_planner(root->glob, subquery, + root, + false, tuple_fraction); + + /* Isolate the params needed by this specific subplan */ + rel->subplan_params = root->plan_params; + root->plan_params = NIL; + + /* + * It's possible that constraint exclusion proved the subquery empty. If + * so, it's desirable to produce an unadorned dummy path so that we will + * recognize appropriate optimizations at this query level. + */ + sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL); + + if (IS_DUMMY_REL(sub_final_rel)) + { + set_dummy_rel_pathlist(rel); + return; + } + + /* + * Mark rel with estimated output rows, width, etc. Note that we have to + * do this before generating outer-query paths, else cost_subqueryscan is + * not happy. + */ + set_subquery_size_estimates(root, rel); + + /* + * For each Path that subquery_planner produced, make a SubqueryScanPath + * in the outer query. + */ + foreach(lc, sub_final_rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + List *pathkeys; + + /* Convert subpath's pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, + rel, + subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + + /* Generate outer path using this subpath */ + add_path(rel, (Path *) + create_subqueryscan_path(root, rel, subpath, + pathkeys, required_outer)); + } + + /* If outer rel allows parallelism, do same for partial paths. */ + if (rel->consider_parallel && bms_is_empty(required_outer)) + { + /* If consider_parallel is false, there should be no partial paths. */ + Assert(sub_final_rel->consider_parallel || + sub_final_rel->partial_pathlist == NIL); + + /* Same for partial paths. */ + foreach(lc, sub_final_rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + List *pathkeys; + + /* Convert subpath's pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, + rel, + subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + + /* Generate outer path using this subpath */ + add_partial_path(rel, (Path *) + create_subqueryscan_path(root, rel, subpath, + pathkeys, + required_outer)); + } + } +} + +/* + * set_function_pathlist + * Build the (single) access path for a function RTE + */ +static void +set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + List *pathkeys = NIL; + + /* + * We don't support pushing join clauses into the quals of a function + * scan, but it could still have required parameterization due to LATERAL + * refs in the function expression. + */ + required_outer = rel->lateral_relids; + + /* + * The result is considered unordered unless ORDINALITY was used, in which + * case it is ordered by the ordinal column (the last one). See if we + * care, by checking for uses of that Var in equivalence classes. + */ + if (rte->funcordinality) + { + AttrNumber ordattno = rel->max_attr; + Var *var = NULL; + ListCell *lc; + + /* + * Is there a Var for it in rel's targetlist? 
If not, the query did + * not reference the ordinality column, or at least not in any way + * that would be interesting for sorting. + */ + foreach(lc, rel->reltarget->exprs) + { + Var *node = (Var *) lfirst(lc); + + /* checking varno/varlevelsup is just paranoia */ + if (IsA(node, Var) && + node->varattno == ordattno && + node->varno == rel->relid && + node->varlevelsup == 0) + { + var = node; + break; + } + } + + /* + * Try to build pathkeys for this Var with int8 sorting. We tell + * build_expression_pathkey not to build any new equivalence class; if + * the Var isn't already mentioned in some EC, it means that nothing + * cares about the ordering. + */ + if (var) + pathkeys = build_expression_pathkey(root, + (Expr *) var, + NULL, /* below outer joins */ + Int8LessOperator, + rel->relids, + false); + } + + /* Generate appropriate path */ + add_path(rel, create_functionscan_path(root, rel, + pathkeys, required_outer)); +} + +/* + * set_values_pathlist + * Build the (single) access path for a VALUES RTE + */ +static void +set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a values scan, + * but it could still have required parameterization due to LATERAL refs + * in the values expressions. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_valuesscan_path(root, rel, required_outer)); +} + +/* + * set_tablefunc_pathlist + * Build the (single) access path for a table func RTE + */ +static void +set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a tablefunc + * scan, but it could still have required parameterization due to LATERAL + * refs in the function expression. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_tablefuncscan_path(root, rel, + required_outer)); +} + +/* + * set_cte_pathlist + * Build the (single) access path for a non-self-reference CTE RTE + * + * There's no need for a separate set_cte_size phase, since we don't + * support join-qual-parameterized paths for CTEs. + */ +static void +set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + int ndx; + ListCell *lc; + int plan_id; + Relids required_outer; + + /* + * Find the referenced CTE, and locate the plan previously made for it. + */ + levelsup = rte->ctelevelsup; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + + /* + * Note: cte_plan_ids can be shorter than cteList, if we are still working + * on planning the CTEs (ie, this is a side-reference from another CTE). + * So we mustn't use forboth here. 
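+ * (For example, given WITH a AS (...), b AS (SELECT ... FROM a) ..., we
+ * can arrive here for the reference to "a" while "b" itself is still
+ * being planned, so cte_plan_ids may not yet cover the whole cteList.)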
+ */ + ndx = 0; + foreach(lc, cteroot->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + ndx++; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find CTE \"%s\"", rte->ctename); + if (ndx >= list_length(cteroot->cte_plan_ids)) + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); + if (plan_id <= 0) + elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename); + cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan->plan_rows); + + /* + * We don't support pushing join clauses into the quals of a CTE scan, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_ctescan_path(root, rel, required_outer)); +} + +/* + * set_namedtuplestore_pathlist + * Build the (single) access path for a named tuplestore RTE + * + * There's no need for a separate set_namedtuplestore_size phase, since we + * don't support join-qual-parameterized paths for tuplestores. + */ +static void +set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relids required_outer; + + /* Mark rel with estimated output rows, width, etc */ + set_namedtuplestore_size_estimates(root, rel); + + /* + * We don't support pushing join clauses into the quals of a tuplestore + * scan, but it could still have required parameterization due to LATERAL + * refs in its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * set_result_pathlist + * Build the (single) access path for an RTE_RESULT RTE + * + * There's no need for a separate set_result_size phase, since we + * don't support join-qual-parameterized paths for these RTEs. + */ +static void +set_result_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relids required_outer; + + /* Mark rel with estimated output rows, width, etc */ + set_result_size_estimates(root, rel); + + /* + * We don't support pushing join clauses into the quals of a Result scan, + * but it could still have required parameterization due to LATERAL refs + * in its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_resultscan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * set_worktable_pathlist + * Build the (single) access path for a self-reference CTE RTE + * + * There's no need for a separate set_worktable_size phase, since we don't + * support join-qual-parameterized paths for CTEs. + */ +static void +set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Path *ctepath; + PlannerInfo *cteroot; + Index levelsup; + Relids required_outer; + + /* + * We need to find the non-recursive term's path, which is in the plan + * level that's processing the recursive UNION, which is one level *below* + * where the CTE comes from. 
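+ * (In other words, cteroot->non_recursive_path is stashed by the planner
+ * invocation that evaluates the recursive UNION itself, which is why
+ * ctelevelsup is decremented by one before walking up parent_root links.)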
+ */ + levelsup = rte->ctelevelsup; + if (levelsup == 0) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + levelsup--; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + ctepath = cteroot->non_recursive_path; + if (!ctepath) /* shouldn't happen */ + elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, ctepath->rows); + + /* + * We don't support pushing join clauses into the quals of a worktable + * scan, but it could still have required parameterization due to LATERAL + * refs in its tlist. (I'm not sure this is actually possible given the + * restrictions on recursive references, but it's easy enough to support.) + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_worktablescan_path(root, rel, required_outer)); +} + +/* + * generate_gather_paths + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. + * + * This must not be called until after we're done creating all partial paths + * for the specified relation. (Otherwise, add_partial_path might delete a + * path that some GatherPath or GatherMergePath has a reference to.) + * + * If we're generating paths for a scan or join relation, override_rows will + * be false, and we'll just use the relation's size estimate. When we're + * being called for a partially-grouped path, though, we need to override + * the rowcount estimate. (It's not clear that the particular value we're + * using here is actually best, but the underlying rel has no estimate so + * we must do something.) + */ +void +generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) +{ + Path *cheapest_partial_path; + Path *simple_gather_path; + ListCell *lc; + double rows; + double *rowsp = NULL; + + /* If there are no partial paths, there's nothing to do here. */ + if (rel->partial_pathlist == NIL) + return; + + /* Should we override the rel's rowcount estimate? */ + if (override_rows) + rowsp = &rows; + + /* + * The output of Gather is always unsorted, so there's only one partial + * path of interest: the cheapest one. That will be the one at the front + * of partial_pathlist because of the way add_partial_path works. + */ + cheapest_partial_path = linitial(rel->partial_pathlist); + rows = + cheapest_partial_path->rows * cheapest_partial_path->parallel_workers; + simple_gather_path = (Path *) + create_gather_path(root, rel, cheapest_partial_path, rel->reltarget, + NULL, rowsp); + add_path(rel, simple_gather_path); + + /* + * For each useful ordering, we can consider an order-preserving Gather + * Merge. + */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + GatherMergePath *path; + + if (subpath->pathkeys == NIL) + continue; + + rows = subpath->rows * subpath->parallel_workers; + path = create_gather_merge_path(root, rel, subpath, rel->reltarget, + subpath->pathkeys, NULL, rowsp); + add_path(rel, &path->path); + } +} + +/* + * get_useful_pathkeys_for_relation + * Determine which orderings of a relation might be useful. + * + * Getting data in sorted order can be useful either because the requested + * order matches the final output ordering for the overall query we're + * planning, or because it enables an efficient merge join. 
Here, we try
+ * to figure out which pathkeys to consider.
+ *
+ * This allows us to do an incremental sort on top of an index scan under a
+ * gather merge node, i.e., parallelized.
+ *
+ * If require_parallel_safe is true, we also require the expressions to
+ * be parallel safe (which allows pushing the sort below Gather Merge).
+ *
+ * XXX At the moment this can only ever return a list with a single element,
+ * because it looks at query_pathkeys only. So we might return the pathkeys
+ * directly, but it seems plausible we'll want to consider other orderings
+ * in the future. For example, we might want to consider pathkeys useful for
+ * merge joins.
+ */
+static List *
+get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
+ bool require_parallel_safe)
+{
+ List *useful_pathkeys_list = NIL;
+
+ /*
+ * Considering query_pathkeys is always worth it, because it might allow
+ * us to avoid a total sort when we have a partially presorted path
+ * available or to push the total sort into the parallel portion of the
+ * query.
+ */
+ if (root->query_pathkeys)
+ {
+ ListCell *lc;
+ int npathkeys = 0; /* useful pathkeys */
+
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+
+ /*
+ * We can only build a sort for pathkeys that contain a
+ * safe-to-compute-early EC member computable from the current
+ * relation's reltarget, so ignore the remainder of the list as
+ * soon as we find a pathkey without such a member.
+ *
+ * It's still worthwhile to return any prefix of the pathkeys list
+ * that meets this requirement, as we may be able to do an
+ * incremental sort.
+ *
+ * If requested, ensure the sort expression is parallel-safe too.
+ */
+ if (!relation_can_be_sorted_early(root, rel, pathkey_ec,
+ require_parallel_safe))
+ break;
+
+ npathkeys++;
+ }
+
+ /*
+ * If the whole query_pathkeys list matches, append it directly, to
+ * allow comparing pathkeys easily by comparing list pointers. If we
+ * have to truncate the pathkeys, we have to make a copy, though.
+ */
+ if (npathkeys == list_length(root->query_pathkeys))
+ useful_pathkeys_list = lappend(useful_pathkeys_list,
+ root->query_pathkeys);
+ else if (npathkeys > 0)
+ useful_pathkeys_list = lappend(useful_pathkeys_list,
+ list_truncate(list_copy(root->query_pathkeys),
+ npathkeys));
+ }
+
+ return useful_pathkeys_list;
+}
+
+/*
+ * generate_useful_gather_paths
+ * Generate parallel access paths for a relation by pushing a Gather or
+ * Gather Merge on top of a partial path.
+ *
+ * Unlike plain generate_gather_paths, this not only looks at the pathkeys of
+ * the input paths (aiming to preserve the ordering), but also considers
+ * orderings that might be useful to nodes above the gather merge node, and
+ * tries to add a sort (regular or incremental) to provide them.
+ */
+void
+generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
+{
+ ListCell *lc;
+ double rows;
+ double *rowsp = NULL;
+ List *useful_pathkeys_list = NIL;
+ Path *cheapest_partial_path = NULL;
+
+ /* If there are no partial paths, there's nothing to do here. */
+ if (rel->partial_pathlist == NIL)
+ return;
+
+ /* Should we override the rel's rowcount estimate? 
*/ + if (override_rows) + rowsp = &rows; + + /* generate the regular gather (merge) paths */ + generate_gather_paths(root, rel, override_rows); + + /* consider incremental sort for interesting orderings */ + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true); + + /* used for explicit (full) sort paths */ + cheapest_partial_path = linitial(rel->partial_pathlist); + + /* + * Consider sorted paths for each interesting ordering. We generate both + * incremental and full sort. + */ + foreach(lc, useful_pathkeys_list) + { + List *useful_pathkeys = lfirst(lc); + ListCell *lc2; + bool is_sorted; + int presorted_keys; + + foreach(lc2, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc2); + GatherMergePath *path; + + is_sorted = pathkeys_count_contained_in(useful_pathkeys, + subpath->pathkeys, + &presorted_keys); + + /* + * We don't need to consider the case where a subpath is already + * fully sorted because generate_gather_paths already creates a + * gather merge path for every subpath that has pathkeys present. + * + * But since the subpath is already sorted, we know we don't need + * to consider adding a sort (full or incremental) on top of it, + * so we can continue here. + */ + if (is_sorted) + continue; + + /* + * Consider regular sort for the cheapest partial path (for each + * useful pathkeys). We know the path is not sorted, because we'd + * not get here otherwise. + * + * This is not redundant with the gather paths created in + * generate_gather_paths, because that doesn't generate ordered + * output. Here we add an explicit sort to match the useful + * ordering. + */ + if (cheapest_partial_path == subpath) + { + Path *tmp; + + tmp = (Path *) create_sort_path(root, + rel, + subpath, + useful_pathkeys, + -1.0); + + rows = tmp->rows * tmp->parallel_workers; + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + + /* Fall through */ + } + + /* + * Consider incremental sort, but only when the subpath is already + * partially sorted on a pathkey prefix. + */ + if (enable_incremental_sort && presorted_keys > 0) + { + Path *tmp; + + /* + * We should have already excluded pathkeys of length 1 + * because then presorted_keys > 0 would imply is_sorted was + * true. + */ + Assert(list_length(useful_pathkeys) != 1); + + tmp = (Path *) create_incremental_sort_path(root, + rel, + subpath, + useful_pathkeys, + presorted_keys, + -1); + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + } + } + } +} + +/* + * make_rel_from_joinlist + * Build access paths using a "joinlist" to guide the join path search. + * + * See comments for deconstruct_jointree() for definition of the joinlist + * data structure. + */ +static RelOptInfo * +make_rel_from_joinlist(PlannerInfo *root, List *joinlist) +{ + int levels_needed; + List *initial_rels; + ListCell *jl; + + /* + * Count the number of child joinlist nodes. This is the depth of the + * dynamic-programming algorithm we must employ to consider all ways of + * joining the child nodes. + */ + levels_needed = list_length(joinlist); + + if (levels_needed <= 0) + return NULL; /* nothing to do? */ + + /* + * Construct a list of rels corresponding to the child joinlist nodes. + * This may contain both base rels and rels constructed according to + * sub-joinlists. 
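+ * (For example, a joinlist of the form (A (B C) D) yields three initial
+ * rels: A, the joinrel built recursively for the sub-joinlist (B C), and
+ * D; levels_needed is then 3.)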
+ */ + initial_rels = NIL; + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + RelOptInfo *thisrel; + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + thisrel = find_base_rel(root, varno); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + thisrel = make_rel_from_joinlist(root, (List *) jlnode); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + thisrel = NULL; /* keep compiler quiet */ + } + + initial_rels = lappend(initial_rels, thisrel); + } + + if (levels_needed == 1) + { + /* + * Single joinlist node, so we're done. + */ + return (RelOptInfo *) linitial(initial_rels); + } + else + { + /* + * Consider the different orders in which we could join the rels, + * using a plugin, GEQO, or the regular join search code. + * + * We put the initial_rels list into a PlannerInfo field because + * has_legal_joinclause() needs to look at it (ugly :-(). + */ + root->initial_rels = initial_rels; + + if (join_search_hook) + return (*join_search_hook) (root, levels_needed, initial_rels); + else if (enable_geqo && levels_needed >= geqo_threshold) + return geqo(root, levels_needed, initial_rels); + else + return standard_join_search(root, levels_needed, initial_rels); + } +} + +/* + * standard_join_search + * Find possible joinpaths for a query by successively finding ways + * to join component relations into join relations. + * + * 'levels_needed' is the number of iterations needed, ie, the number of + * independent jointree items in the query. This is > 1. + * + * 'initial_rels' is a list of RelOptInfo nodes for each independent + * jointree item. These are the components to be joined together. + * Note that levels_needed == list_length(initial_rels). + * + * Returns the final level of join relations, i.e., the relation that is + * the result of joining all the original relations together. + * At least one implementation path must be provided for this relation and + * all required sub-relations. + * + * To support loadable plugins that modify planner behavior by changing the + * join searching algorithm, we provide a hook variable that lets a plugin + * replace or supplement this function. Any such hook must return the same + * final join relation as the standard code would, but it might have a + * different set of implementation paths attached, and only the sub-joinrels + * needed for these paths need have been instantiated. + * + * Note to plugin authors: the functions invoked during standard_join_search() + * modify root->join_rel_list and root->join_rel_hash. If you want to do more + * than one join-order search, you'll probably need to save and restore the + * original states of those data structures. See geqo_eval() for an example. + */ +RelOptInfo * +standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) +{ + int lev; + RelOptInfo *rel; + + /* + * This function cannot be invoked recursively within any one planning + * problem, so join_rel_level[] can't be in use already. + */ + Assert(root->join_rel_level == NULL); + + /* + * We employ a simple "dynamic programming" algorithm: we first find all + * ways to build joins of two jointree items, then all ways to build joins + * of three items (from two-item joins and single items), then four-item + * joins, and so on until we have considered all ways to join all the + * items into one rel. + * + * root->join_rel_level[j] is a list of all the j-item rels. 
Initially we + * set root->join_rel_level[1] to represent all the single-jointree-item + * relations. + */ + root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); + + root->join_rel_level[1] = initial_rels; + + for (lev = 2; lev <= levels_needed; lev++) + { + ListCell *lc; + + /* + * Determine all possible pairs of relations to be joined at this + * level, and build paths for making each one from every available + * pair of lower-level relations. + */ + join_search_one_level(root, lev); + + /* + * Run generate_partitionwise_join_paths() and + * generate_useful_gather_paths() for each just-processed joinrel. We + * could not do this earlier because both regular and partial paths + * can get added to a particular joinrel at multiple times within + * join_search_one_level. + * + * After that, we're done creating paths for the joinrel, so run + * set_cheapest(). + */ + foreach(lc, root->join_rel_level[lev]) + { + rel = (RelOptInfo *) lfirst(lc); + + /* Create paths for partitionwise joins. */ + generate_partitionwise_join_paths(root, rel); + + /* + * Except for the topmost scan/join rel, consider gathering + * partial paths. We'll do the same for the topmost scan/join rel + * once we know the final targetlist (see grouping_planner). + */ + if (!bms_equal(rel->relids, root->all_baserels)) + generate_useful_gather_paths(root, rel, false); + + /* Find and save the cheapest paths for this rel */ + set_cheapest(rel); + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, rel); +#endif + } + } + + /* + * We should have a single rel at the final level. + */ + if (root->join_rel_level[levels_needed] == NIL) + elog(ERROR, "failed to build any %d-way joins", levels_needed); + Assert(list_length(root->join_rel_level[levels_needed]) == 1); + + rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); + + root->join_rel_level = NULL; + + return rel; +} + +/***************************************************************************** + * PUSHING QUALS DOWN INTO SUBQUERIES + *****************************************************************************/ + +/* + * subquery_is_pushdown_safe - is a subquery safe for pushing down quals? + * + * subquery is the particular component query being checked. topquery + * is the top component of a set-operations tree (the same Query if no + * set-op is involved). + * + * Conditions checked here: + * + * 1. If the subquery has a LIMIT clause, we must not push down any quals, + * since that could change the set of rows returned. + * + * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push + * quals into it, because that could change the results. + * + * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it. + * This is because upper-level quals should semantically be evaluated only + * once per distinct row, not once per original row, and if the qual is + * volatile then extra evaluations could change the results. (This issue + * does not apply to other forms of aggregation such as GROUP BY, because + * when those are present we push into HAVING not WHERE, so that the quals + * are still applied after aggregation.) + * + * 4. If the subquery contains window functions, we cannot push volatile quals + * into it. The issue here is a bit different from DISTINCT: a volatile qual + * might succeed for some rows of a window partition and fail for others, + * thereby changing the partition contents and thus the window functions' + * results for rows that remain. + * + * 5. 
If the subquery contains any set-returning functions in its targetlist, + * we cannot push volatile quals into it. That would push them below the SRFs + * and thereby change the number of times they are evaluated. Also, a + * volatile qual could succeed for some SRF output rows and fail for others, + * a behavior that cannot occur if it's evaluated before SRF expansion. + * + * 6. If the subquery has nonempty grouping sets, we cannot push down any + * quals. The concern here is that a qual referencing a "constant" grouping + * column could get constant-folded, which would be improper because the value + * is potentially nullable by grouping-set expansion. This restriction could + * be removed if we had a parsetree representation that shows that such + * grouping columns are not really constant. (There are other ideas that + * could be used to relax this restriction, but that's the approach most + * likely to get taken in the future. Note that there's not much to be gained + * so long as subquery_planner can't move HAVING clauses to WHERE within such + * a subquery.) + * + * In addition, we make several checks on the subquery's output columns to see + * if it is safe to reference them in pushed-down quals. If output column k + * is found to be unsafe to reference, we set the reason for that inside + * safetyInfo->unsafeFlags[k], but we don't reject the subquery overall since + * column k might not be referenced by some/all quals. The unsafeFlags[] + * array will be consulted later by qual_is_pushdown_safe(). It's better to + * do it this way than to make the checks directly in qual_is_pushdown_safe(), + * because when the subquery involves set operations we have to check the + * output expressions in each arm of the set op. + * + * Note: pushing quals into a DISTINCT subquery is theoretically dubious: + * we're effectively assuming that the quals cannot distinguish values that + * the DISTINCT's equality operator sees as equal, yet there are many + * counterexamples to that assumption. However use of such a qual with a + * DISTINCT subquery would be unsafe anyway, since there's no guarantee which + * "equal" value will be chosen as the output value by the DISTINCT operation. + * So we don't worry too much about that. Another objection is that if the + * qual is expensive to evaluate, running it for each original row might cost + * more than we save by eliminating rows before the DISTINCT step. But it + * would be very hard to estimate that at this stage, and in practice pushdown + * seldom seems to make things worse, so we ignore that problem too. + * + * Note: likewise, pushing quals into a subquery with window functions is a + * bit dubious: the quals might remove some rows of a window partition while + * leaving others, causing changes in the window functions' results for the + * surviving rows. We insist that such a qual reference only partitioning + * columns, but again that only protects us if the qual does not distinguish + * values that the partitioning equality operator sees as equal. The risks + * here are perhaps larger than for DISTINCT, since no de-duplication of rows + * occurs and thus there is no theoretical problem with such a qual. But + * we'll do this anyway because the potential performance benefits are very + * large, and we've seen no field complaints about the longstanding comparable + * behavior with DISTINCT. 
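+ *
+ * As a concrete illustration of point 1: pushing a qual into
+ * (SELECT x FROM tab LIMIT 10) would filter rows before the LIMIT rather
+ * than after it, potentially changing which ten rows are returned; hence
+ * any LIMIT/OFFSET disables pushdown entirely. (Example is illustrative
+ * only.)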
+ */ +static bool +subquery_is_pushdown_safe(Query *subquery, Query *topquery, + pushdown_safety_info *safetyInfo) +{ + SetOperationStmt *topop; + + /* Check point 1 */ + if (subquery->limitOffset != NULL || subquery->limitCount != NULL) + return false; + + /* Check point 6 */ + if (subquery->groupClause && subquery->groupingSets) + return false; + + /* Check points 3, 4, and 5 */ + if (subquery->distinctClause || + subquery->hasWindowFuncs || + subquery->hasTargetSRFs) + safetyInfo->unsafeVolatile = true; + + /* + * If we're at a leaf query, check for unsafe expressions in its target + * list, and mark any reasons why they're unsafe in unsafeFlags[]. + * (Non-leaf nodes in setop trees have only simple Vars in their tlists, + * so no need to check them.) + */ + if (subquery->setOperations == NULL) + check_output_expressions(subquery, safetyInfo); + + /* Are we at top level, or looking at a setop component? */ + if (subquery == topquery) + { + /* Top level, so check any component queries */ + if (subquery->setOperations != NULL) + if (!recurse_pushdown_safe(subquery->setOperations, topquery, + safetyInfo)) + return false; + } + else + { + /* Setop component must not have more components (too weird) */ + if (subquery->setOperations != NULL) + return false; + /* Check whether setop component output types match top level */ + topop = castNode(SetOperationStmt, topquery->setOperations); + Assert(topop); + compare_tlist_datatypes(subquery->targetList, + topop->colTypes, + safetyInfo); + } + return true; +} + +/* + * Helper routine to recurse through setOperations tree + */ +static bool +recurse_pushdown_safe(Node *setOp, Query *topquery, + pushdown_safety_info *safetyInfo) +{ + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = rte->subquery; + + Assert(subquery != NULL); + return subquery_is_pushdown_safe(subquery, topquery, safetyInfo); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */ + if (op->op == SETOP_EXCEPT) + return false; + /* Else recurse */ + if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo)) + return false; + if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo)) + return false; + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } + return true; +} + +/* + * check_output_expressions - check subquery's output expressions for safety + * + * There are several cases in which it's unsafe to push down an upper-level + * qual if it references a particular output column of a subquery. We check + * each output column of the subquery and set flags in unsafeFlags[k] when we + * see that column is unsafe for a pushed-down qual to reference. The + * conditions checked here are: + * + * 1. We must not push down any quals that refer to subselect outputs that + * return sets, else we'd introduce functions-returning-sets into the + * subquery's WHERE/HAVING quals. + * + * 2. We must not push down any quals that refer to subselect outputs that + * contain volatile functions, for fear of introducing strange results due + * to multiple evaluation of a volatile function. + * + * 3. If the subquery uses DISTINCT ON, we must not push down any quals that + * refer to non-DISTINCT output columns, because that could change the set + * of rows returned. 
(This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check. Note that
+ * subquery_is_pushdown_safe already reported that we can't use volatile
+ * quals if there's DISTINCT or DISTINCT ON.)
+ *
+ * 4. If the subquery has any window functions, we must not push down quals
+ * that reference any output columns that are not listed in all the subquery's
+ * window PARTITION BY clauses. We can push down quals that use only
+ * partitioning columns because they should succeed or fail identically for
+ * every row of any one window partition, and totally excluding some
+ * partitions will not change a window function's results for remaining
+ * partitions. (Again, this also requires nonvolatile quals, but
+ * subquery_is_pushdown_safe handles that.) Subquery columns marked as
+ * unsafe for this reason can still have WindowClause run conditions pushed
+ * down.
+ */
+static void
+check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
+{
+ ListCell *lc;
+
+ foreach(lc, subquery->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ if (tle->resjunk)
+ continue; /* ignore resjunk columns */
+
+ /* Functions returning sets are unsafe (point 1) */
+ if (subquery->hasTargetSRFs &&
+ (safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_HAS_SET_FUNC) == 0 &&
+ expression_returns_set((Node *) tle->expr))
+ {
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_SET_FUNC;
+ continue;
+ }
+
+ /* Volatile functions are unsafe (point 2) */
+ if ((safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_HAS_VOLATILE_FUNC) == 0 &&
+ contain_volatile_functions((Node *) tle->expr))
+ {
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_VOLATILE_FUNC;
+ continue;
+ }
+
+ /* If subquery uses DISTINCT ON, check point 3 */
+ if (subquery->hasDistinctOn &&
+ (safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_NOTIN_DISTINCTON_CLAUSE) == 0 &&
+ !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
+ {
+ /* non-DISTINCT column, so mark it unsafe */
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_DISTINCTON_CLAUSE;
+ continue;
+ }
+
+ /* If subquery uses window functions, check point 4 */
+ if (subquery->hasWindowFuncs &&
+ (safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_NOTIN_PARTITIONBY_CLAUSE) == 0 &&
+ !targetIsInAllPartitionLists(tle, subquery))
+ {
+ /* not present in all PARTITION BY clauses, so mark it unsafe */
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_PARTITIONBY_CLAUSE;
+ continue;
+ }
+ }
+}
+
+/*
+ * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
+ * push quals into each component query, but the quals can only reference
+ * subquery columns that suffer no type coercions in the set operation.
+ * Otherwise there are possible semantic gotchas. So, we check the
+ * component queries to see if any of them have output types different from
+ * the top-level setop outputs. We set the UNSAFE_TYPE_MISMATCH bit in
+ * unsafeFlags[k] if column k has different type in any component.
+ *
+ * We don't have to care about typmods here: the only allowed difference
+ * between set-op input and output typmods is an input with a specific
+ * typmod versus an output of -1, and that does not require a coercion.
+ *
+ * tlist is a subquery tlist.
+ * colTypes is an OID list of the top-level setop's output column types.
+ * safetyInfo is the pushdown_safety_info to set unsafeFlags[] for. 
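+ *
+ * (Illustrative example: if the top-level UNION outputs column k as int8
+ * but one arm computes it from an int4 expression, a pushed-down qual
+ * written against the int8 output would be applied to the uncoerced int4
+ * value inside that arm; setting UNSAFE_TYPE_MISMATCH for column k
+ * prevents that.)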
+ */ +static void +compare_tlist_datatypes(List *tlist, List *colTypes, + pushdown_safety_info *safetyInfo) +{ + ListCell *l; + ListCell *colType = list_head(colTypes); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + if (colType == NULL) + elog(ERROR, "wrong number of tlist entries"); + if (exprType((Node *) tle->expr) != lfirst_oid(colType)) + safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_TYPE_MISMATCH; + colType = lnext(colTypes, colType); + } + if (colType != NULL) + elog(ERROR, "wrong number of tlist entries"); +} + +/* + * targetIsInAllPartitionLists + * True if the TargetEntry is listed in the PARTITION BY clause + * of every window defined in the query. + * + * It would be safe to ignore windows not actually used by any window + * function, but it's not easy to get that info at this stage; and it's + * unlikely to be useful to spend any extra cycles getting it, since + * unreferenced window definitions are probably infrequent in practice. + */ +static bool +targetIsInAllPartitionLists(TargetEntry *tle, Query *query) +{ + ListCell *lc; + + foreach(lc, query->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(lc); + + if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause)) + return false; + } + return true; +} + +/* + * qual_is_pushdown_safe - is a particular rinfo safe to push down? + * + * rinfo is a restriction clause applying to the given subquery (whose RTE + * has index rti in the parent query). + * + * Conditions checked here: + * + * 1. rinfo's clause must not contain any SubPlans (mainly because it's + * unclear that it will work correctly: SubLinks will already have been + * transformed into SubPlans in the qual, but not in the subquery). Note that + * SubLinks that transform to initplans are safe, and will be accepted here + * because what we'll see in the qual is just a Param referencing the initplan + * output. + * + * 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile + * functions. + * + * 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky + * functions that are passed Var nodes, and therefore might reveal values from + * the subquery as side effects. + * + * 4. rinfo's clause must not refer to the whole-row output of the subquery + * (since there is no easy way to name that within the subquery itself). + * + * 5. rinfo's clause must not refer to any subquery output columns that were + * found to be unsafe to reference by subquery_is_pushdown_safe(). + */ +static pushdown_safe_type +qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo, + pushdown_safety_info *safetyInfo) +{ + pushdown_safe_type safe = PUSHDOWN_SAFE; + Node *qual = (Node *) rinfo->clause; + List *vars; + ListCell *vl; + + /* Refuse subselects (point 1) */ + if (contain_subplans(qual)) + return PUSHDOWN_UNSAFE; + + /* Refuse volatile quals if we found they'd be unsafe (point 2) */ + if (safetyInfo->unsafeVolatile && + contain_volatile_functions((Node *) rinfo)) + return PUSHDOWN_UNSAFE; + + /* Refuse leaky quals if told to (point 3) */ + if (safetyInfo->unsafeLeaky && + contain_leaked_vars(qual)) + return PUSHDOWN_UNSAFE; + + /* + * It would be unsafe to push down window function calls, but at least for + * the moment we could never see any in a qual anyhow. (The same applies + * to aggregates, which we check for in pull_var_clause below.) + */ + Assert(!contain_window_function(qual)); + + /* + * Examine all Vars used in clause. 
Since it's a restriction clause, all + * such Vars must refer to subselect output columns ... unless this is + * part of a LATERAL subquery, in which case there could be lateral + * references. + */ + vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS); + foreach(vl, vars) + { + Var *var = (Var *) lfirst(vl); + + /* + * XXX Punt if we find any PlaceHolderVars in the restriction clause. + * It's not clear whether a PHV could safely be pushed down, and even + * less clear whether such a situation could arise in any cases of + * practical interest anyway. So for the moment, just refuse to push + * down. + */ + if (!IsA(var, Var)) + { + safe = PUSHDOWN_UNSAFE; + break; + } + + /* + * Punt if we find any lateral references. It would be safe to push + * these down, but we'd have to convert them into outer references, + * which subquery_push_qual lacks the infrastructure to do. The case + * arises so seldom that it doesn't seem worth working hard on. + */ + if (var->varno != rti) + { + safe = PUSHDOWN_UNSAFE; + break; + } + + /* Subqueries have no system columns */ + Assert(var->varattno >= 0); + + /* Check point 4 */ + if (var->varattno == 0) + { + safe = PUSHDOWN_UNSAFE; + break; + } + + /* Check point 5 */ + if (safetyInfo->unsafeFlags[var->varattno] != 0) + { + if (safetyInfo->unsafeFlags[var->varattno] & + (UNSAFE_HAS_VOLATILE_FUNC | UNSAFE_HAS_SET_FUNC | + UNSAFE_NOTIN_DISTINCTON_CLAUSE | UNSAFE_TYPE_MISMATCH)) + { + safe = PUSHDOWN_UNSAFE; + break; + } + else + { + /* UNSAFE_NOTIN_PARTITIONBY_CLAUSE is ok for run conditions */ + safe = PUSHDOWN_WINDOWCLAUSE_RUNCOND; + /* don't break, we might find another Var that's unsafe */ + } + } + } + + list_free(vars); + + return safe; +} + +/* + * subquery_push_qual - push down a qual that we have determined is safe + */ +static void +subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) +{ + if (subquery->setOperations != NULL) + { + /* Recurse to push it separately to each component query */ + recurse_push_qual(subquery->setOperations, subquery, + rte, rti, qual); + } + else + { + /* + * We need to replace Vars in the qual (which must refer to outputs of + * the subquery) with copies of the subquery's targetlist expressions. + * Note that at this point, any uplevel Vars in the qual should have + * been replaced with Params, so they need no work. + * + * This step also ensures that when we are pushing into a setop tree, + * each component query gets its own copy of the qual. + */ + qual = ReplaceVarsFromTargetList(qual, rti, 0, rte, + subquery->targetList, + REPLACEVARS_REPORT_ERROR, 0, + &subquery->hasSubLinks); + + /* + * Now attach the qual to the proper place: normally WHERE, but if the + * subquery uses grouping or aggregation, put it in HAVING (since the + * qual really refers to the group-result rows). + */ + if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual) + subquery->havingQual = make_and_qual(subquery->havingQual, qual); + else + subquery->jointree->quals = + make_and_qual(subquery->jointree->quals, qual); + + /* + * We need not change the subquery's hasAggs or hasSubLinks flags, + * since we can't be pushing down any aggregates that weren't there + * before, and we don't push down subselects at all. 
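+		 *
+		 * For example (illustrative only): pushing "cnt > 5" into
+		 * "SELECT deptno, count(*) AS cnt FROM emp GROUP BY deptno" attaches
+		 * "count(*) > 5" to HAVING; the aggregate comes from the subquery's
+		 * own targetlist, so hasAggs is already set and needn't change.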
+		 */
+	}
+}
+
+/*
+ * Helper routine to recurse through setOperations tree
+ */
+static void
+recurse_push_qual(Node *setOp, Query *topquery,
+				  RangeTblEntry *rte, Index rti, Node *qual)
+{
+	if (IsA(setOp, RangeTblRef))
+	{
+		RangeTblRef *rtr = (RangeTblRef *) setOp;
+		RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
+		Query	   *subquery = subrte->subquery;
+
+		Assert(subquery != NULL);
+		subquery_push_qual(subquery, rte, rti, qual);
+	}
+	else if (IsA(setOp, SetOperationStmt))
+	{
+		SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+		recurse_push_qual(op->larg, topquery, rte, rti, qual);
+		recurse_push_qual(op->rarg, topquery, rte, rti, qual);
+	}
+	else
+	{
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(setOp));
+	}
+}
+
+/*****************************************************************************
+ *			SIMPLIFYING SUBQUERY TARGETLISTS
+ *****************************************************************************/
+
+/*
+ * remove_unused_subquery_outputs
+ *		Remove subquery targetlist items we don't need
+ *
+ * It's possible, even likely, that the upper query does not read all the
+ * output columns of the subquery.  We can remove any such outputs that are
+ * not needed by the subquery itself (e.g., as sort/group columns) and do not
+ * affect semantics otherwise (e.g., volatile functions can't be removed).
+ * This is useful not only because we might be able to remove expensive-to-
+ * compute expressions, but because deletion of output columns might allow
+ * optimizations such as join removal to occur within the subquery.
+ *
+ * extra_used_attrs can be passed as non-NULL to mark any columns (offset by
+ * FirstLowInvalidHeapAttributeNumber) that we should not remove.  This
+ * parameter is modified by the function, so callers must make a copy if they
+ * need to use the passed in Bitmapset after calling this function.
+ *
+ * To avoid affecting column numbering in the targetlist, we don't physically
+ * remove unused tlist entries, but rather replace their expressions with NULL
+ * constants.  This is implemented by modifying subquery->targetList.
+ */
+static void
+remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
+							   Bitmapset *extra_used_attrs)
+{
+	Bitmapset  *attrs_used;
+	ListCell   *lc;
+
+	/*
+	 * Just point directly to extra_used_attrs.  No need to bms_copy as none
+	 * of the current callers use the Bitmapset after calling this function.
+	 */
+	attrs_used = extra_used_attrs;
+
+	/*
+	 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
+	 * could update all the child SELECTs' tlists, but it seems not worth the
+	 * trouble presently.
+	 */
+	if (subquery->setOperations)
+		return;
+
+	/*
+	 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
+	 * time: all its output columns must be used in the distinctClause.
+	 */
+	if (subquery->distinctClause && !subquery->hasDistinctOn)
+		return;
+
+	/*
+	 * Collect a bitmap of all the output column numbers used by the upper
+	 * query.
+	 *
+	 * Add all the attributes needed for joins or final output.  Note: we
+	 * must look at rel's targetlist, not the attr_needed data, because
+	 * attr_needed isn't computed for inheritance child rels, cf
+	 * set_append_rel_size().  (XXX might be worth changing that sometime.)
+	 */
+	pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
+
+	/* Add all the attributes used by un-pushed-down restriction clauses.
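+	 * (For instance, a qual kept outside the subquery because it is volatile
+	 * or references an unsafe output column still needs those columns, so
+	 * they must not be removed.)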
*/ + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used); + } + + /* + * If there's a whole-row reference to the subquery, we can't remove + * anything. + */ + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used)) + return; + + /* + * Run through the tlist and zap entries we don't need. It's okay to + * modify the tlist items in-place because set_subquery_pathlist made a + * copy of the subquery. + */ + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *texpr = (Node *) tle->expr; + + /* + * If it has a sortgroupref number, it's used in some sort/group + * clause so we'd better not remove it. Also, don't remove any + * resjunk columns, since their reason for being has nothing to do + * with anybody reading the subquery's output. (It's likely that + * resjunk columns in a sub-SELECT would always have ressortgroupref + * set, but even if they don't, it seems imprudent to remove them.) + */ + if (tle->ressortgroupref || tle->resjunk) + continue; + + /* + * If it's used by the upper query, we can't remove it. + */ + if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber, + attrs_used)) + continue; + + /* + * If it contains a set-returning function, we can't remove it since + * that could change the number of rows returned by the subquery. + */ + if (subquery->hasTargetSRFs && + expression_returns_set(texpr)) + continue; + + /* + * If it contains volatile functions, we daren't remove it for fear + * that the user is expecting their side-effects to happen. + */ + if (contain_volatile_functions(texpr)) + continue; + + /* + * OK, we don't need it. Replace the expression with a NULL constant. + * Preserve the exposed type of the expression, in case something + * looks at the rowtype of the subquery's result. + */ + tle->expr = (Expr *) makeNullConst(exprType(texpr), + exprTypmod(texpr), + exprCollation(texpr)); + } +} + +/* + * create_partial_bitmap_paths + * Build partial bitmap heap path for the relation + */ +void +create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, + Path *bitmapqual) +{ + int parallel_workers; + double pages_fetched; + + /* Compute heap pages for bitmap heap scan */ + pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0, + NULL, NULL); + + parallel_workers = compute_parallel_worker(rel, pages_fetched, -1, + max_parallel_workers_per_gather); + + if (parallel_workers <= 0) + return; + + add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel, + bitmapqual, rel->lateral_relids, 1.0, parallel_workers)); +} + +/* + * Compute the number of parallel workers that should be used to scan a + * relation. We compute the parallel workers based on the size of the heap to + * be scanned and the size of the index to be scanned, then choose a minimum + * of those. + * + * "heap_pages" is the number of pages from the table that we expect to scan, or + * -1 if we don't expect to scan any. + * + * "index_pages" is the number of pages from the index that we expect to scan, or + * -1 if we don't expect to scan any. + * + * "max_workers" is caller's limit on the number of workers. This typically + * comes from a GUC. + */ +int +compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages, + int max_workers) +{ + int parallel_workers = 0; + + /* + * If the user has set the parallel_workers reloption, use that; otherwise + * select a default number of workers. 
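+	 * For example, assuming the default min_parallel_table_scan_size of 8MB
+	 * (1024 pages of 8kB), the threshold logic below picks 1 worker for a
+	 * heap of 1024-3071 pages, 2 workers for 3072-9215 pages, 3 workers for
+	 * 9216-27647 pages, and so on, tripling the page threshold for each
+	 * additional worker; base rels smaller than the threshold get no
+	 * parallel scan at all.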
+ */ + if (rel->rel_parallel_workers != -1) + parallel_workers = rel->rel_parallel_workers; + else + { + /* + * If the number of pages being scanned is insufficient to justify a + * parallel scan, just return zero ... unless it's an inheritance + * child. In that case, we want to generate a parallel path here + * anyway. It might not be worthwhile just for this relation, but + * when combined with all of its inheritance siblings it may well pay + * off. + */ + if (rel->reloptkind == RELOPT_BASEREL && + ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) || + (index_pages >= 0 && index_pages < min_parallel_index_scan_size))) + return 0; + + if (heap_pages >= 0) + { + int heap_parallel_threshold; + int heap_parallel_workers = 1; + + /* + * Select the number of workers based on the log of the size of + * the relation. This probably needs to be a good deal more + * sophisticated, but we need something here for now. Note that + * the upper limit of the min_parallel_table_scan_size GUC is + * chosen to prevent overflow here. + */ + heap_parallel_threshold = Max(min_parallel_table_scan_size, 1); + while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3)) + { + heap_parallel_workers++; + heap_parallel_threshold *= 3; + if (heap_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + parallel_workers = heap_parallel_workers; + } + + if (index_pages >= 0) + { + int index_parallel_workers = 1; + int index_parallel_threshold; + + /* same calculation as for heap_pages above */ + index_parallel_threshold = Max(min_parallel_index_scan_size, 1); + while (index_pages >= (BlockNumber) (index_parallel_threshold * 3)) + { + index_parallel_workers++; + index_parallel_threshold *= 3; + if (index_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + if (parallel_workers > 0) + parallel_workers = Min(parallel_workers, index_parallel_workers); + else + parallel_workers = index_parallel_workers; + } + } + + /* In no case use more than caller supplied maximum number of workers */ + parallel_workers = Min(parallel_workers, max_workers); + + return parallel_workers; +} + +/* + * generate_partitionwise_join_paths + * Create paths representing partitionwise join for given partitioned + * join relation. + * + * This must not be called until after we are done adding paths for all + * child-joins. Otherwise, add_path might delete a path to which some path + * generated here has a reference. + */ +void +generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel) +{ + List *live_children = NIL; + int cnt_parts; + int num_parts; + RelOptInfo **part_rels; + + /* Handle only join relations here. */ + if (!IS_JOIN_REL(rel)) + return; + + /* We've nothing to do if the relation is not partitioned. */ + if (!IS_PARTITIONED_REL(rel)) + return; + + /* The relation should have consider_partitionwise_join set. */ + Assert(rel->consider_partitionwise_join); + + /* Guard against stack overflow due to overly deep partition hierarchy. */ + check_stack_depth(); + + num_parts = rel->nparts; + part_rels = rel->part_rels; + + /* Collect non-dummy child-joins. */ + for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++) + { + RelOptInfo *child_rel = part_rels[cnt_parts]; + + /* If it's been pruned entirely, it's certainly dummy. */ + if (child_rel == NULL) + continue; + + /* Make partitionwise join paths for this partitioned child-join. */ + generate_partitionwise_join_paths(root, child_rel); + + /* If we failed to make any path for this child, we must give up. 
*/ + if (child_rel->pathlist == NIL) + { + /* + * Mark the parent joinrel as unpartitioned so that later + * functions treat it correctly. + */ + rel->nparts = 0; + return; + } + + /* Else, identify the cheapest path for it. */ + set_cheapest(child_rel); + + /* Dummy children need not be scanned, so ignore those. */ + if (IS_DUMMY_REL(child_rel)) + continue; + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, child_rel); +#endif + + live_children = lappend(live_children, child_rel); + } + + /* If all child-joins are dummy, parent join is also dummy. */ + if (!live_children) + { + mark_dummy_rel(rel); + return; + } + + /* Build additional paths for this rel from child-join paths. */ + add_paths_to_append_rel(root, rel, live_children); + list_free(live_children); +} + + +/***************************************************************************** + * DEBUG SUPPORT + *****************************************************************************/ + +#ifdef OPTIMIZER_DEBUG + +static void +print_relids(PlannerInfo *root, Relids relids) +{ + int x; + bool first = true; + + x = -1; + while ((x = bms_next_member(relids, x)) >= 0) + { + if (!first) + printf(" "); + if (x < root->simple_rel_array_size && + root->simple_rte_array[x]) + printf("%s", root->simple_rte_array[x]->eref->aliasname); + else + printf("%d", x); + first = false; + } +} + +static void +print_restrictclauses(PlannerInfo *root, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *c = lfirst(l); + + print_expr((Node *) c->clause, root->parse->rtable); + if (lnext(clauses, l)) + printf(", "); + } +} + +static void +print_path(PlannerInfo *root, Path *path, int indent) +{ + const char *ptype; + bool join = false; + Path *subpath = NULL; + int i; + + switch (nodeTag(path)) + { + case T_Path: + switch (path->pathtype) + { + case T_SeqScan: + ptype = "SeqScan"; + break; + case T_SampleScan: + ptype = "SampleScan"; + break; + case T_FunctionScan: + ptype = "FunctionScan"; + break; + case T_TableFuncScan: + ptype = "TableFuncScan"; + break; + case T_ValuesScan: + ptype = "ValuesScan"; + break; + case T_CteScan: + ptype = "CteScan"; + break; + case T_NamedTuplestoreScan: + ptype = "NamedTuplestoreScan"; + break; + case T_Result: + ptype = "Result"; + break; + case T_WorkTableScan: + ptype = "WorkTableScan"; + break; + default: + ptype = "???Path"; + break; + } + break; + case T_IndexPath: + ptype = "IdxScan"; + break; + case T_BitmapHeapPath: + ptype = "BitmapHeapScan"; + break; + case T_BitmapAndPath: + ptype = "BitmapAndPath"; + break; + case T_BitmapOrPath: + ptype = "BitmapOrPath"; + break; + case T_TidPath: + ptype = "TidScan"; + break; + case T_TidRangePath: + ptype = "TidRangePath"; + break; + case T_SubqueryScanPath: + ptype = "SubqueryScan"; + break; + case T_ForeignPath: + ptype = "ForeignScan"; + break; + case T_CustomPath: + ptype = "CustomScan"; + break; + case T_NestPath: + ptype = "NestLoop"; + join = true; + break; + case T_MergePath: + ptype = "MergeJoin"; + join = true; + break; + case T_HashPath: + ptype = "HashJoin"; + join = true; + break; + case T_AppendPath: + ptype = "Append"; + break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; + case T_GroupResultPath: + ptype = "GroupResult"; + break; + case T_MaterialPath: + ptype = "Material"; + subpath = ((MaterialPath *) path)->subpath; + break; + case T_MemoizePath: + ptype = "Memoize"; + subpath = ((MemoizePath *) path)->subpath; + break; + case T_UniquePath: + ptype = "Unique"; + subpath = ((UniquePath *) path)->subpath; + break; + 
case T_GatherPath: + ptype = "Gather"; + subpath = ((GatherPath *) path)->subpath; + break; + case T_GatherMergePath: + ptype = "GatherMerge"; + subpath = ((GatherMergePath *) path)->subpath; + break; + case T_ProjectionPath: + ptype = "Projection"; + subpath = ((ProjectionPath *) path)->subpath; + break; + case T_ProjectSetPath: + ptype = "ProjectSet"; + subpath = ((ProjectSetPath *) path)->subpath; + break; + case T_SortPath: + ptype = "Sort"; + subpath = ((SortPath *) path)->subpath; + break; + case T_IncrementalSortPath: + ptype = "IncrementalSort"; + subpath = ((SortPath *) path)->subpath; + break; + case T_GroupPath: + ptype = "Group"; + subpath = ((GroupPath *) path)->subpath; + break; + case T_UpperUniquePath: + ptype = "UpperUnique"; + subpath = ((UpperUniquePath *) path)->subpath; + break; + case T_AggPath: + ptype = "Agg"; + subpath = ((AggPath *) path)->subpath; + break; + case T_GroupingSetsPath: + ptype = "GroupingSets"; + subpath = ((GroupingSetsPath *) path)->subpath; + break; + case T_MinMaxAggPath: + ptype = "MinMaxAgg"; + break; + case T_WindowAggPath: + ptype = "WindowAgg"; + subpath = ((WindowAggPath *) path)->subpath; + break; + case T_SetOpPath: + ptype = "SetOp"; + subpath = ((SetOpPath *) path)->subpath; + break; + case T_RecursiveUnionPath: + ptype = "RecursiveUnion"; + break; + case T_LockRowsPath: + ptype = "LockRows"; + subpath = ((LockRowsPath *) path)->subpath; + break; + case T_ModifyTablePath: + ptype = "ModifyTable"; + break; + case T_LimitPath: + ptype = "Limit"; + subpath = ((LimitPath *) path)->subpath; + break; + default: + ptype = "???Path"; + break; + } + + for (i = 0; i < indent; i++) + printf("\t"); + printf("%s", ptype); + + if (path->parent) + { + printf("("); + print_relids(root, path->parent->relids); + printf(")"); + } + if (path->param_info) + { + printf(" required_outer ("); + print_relids(root, path->param_info->ppi_req_outer); + printf(")"); + } + printf(" rows=%.0f cost=%.2f..%.2f\n", + path->rows, path->startup_cost, path->total_cost); + + if (path->pathkeys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" pathkeys: "); + print_pathkeys(path->pathkeys, root->parse->rtable); + } + + if (join) + { + JoinPath *jp = (JoinPath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" clauses: "); + print_restrictclauses(root, jp->joinrestrictinfo); + printf("\n"); + + if (IsA(path, MergePath)) + { + MergePath *mp = (MergePath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d materializeinner=%d\n", + ((mp->outersortkeys) ? 1 : 0), + ((mp->innersortkeys) ? 1 : 0), + ((mp->materialize_inner) ? 
1 : 0)); + } + + print_path(root, jp->outerjoinpath, indent + 1); + print_path(root, jp->innerjoinpath, indent + 1); + } + + if (subpath) + print_path(root, subpath, indent + 1); +} + +void +debug_print_rel(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *l; + + printf("RELOPTINFO ("); + print_relids(root, rel->relids); + printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width); + + if (rel->baserestrictinfo) + { + printf("\tbaserestrictinfo: "); + print_restrictclauses(root, rel->baserestrictinfo); + printf("\n"); + } + + if (rel->joininfo) + { + printf("\tjoininfo: "); + print_restrictclauses(root, rel->joininfo); + printf("\n"); + } + + printf("\tpath list:\n"); + foreach(l, rel->pathlist) + print_path(root, lfirst(l), 1); + if (rel->cheapest_parameterized_paths) + { + printf("\n\tcheapest parameterized paths:\n"); + foreach(l, rel->cheapest_parameterized_paths) + print_path(root, lfirst(l), 1); + } + if (rel->cheapest_startup_path) + { + printf("\n\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + } + if (rel->cheapest_total_path) + { + printf("\n\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); + } + printf("\n"); + fflush(stdout); +} + +#endif /* OPTIMIZER_DEBUG */ |