Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/Makefile     |   26
-rw-r--r-- | src/backend/optimizer/path/allpaths.c   | 4216
-rw-r--r-- | src/backend/optimizer/path/clausesel.c  | 1000
-rw-r--r-- | src/backend/optimizer/path/costsize.c   | 6176
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 3255
-rw-r--r-- | src/backend/optimizer/path/indxpath.c   | 3826
-rw-r--r-- | src/backend/optimizer/path/joinpath.c   | 2304
-rw-r--r-- | src/backend/optimizer/path/joinrels.c   | 1782
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c   | 1917
-rw-r--r-- | src/backend/optimizer/path/tidpath.c    |  528
10 files changed, 25030 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile new file mode 100644 index 0000000..1e199ff --- /dev/null +++ b/src/backend/optimizer/path/Makefile @@ -0,0 +1,26 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for optimizer/path +# +# IDENTIFICATION +# src/backend/optimizer/path/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/optimizer/path +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + allpaths.o \ + clausesel.o \ + costsize.o \ + equivclass.o \ + indxpath.o \ + joinpath.o \ + joinrels.o \ + pathkeys.o \ + tidpath.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c new file mode 100644 index 0000000..f3e7018 --- /dev/null +++ b/src/backend/optimizer/path/allpaths.c @@ -0,0 +1,4216 @@ +/*------------------------------------------------------------------------- + * + * allpaths.c + * Routines to find possible search paths for processing a query + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/allpaths.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> +#include <math.h> + +#include "access/sysattr.h" +#include "access/tsmapi.h" +#include "catalog/pg_class.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_proc.h" +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#ifdef OPTIMIZER_DEBUG +#include "nodes/print.h" +#endif +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/geqo.h" +#include "optimizer/inherit.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planner.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/tlist.h" +#include "parser/parse_clause.h" +#include "parser/parsetree.h" +#include "partitioning/partbounds.h" +#include "partitioning/partprune.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" + + +/* results of subquery_is_pushdown_safe */ +typedef struct pushdown_safety_info +{ + bool *unsafeColumns; /* which output columns are unsafe to use */ + bool unsafeVolatile; /* don't push down volatile quals */ + bool unsafeLeaky; /* don't push down leaky quals */ +} pushdown_safety_info; + +/* These parameters are set by GUC */ +bool enable_geqo = false; /* just in case GUC doesn't set it */ +int geqo_threshold; +int min_parallel_table_scan_size; +int min_parallel_index_scan_size; + +/* Hook for plugins to get control in set_rel_pathlist() */ +set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; + +/* Hook for plugins to replace standard_join_search() */ +join_search_hook_type join_search_hook = NULL; + + +static void set_base_rel_consider_startup(PlannerInfo *root); +static void set_base_rel_sizes(PlannerInfo *root); +static void set_base_rel_pathlists(PlannerInfo *root); +static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void 
set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel); +static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys); +static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, + RelOptInfo *rel, + Relids required_outer); +static void accumulate_append_subpath(Path *path, + List **subpaths, + List **special_subpaths); +static Path *get_singleton_append_subpath(Path *path); +static void set_dummy_rel_pathlist(RelOptInfo *rel); +static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); +static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); +static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, + pushdown_safety_info *safetyInfo); +static bool recurse_pushdown_safe(Node *setOp, Query *topquery, + pushdown_safety_info *safetyInfo); +static void check_output_expressions(Query *subquery, + pushdown_safety_info *safetyInfo); +static void compare_tlist_datatypes(List *tlist, List *colTypes, + pushdown_safety_info *safetyInfo); +static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query); +static bool qual_is_pushdown_safe(Query *subquery, Index rti, + RestrictInfo *rinfo, + pushdown_safety_info *safetyInfo); +static void subquery_push_qual(Query *subquery, + RangeTblEntry *rte, Index rti, Node *qual); +static void recurse_push_qual(Node *setOp, Query *topquery, + RangeTblEntry *rte, Index rti, Node *qual); +static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); + + +/* + * make_one_rel + * Finds all possible access paths for executing a query, returning a + * single rel that represents the join of all base rels in the query. + */ +RelOptInfo * +make_one_rel(PlannerInfo *root, List *joinlist) +{ + RelOptInfo *rel; + Index rti; + double total_pages; + + /* + * Construct the all_baserels Relids set. 
+ */ + root->all_baserels = NULL; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + root->all_baserels = bms_add_member(root->all_baserels, brel->relid); + } + + /* Mark base rels as to whether we care about fast-start plans */ + set_base_rel_consider_startup(root); + + /* + * Compute size estimates and consider_parallel flags for each base rel. + */ + set_base_rel_sizes(root); + + /* + * We should now have size estimates for every actual table involved in + * the query, and we also know which if any have been deleted from the + * query by join removal, pruned by partition pruning, or eliminated by + * constraint exclusion. So we can now compute total_table_pages. + * + * Note that appendrels are not double-counted here, even though we don't + * bother to distinguish RelOptInfos for appendrel parents, because the + * parents will have pages = 0. + * + * XXX if a table is self-joined, we will count it once per appearance, + * which perhaps is the wrong thing ... but that's not completely clear, + * and detecting self-joins here is difficult, so ignore it for now. + */ + total_pages = 0; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + if (IS_DUMMY_REL(brel)) + continue; + + if (IS_SIMPLE_REL(brel)) + total_pages += (double) brel->pages; + } + root->total_table_pages = total_pages; + + /* + * Generate access paths for each base rel. + */ + set_base_rel_pathlists(root); + + /* + * Generate access paths for the entire join tree. + */ + rel = make_rel_from_joinlist(root, joinlist); + + /* + * The result should join all and only the query's base rels. + */ + Assert(bms_equal(rel->relids, root->all_baserels)); + + return rel; +} + +/* + * set_base_rel_consider_startup + * Set the consider_[param_]startup flags for each base-relation entry. + * + * For the moment, we only deal with consider_param_startup here; because the + * logic for consider_startup is pretty trivial and is the same for every base + * relation, we just let build_simple_rel() initialize that flag correctly to + * start with. If that logic ever gets more complicated it would probably + * be better to move it here. + */ +static void +set_base_rel_consider_startup(PlannerInfo *root) +{ + /* + * Since parameterized paths can only be used on the inside of a nestloop + * join plan, there is usually little value in considering fast-start + * plans for them. However, for relations that are on the RHS of a SEMI + * or ANTI join, a fast-start plan can be useful because we're only going + * to care about fetching one tuple anyway. + * + * To minimize growth of planning time, we currently restrict this to + * cases where the RHS is a single base relation, not a join; there is no + * provision for consider_param_startup to get set at all on joinrels. + * Also we don't worry about appendrels. costsize.c's costing rules for + * nestloop semi/antijoins don't consider such cases either. 
+ */ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int varno; + + if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) && + bms_get_singleton_member(sjinfo->syn_righthand, &varno)) + { + RelOptInfo *rel = find_base_rel(root, varno); + + rel->consider_param_startup = true; + } + } +} + +/* + * set_base_rel_sizes + * Set the size estimates (rows and widths) for each base-relation entry. + * Also determine whether to consider parallel paths for base relations. + * + * We do this in a separate pass over the base rels so that rowcount + * estimates are available for parameterized path generation, and also so + * that each rel's consider_parallel flag is set correctly before we begin to + * generate paths. + */ +static void +set_base_rel_sizes(PlannerInfo *root) +{ + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + RangeTblEntry *rte; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + rte = root->simple_rte_array[rti]; + + /* + * If parallelism is allowable for this query in general, see whether + * it's allowable for this rel in particular. We have to do this + * before set_rel_size(), because (a) if this rel is an inheritance + * parent, set_append_rel_size() will use and perhaps change the rel's + * consider_parallel flag, and (b) for some RTE types, set_rel_size() + * goes ahead and makes paths immediately. + */ + if (root->glob->parallelModeOK) + set_rel_consider_parallel(root, rel, rte); + + set_rel_size(root, rel, rti, rte); + } +} + +/* + * set_base_rel_pathlists + * Finds all paths available for scanning each base-relation entry. + * Sequential scan and any available indices are considered. + * Each useful path is attached to its relation's 'pathlist' field. + */ +static void +set_base_rel_pathlists(PlannerInfo *root) +{ + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); + } +} + +/* + * set_rel_size + * Set size estimates for a base relation + */ +static void +set_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if (rel->reloptkind == RELOPT_BASEREL && + relation_excluded_by_constraints(root, rel, rte)) + { + /* + * We proved we don't need to scan the rel via constraint exclusion, + * so set up a single dummy path for it. Here we only check this for + * regular baserels; if it's an otherrel, CE was already checked in + * set_append_rel_size(). + * + * In this case, we go ahead and set up the relation's path right away + * instead of leaving it for set_rel_pathlist to do. This is because + * we don't have a convention for marking a rel as dummy except by + * assigning a dummy path to it. 
+ */ + set_dummy_rel_pathlist(rel); + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_size(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_size(root, rel, rte); + } + else if (rte->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * We could get here if asked to scan a partitioned table + * with ONLY. In that case we shouldn't scan any of the + * partitions, so mark it as a dummy rel. + */ + set_dummy_rel_pathlist(rel); + } + else if (rte->tablesample != NULL) + { + /* Sampled relation */ + set_tablesample_rel_size(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_size(root, rel, rte); + } + break; + case RTE_SUBQUERY: + + /* + * Subqueries don't support making a choice between + * parameterized and unparameterized paths, so just go ahead + * and build their paths immediately. + */ + set_subquery_pathlist(root, rel, rti, rte); + break; + case RTE_FUNCTION: + set_function_size_estimates(root, rel); + break; + case RTE_TABLEFUNC: + set_tablefunc_size_estimates(root, rel); + break; + case RTE_VALUES: + set_values_size_estimates(root, rel); + break; + case RTE_CTE: + + /* + * CTEs don't support making a choice between parameterized + * and unparameterized paths, so just go ahead and build their + * paths immediately. + */ + if (rte->self_reference) + set_worktable_pathlist(root, rel, rte); + else + set_cte_pathlist(root, rel, rte); + break; + case RTE_NAMEDTUPLESTORE: + /* Might as well just build the path immediately */ + set_namedtuplestore_pathlist(root, rel, rte); + break; + case RTE_RESULT: + /* Might as well just build the path immediately */ + set_result_pathlist(root, rel, rte); + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } + + /* + * We insist that all non-dummy rels have a nonzero rowcount estimate. 
+ */ + Assert(rel->rows > 0 || IS_DUMMY_REL(rel)); +} + +/* + * set_rel_pathlist + * Build access paths for a base relation + */ +static void +set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if (IS_DUMMY_REL(rel)) + { + /* We already proved the relation empty, so nothing more to do */ + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_pathlist(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_pathlist(root, rel, rte); + } + else if (rte->tablesample != NULL) + { + /* Sampled relation */ + set_tablesample_rel_pathlist(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_pathlist(root, rel, rte); + } + break; + case RTE_SUBQUERY: + /* Subquery --- fully handled during set_rel_size */ + break; + case RTE_FUNCTION: + /* RangeFunction */ + set_function_pathlist(root, rel, rte); + break; + case RTE_TABLEFUNC: + /* Table Function */ + set_tablefunc_pathlist(root, rel, rte); + break; + case RTE_VALUES: + /* Values list */ + set_values_pathlist(root, rel, rte); + break; + case RTE_CTE: + /* CTE reference --- fully handled during set_rel_size */ + break; + case RTE_NAMEDTUPLESTORE: + /* tuplestore reference --- fully handled during set_rel_size */ + break; + case RTE_RESULT: + /* simple Result --- fully handled during set_rel_size */ + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } + + /* + * Allow a plugin to editorialize on the set of Paths for this base + * relation. It could add new paths (such as CustomPaths) by calling + * add_path(), or add_partial_path() if parallel aware. It could also + * delete or modify paths added by the core code. + */ + if (set_rel_pathlist_hook) + (*set_rel_pathlist_hook) (root, rel, rti, rte); + + /* + * If this is a baserel, we should normally consider gathering any partial + * paths we may have created for it. We have to do this after calling the + * set_rel_pathlist_hook, else it cannot add partial paths to be included + * here. + * + * However, if this is an inheritance child, skip it. Otherwise, we could + * end up with a very large number of gather nodes, each trying to grab + * its own pool of workers. Instead, we'll consider gathering partial + * paths for the parent appendrel. + * + * Also, if this is the topmost scan/join rel (that is, the only baserel), + * we postpone gathering until the final scan/join targetlist is available + * (see grouping_planner). + */ + if (rel->reloptkind == RELOPT_BASEREL && + bms_membership(root->all_baserels) != BMS_SINGLETON) + generate_useful_gather_paths(root, rel, false); + + /* Now find the cheapest of the paths for this rel */ + set_cheapest(rel); + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, rel); +#endif +} + +/* + * set_plain_rel_size + * Set size estimates for a plain relation (no subquery, no inheritance) + */ +static void +set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_index_predicates(root, rel); + + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * If this relation could possibly be scanned from within a worker, then set + * its consider_parallel flag. 
+ */ +static void +set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + /* + * The flag has previously been initialized to false, so we can just + * return if it becomes clear that we can't safely set it. + */ + Assert(!rel->consider_parallel); + + /* Don't call this if parallelism is disallowed for the entire query. */ + Assert(root->glob->parallelModeOK); + + /* This should only be called for baserels and appendrel children. */ + Assert(IS_SIMPLE_REL(rel)); + + /* Assorted checks based on rtekind. */ + switch (rte->rtekind) + { + case RTE_RELATION: + + /* + * Currently, parallel workers can't access the leader's temporary + * tables. We could possibly relax this if we wrote all of its + * local buffers at the start of the query and made no changes + * thereafter (maybe we could allow hint bit changes), and if we + * taught the workers to read them. Writing a large number of + * temporary buffers could be expensive, though, and we don't have + * the rest of the necessary infrastructure right now anyway. So + * for now, bail out if we see a temporary table. + */ + if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP) + return; + + /* + * Table sampling can be pushed down to workers if the sample + * function and its arguments are safe. + */ + if (rte->tablesample != NULL) + { + char proparallel = func_parallel(rte->tablesample->tsmhandler); + + if (proparallel != PROPARALLEL_SAFE) + return; + if (!is_parallel_safe(root, (Node *) rte->tablesample->args)) + return; + } + + /* + * Ask FDWs whether they can support performing a ForeignScan + * within a worker. Most often, the answer will be no. For + * example, if the nature of the FDW is such that it opens a TCP + * connection with a remote server, each parallel worker would end + * up with a separate connection, and these connections might not + * be appropriately coordinated between workers and the leader. + */ + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + Assert(rel->fdwroutine); + if (!rel->fdwroutine->IsForeignScanParallelSafe) + return; + if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte)) + return; + } + + /* + * There are additional considerations for appendrels, which we'll + * deal with in set_append_rel_size and set_append_rel_pathlist. + * For now, just set consider_parallel based on the rel's own + * quals and targetlist. + */ + break; + + case RTE_SUBQUERY: + + /* + * There's no intrinsic problem with scanning a subquery-in-FROM + * (as distinct from a SubPlan or InitPlan) in a parallel worker. + * If the subquery doesn't happen to have any parallel-safe paths, + * then flagging it as consider_parallel won't change anything, + * but that's true for plain tables, too. We must set + * consider_parallel based on the rel's own quals and targetlist, + * so that if a subquery path is parallel-safe but the quals and + * projection we're sticking onto it are not, we correctly mark + * the SubqueryScanPath as not parallel-safe. (Note that + * set_subquery_pathlist() might push some of these quals down + * into the subquery itself, but that doesn't change anything.) + * + * We can't push sub-select containing LIMIT/OFFSET to workers as + * there is no guarantee that the row order will be fully + * deterministic, and applying LIMIT/OFFSET will lead to + * inconsistent results at the top-level. (In some cases, where + * the result is ordered, we could relax this restriction. But it + * doesn't currently seem worth expending extra effort to do so.) 
+ */ + { + Query *subquery = castNode(Query, rte->subquery); + + if (limit_needed(subquery)) + return; + } + break; + + case RTE_JOIN: + /* Shouldn't happen; we're only considering baserels here. */ + Assert(false); + return; + + case RTE_FUNCTION: + /* Check for parallel-restricted functions. */ + if (!is_parallel_safe(root, (Node *) rte->functions)) + return; + break; + + case RTE_TABLEFUNC: + /* not parallel safe */ + return; + + case RTE_VALUES: + /* Check for parallel-restricted functions. */ + if (!is_parallel_safe(root, (Node *) rte->values_lists)) + return; + break; + + case RTE_CTE: + + /* + * CTE tuplestores aren't shared among parallel workers, so we + * force all CTE scans to happen in the leader. Also, populating + * the CTE would require executing a subplan that's not available + * in the worker, might be parallel-restricted, and must get + * executed only once. + */ + return; + + case RTE_NAMEDTUPLESTORE: + + /* + * tuplestore cannot be shared, at least without more + * infrastructure to support that. + */ + return; + + case RTE_RESULT: + /* RESULT RTEs, in themselves, are no problem. */ + break; + } + + /* + * If there's anything in baserestrictinfo that's parallel-restricted, we + * give up on parallelizing access to this relation. We could consider + * instead postponing application of the restricted quals until we're + * above all the parallelism in the plan tree, but it's not clear that + * that would be a win in very many cases, and it might be tricky to make + * outer join clauses work correctly. It would likely break equivalence + * classes, too. + */ + if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo)) + return; + + /* + * Likewise, if the relation's outputs are not parallel-safe, give up. + * (Usually, they're just Vars, but sometimes they're not.) + */ + if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs)) + return; + + /* We have a winner. */ + rel->consider_parallel = true; +} + +/* + * set_plain_rel_pathlist + * Build access paths for a plain relation (no subquery, no inheritance) + */ +static void +set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a seqscan, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. + */ + required_outer = rel->lateral_relids; + + /* Consider sequential scan */ + add_path(rel, create_seqscan_path(root, rel, required_outer, 0)); + + /* If appropriate, consider parallel sequential scan */ + if (rel->consider_parallel && required_outer == NULL) + create_plain_partial_paths(root, rel); + + /* Consider index scans */ + create_index_paths(root, rel); + + /* Consider TID scans */ + create_tidscan_paths(root, rel); +} + +/* + * create_plain_partial_paths + * Build partial access paths for parallel scan of a plain relation + */ +static void +create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel) +{ + int parallel_workers; + + parallel_workers = compute_parallel_worker(rel, rel->pages, -1, + max_parallel_workers_per_gather); + + /* If any limit was set to zero, the user doesn't want a parallel scan. */ + if (parallel_workers <= 0) + return; + + /* Add an unordered partial path based on a parallel sequential scan. 
*/ + add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers)); +} + +/* + * set_tablesample_rel_size + * Set size estimates for a sampled relation + */ +static void +set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + TableSampleClause *tsc = rte->tablesample; + TsmRoutine *tsm; + BlockNumber pages; + double tuples; + + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_index_predicates(root, rel); + + /* + * Call the sampling method's estimation function to estimate the number + * of pages it will read and the number of tuples it will return. (Note: + * we assume the function returns sane values.) + */ + tsm = GetTsmRoutine(tsc->tsmhandler); + tsm->SampleScanGetSampleSize(root, rel, tsc->args, + &pages, &tuples); + + /* + * For the moment, because we will only consider a SampleScan path for the + * rel, it's okay to just overwrite the pages and tuples estimates for the + * whole relation. If we ever consider multiple path types for sampled + * rels, we'll need more complication. + */ + rel->pages = pages; + rel->tuples = tuples; + + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * set_tablesample_rel_pathlist + * Build access paths for a sampled relation + */ +static void +set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + Path *path; + + /* + * We don't support pushing join clauses into the quals of a samplescan, + * but it could still have required parameterization due to LATERAL refs + * in its tlist or TABLESAMPLE arguments. + */ + required_outer = rel->lateral_relids; + + /* Consider sampled scan */ + path = create_samplescan_path(root, rel, required_outer); + + /* + * If the sampling method does not support repeatable scans, we must avoid + * plans that would scan the rel multiple times. Ideally, we'd simply + * avoid putting the rel on the inside of a nestloop join; but adding such + * a consideration to the planner seems like a great deal of complication + * to support an uncommon usage of second-rate sampling methods. Instead, + * if there is a risk that the query might perform an unsafe join, just + * wrap the SampleScan in a Materialize node. We can check for joins by + * counting the membership of all_baserels (note that this correctly + * counts inheritance trees as single rels). If we're inside a subquery, + * we can't easily check whether a join might occur in the outer query, so + * just assume one is possible. + * + * GetTsmRoutine is relatively expensive compared to the other tests here, + * so check repeatable_across_scans last, even though that's a bit odd. + */ + if ((root->query_level > 1 || + bms_membership(root->all_baserels) != BMS_SINGLETON) && + !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans)) + { + path = (Path *) create_material_path(rel, path); + } + + add_path(rel, path); + + /* For the moment, at least, there are no other paths to consider */ +} + +/* + * set_foreign_size + * Set size estimates for a foreign table RTE + */ +static void +set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Mark rel with estimated output rows, width, etc */ + set_foreign_size_estimates(root, rel); + + /* Let FDW adjust the size estimates, if it can */ + rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); + + /* ... 
but do not let it set the rows estimate to zero */ + rel->rows = clamp_row_est(rel->rows); + + /* + * Also, make sure rel->tuples is not insane relative to rel->rows. + * Notably, this ensures sanity if pg_class.reltuples contains -1 and the + * FDW doesn't do anything to replace that. + */ + rel->tuples = Max(rel->tuples, rel->rows); +} + +/* + * set_foreign_pathlist + * Build access paths for a foreign table RTE + */ +static void +set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Call the FDW's GetForeignPaths function to generate path(s) */ + rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); +} + +/* + * set_append_rel_size + * Set size estimates for a simple "append relation" + * + * The passed-in rel and RTE represent the entire append relation. The + * relation's contents are computed by appending together the output of the + * individual member relations. Note that in the non-partitioned inheritance + * case, the first member relation is actually the same table as is mentioned + * in the parent RTE ... but it has a different RTE and RelOptInfo. This is + * a good thing because their outputs are not the same size. + */ +static void +set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + int parentRTindex = rti; + bool has_live_children; + double parent_rows; + double parent_size; + double *parent_attrsizes; + int nattrs; + ListCell *l; + + /* Guard against stack overflow due to overly deep inheritance tree. */ + check_stack_depth(); + + Assert(IS_SIMPLE_REL(rel)); + + /* + * If this is a partitioned baserel, set the consider_partitionwise_join + * flag; currently, we only consider partitionwise joins with the baserel + * if its targetlist doesn't contain a whole-row Var. + */ + if (enable_partitionwise_join && + rel->reloptkind == RELOPT_BASEREL && + rte->relkind == RELKIND_PARTITIONED_TABLE && + rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL) + rel->consider_partitionwise_join = true; + + /* + * Initialize to compute size estimates for whole append relation. + * + * We handle width estimates by weighting the widths of different child + * rels proportionally to their number of rows. This is sensible because + * the use of width estimates is mainly to compute the total relation + * "footprint" if we have to sort or hash it. To do this, we sum the + * total equivalent size (in "double" arithmetic) and then divide by the + * total rowcount estimate. This is done separately for the total rel + * width and each attribute. + * + * Note: if you consider changing this logic, beware that child rels could + * have zero rows and/or width, if they were excluded by constraints. + */ + has_live_children = false; + parent_rows = 0; + parent_size = 0; + nattrs = rel->max_attr - rel->min_attr + 1; + parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); + + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + ListCell *parentvars; + ListCell *childvars; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + + /* + * The child rel's RelOptInfo was already created during + * add_other_rels_to_query. 
+ */ + childrel = find_base_rel(root, childRTindex); + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + /* We may have already proven the child to be dummy. */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * We have to copy the parent's targetlist and quals to the child, + * with appropriate substitution of variables. However, the + * baserestrictinfo quals were already copied/substituted when the + * child RelOptInfo was built. So we don't need any additional setup + * before applying constraint exclusion. + */ + if (relation_excluded_by_constraints(root, childrel, childRTE)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + + /* + * Constraint exclusion failed, so copy the parent's join quals and + * targetlist to the child, with appropriate variable substitutions. + * + * NB: the resulting childrel->reltarget->exprs may contain arbitrary + * expressions, which otherwise would not occur in a rel's targetlist. + * Code that might be looking at an appendrel child must cope with + * such. (Normally, a rel's targetlist would only include Vars and + * PlaceHolderVars.) XXX we do not bother to update the cost or width + * fields of childrel->reltarget; not clear if that would be useful. + */ + childrel->joininfo = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->joininfo, + 1, &appinfo); + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltarget->exprs, + 1, &appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). + */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; + + /* + * Note: we could compute appropriate attr_needed data for the child's + * variables, by transforming the parent's attr_needed through the + * translated_vars mapping. However, currently there's no need + * because attr_needed is only examined for base relations not + * otherrels. So we just leave the child's attr_needed empty. + */ + + /* + * If we consider partitionwise joins with the parent rel, do the same + * for partitioned child rels. + * + * Note: here we abuse the consider_partitionwise_join flag by setting + * it for child rels that are not themselves partitioned. We do so to + * tell try_partitionwise_join() that the child rel is sufficiently + * valid to be used as a per-partition input, even if it later gets + * proven to be dummy. (It's not usable until we've set up the + * reltarget and EC entries, which we just did.) + */ + if (rel->consider_partitionwise_join) + childrel->consider_partitionwise_join = true; + + /* + * If parallelism is allowable for this query in general, see whether + * it's allowable for this childrel in particular. But if we've + * already decided the appendrel is not parallel-safe as a whole, + * there's no point in considering parallelism for this child. For + * consistency, do this before calling set_rel_size() for the child. 
+ */ + if (root->glob->parallelModeOK && rel->consider_parallel) + set_rel_consider_parallel(root, childrel, childRTE); + + /* + * Compute the child's size. + */ + set_rel_size(root, childrel, childRTindex, childRTE); + + /* + * It is possible that constraint exclusion detected a contradiction + * within a child subquery, even though we didn't prove one above. If + * so, we can skip this child. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* We have at least one live child. */ + has_live_children = true; + + /* + * If any live child is not parallel-safe, treat the whole appendrel + * as not parallel-safe. In future we might be able to generate plans + * in which some children are farmed out to workers while others are + * not; but we don't have that today, so it's a waste to consider + * partial paths anywhere in the appendrel unless it's all safe. + * (Child rels visited before this one will be unmarked in + * set_append_rel_pathlist().) + */ + if (!childrel->consider_parallel) + rel->consider_parallel = false; + + /* + * Accumulate size information from each live child. + */ + Assert(childrel->rows > 0); + + parent_rows += childrel->rows; + parent_size += childrel->reltarget->width * childrel->rows; + + /* + * Accumulate per-column estimates too. We need not do anything for + * PlaceHolderVars in the parent list. If child expression isn't a + * Var, or we didn't record a width estimate for it, we have to fall + * back on a datatype-based estimate. + * + * By construction, child's targetlist is 1-to-1 with parent's. + */ + forboth(parentvars, rel->reltarget->exprs, + childvars, childrel->reltarget->exprs) + { + Var *parentvar = (Var *) lfirst(parentvars); + Node *childvar = (Node *) lfirst(childvars); + + if (IsA(parentvar, Var) && parentvar->varno == parentRTindex) + { + int pndx = parentvar->varattno - rel->min_attr; + int32 child_width = 0; + + if (IsA(childvar, Var) && + ((Var *) childvar)->varno == childrel->relid) + { + int cndx = ((Var *) childvar)->varattno - childrel->min_attr; + + child_width = childrel->attr_widths[cndx]; + } + if (child_width <= 0) + child_width = get_typavgwidth(exprType(childvar), + exprTypmod(childvar)); + Assert(child_width > 0); + parent_attrsizes[pndx] += child_width * childrel->rows; + } + } + } + + if (has_live_children) + { + /* + * Save the finished size estimates. + */ + int i; + + Assert(parent_rows > 0); + rel->rows = parent_rows; + rel->reltarget->width = rint(parent_size / parent_rows); + for (i = 0; i < nattrs; i++) + rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); + + /* + * Set "raw tuples" count equal to "rows" for the appendrel; needed + * because some places assume rel->tuples is valid for any baserel. + */ + rel->tuples = parent_rows; + + /* + * Note that we leave rel->pages as zero; this is important to avoid + * double-counting the appendrel tree in total_table_pages. + */ + } + else + { + /* + * All children were excluded by constraints, so mark the whole + * appendrel dummy. We must do this in this phase so that the rel's + * dummy-ness is visible when we generate paths for other rels. 
+ */ + set_dummy_rel_pathlist(rel); + } + + pfree(parent_attrsizes); +} + +/* + * set_append_rel_pathlist + * Build access paths for an "append relation" + */ +static void +set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + int parentRTindex = rti; + List *live_childrels = NIL; + ListCell *l; + + /* + * Generate access paths for each member relation, and remember the + * non-dummy children. + */ + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + /* Re-locate the child RTE and RelOptInfo */ + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + childrel = root->simple_rel_array[childRTindex]; + + /* + * If set_append_rel_size() decided the parent appendrel was + * parallel-unsafe at some point after visiting this child rel, we + * need to propagate the unsafety marking down to the child, so that + * we don't generate useless partial paths for it. + */ + if (!rel->consider_parallel) + childrel->consider_parallel = false; + + /* + * Compute the child's access paths. + */ + set_rel_pathlist(root, childrel, childRTindex, childRTE); + + /* + * If child is dummy, ignore it. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Child is live, so add it to the live_childrels list for use below. + */ + live_childrels = lappend(live_childrels, childrel); + } + + /* Add paths to the append relation. */ + add_paths_to_append_rel(root, rel, live_childrels); +} + + +/* + * add_paths_to_append_rel + * Generate paths for the given append relation given the set of non-dummy + * child rels. + * + * The function collects all parameterizations and orderings supported by the + * non-dummy children. For every such parameterization or ordering, it creates + * an append path collecting one path from each non-dummy child with given + * parameterization or ordering. Similarly it collects partial paths from + * non-dummy children to create partial append paths. + */ +void +add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels) +{ + List *subpaths = NIL; + bool subpaths_valid = true; + List *partial_subpaths = NIL; + List *pa_partial_subpaths = NIL; + List *pa_nonpartial_subpaths = NIL; + bool partial_subpaths_valid = true; + bool pa_subpaths_valid; + List *all_child_pathkeys = NIL; + List *all_child_outers = NIL; + ListCell *l; + double partial_rows = -1; + + /* If appropriate, consider parallel append */ + pa_subpaths_valid = enable_parallel_append && rel->consider_parallel; + + /* + * For every non-dummy child, remember the cheapest path. Also, identify + * all pathkeys (orderings) and parameterizations (required_outer sets) + * available for the non-dummy member relations. + */ + foreach(l, live_childrels) + { + RelOptInfo *childrel = lfirst(l); + ListCell *lcp; + Path *cheapest_partial_path = NULL; + + /* + * If child has an unparameterized cheapest-total path, add that to + * the unparameterized Append path we are constructing for the parent. + * If not, there's no workable unparameterized path. + * + * With partitionwise aggregates, the child rel's pathlist may be + * empty, so don't assume that a path exists here. 
+ */ + if (childrel->pathlist != NIL && + childrel->cheapest_total_path->param_info == NULL) + accumulate_append_subpath(childrel->cheapest_total_path, + &subpaths, NULL); + else + subpaths_valid = false; + + /* Same idea, but for a partial plan. */ + if (childrel->partial_pathlist != NIL) + { + cheapest_partial_path = linitial(childrel->partial_pathlist); + accumulate_append_subpath(cheapest_partial_path, + &partial_subpaths, NULL); + } + else + partial_subpaths_valid = false; + + /* + * Same idea, but for a parallel append mixing partial and non-partial + * paths. + */ + if (pa_subpaths_valid) + { + Path *nppath = NULL; + + nppath = + get_cheapest_parallel_safe_total_inner(childrel->pathlist); + + if (cheapest_partial_path == NULL && nppath == NULL) + { + /* Neither a partial nor a parallel-safe path? Forget it. */ + pa_subpaths_valid = false; + } + else if (nppath == NULL || + (cheapest_partial_path != NULL && + cheapest_partial_path->total_cost < nppath->total_cost)) + { + /* Partial path is cheaper or the only option. */ + Assert(cheapest_partial_path != NULL); + accumulate_append_subpath(cheapest_partial_path, + &pa_partial_subpaths, + &pa_nonpartial_subpaths); + } + else + { + /* + * Either we've got only a non-partial path, or we think that + * a single backend can execute the best non-partial path + * faster than all the parallel backends working together can + * execute the best partial path. + * + * It might make sense to be more aggressive here. Even if + * the best non-partial path is more expensive than the best + * partial path, it could still be better to choose the + * non-partial path if there are several such paths that can + * be given to different workers. For now, we don't try to + * figure that out. + */ + accumulate_append_subpath(nppath, + &pa_nonpartial_subpaths, + NULL); + } + } + + /* + * Collect lists of all the available path orderings and + * parameterizations for all the children. We use these as a + * heuristic to indicate which sort orderings and parameterizations we + * should build Append and MergeAppend paths for. + */ + foreach(lcp, childrel->pathlist) + { + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + Relids childouter = PATH_REQ_OUTER(childpath); + + /* Unsorted paths don't contribute to pathkey list */ + if (childkeys != NIL) + { + ListCell *lpk; + bool found = false; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, + childkeys); + } + } + + /* Unparameterized paths don't contribute to param-set list */ + if (childouter) + { + ListCell *lco; + bool found = false; + + /* Have we already seen this param set? */ + foreach(lco, all_child_outers) + { + Relids existing_outers = (Relids) lfirst(lco); + + if (bms_equal(existing_outers, childouter)) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_outers */ + all_child_outers = lappend(all_child_outers, + childouter); + } + } + } + } + + /* + * If we found unparameterized paths for all children, build an unordered, + * unparameterized Append path for the rel. (Note: this is correct even + * if we have zero or one live subpath due to constraint exclusion.) 
+ */ + if (subpaths_valid) + add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, + NIL, NULL, 0, false, + -1)); + + /* + * Consider an append of unordered, unparameterized partial paths. Make + * it parallel-aware if possible. + */ + if (partial_subpaths_valid && partial_subpaths != NIL) + { + AppendPath *appendpath; + ListCell *lc; + int parallel_workers = 0; + + /* Find the highest number of workers requested for any subpath. */ + foreach(lc, partial_subpaths) + { + Path *path = lfirst(lc); + + parallel_workers = Max(parallel_workers, path->parallel_workers); + } + Assert(parallel_workers > 0); + + /* + * If the use of parallel append is permitted, always request at least + * log2(# of children) workers. We assume it can be useful to have + * extra workers in this case because they will be spread out across + * the children. The precise formula is just a guess, but we don't + * want to end up with a radically different answer for a table with N + * partitions vs. an unpartitioned table with the same data, so the + * use of some kind of log-scaling here seems to make some sense. + */ + if (enable_parallel_append) + { + parallel_workers = Max(parallel_workers, + fls(list_length(live_childrels))); + parallel_workers = Min(parallel_workers, + max_parallel_workers_per_gather); + } + Assert(parallel_workers > 0); + + /* Generate a partial append path. */ + appendpath = create_append_path(root, rel, NIL, partial_subpaths, + NIL, NULL, parallel_workers, + enable_parallel_append, + -1); + + /* + * Make sure any subsequent partial paths use the same row count + * estimate. + */ + partial_rows = appendpath->path.rows; + + /* Add the path. */ + add_partial_path(rel, (Path *) appendpath); + } + + /* + * Consider a parallel-aware append using a mix of partial and non-partial + * paths. (This only makes sense if there's at least one child which has + * a non-partial path that is substantially cheaper than any partial path; + * otherwise, we should use the append path added in the previous step.) + */ + if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL) + { + AppendPath *appendpath; + ListCell *lc; + int parallel_workers = 0; + + /* + * Find the highest number of workers requested for any partial + * subpath. + */ + foreach(lc, pa_partial_subpaths) + { + Path *path = lfirst(lc); + + parallel_workers = Max(parallel_workers, path->parallel_workers); + } + + /* + * Same formula here as above. It's even more important in this + * instance because the non-partial paths won't contribute anything to + * the planned number of parallel workers. + */ + parallel_workers = Max(parallel_workers, + fls(list_length(live_childrels))); + parallel_workers = Min(parallel_workers, + max_parallel_workers_per_gather); + Assert(parallel_workers > 0); + + appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, + pa_partial_subpaths, + NIL, NULL, parallel_workers, true, + partial_rows); + add_partial_path(rel, (Path *) appendpath); + } + + /* + * Also build unparameterized ordered append paths based on the collected + * list of child pathkeys. + */ + if (subpaths_valid) + generate_orderedappend_paths(root, rel, live_childrels, + all_child_pathkeys); + + /* + * Build Append paths for each parameterization seen among the child rels. + * (This may look pretty expensive, but in most cases of practical + * interest, the child rels will expose mostly the same parameterizations, + * so that not that many cases actually get considered here.) 
+ * + * The Append node itself cannot enforce quals, so all qual checking must + * be done in the child paths. This means that to have a parameterized + * Append path, we must have the exact same parameterization for each + * child path; otherwise some children might be failing to check the + * moved-down quals. To make them match up, we can try to increase the + * parameterization of lesser-parameterized paths. + */ + foreach(l, all_child_outers) + { + Relids required_outer = (Relids) lfirst(l); + ListCell *lcr; + + /* Select the child paths for an Append with this parameterization */ + subpaths = NIL; + subpaths_valid = true; + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *subpath; + + if (childrel->pathlist == NIL) + { + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; + } + + subpath = get_cheapest_parameterized_child_path(root, + childrel, + required_outer); + if (subpath == NULL) + { + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; + } + accumulate_append_subpath(subpath, &subpaths, NULL); + } + + if (subpaths_valid) + add_path(rel, (Path *) + create_append_path(root, rel, subpaths, NIL, + NIL, required_outer, 0, false, + -1)); + } + + /* + * When there is only a single child relation, the Append path can inherit + * any ordering available for the child rel's path, so that it's useful to + * consider ordered partial paths. Above we only considered the cheapest + * partial path for each child, but let's also make paths using any + * partial paths that have pathkeys. + */ + if (list_length(live_childrels) == 1) + { + RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels); + + /* skip the cheapest partial path, since we already used that above */ + for_each_from(l, childrel->partial_pathlist, 1) + { + Path *path = (Path *) lfirst(l); + AppendPath *appendpath; + + /* skip paths with no pathkeys. */ + if (path->pathkeys == NIL) + continue; + + appendpath = create_append_path(root, rel, NIL, list_make1(path), + NIL, NULL, + path->parallel_workers, true, + partial_rows); + add_partial_path(rel, (Path *) appendpath); + } + } +} + +/* + * generate_orderedappend_paths + * Generate ordered append paths for an append relation + * + * Usually we generate MergeAppend paths here, but there are some special + * cases where we can generate simple Append paths, because the subpaths + * can provide tuples in the required order already. + * + * We generate a path for each ordering (pathkey list) appearing in + * all_child_pathkeys. + * + * We consider both cheapest-startup and cheapest-total cases, ie, for each + * interesting ordering, collect all the cheapest startup subpaths and all the + * cheapest total paths, and build a suitable path for each case. + * + * We don't currently generate any parameterized ordered paths here. While + * it would not take much more code here to do so, it's very unclear that it + * is worth the planning cycles to investigate such paths: there's little + * use for an ordered path on the inside of a nestloop. In fact, it's likely + * that the current coding of add_path would reject such paths out of hand, + * because add_path gives no credit for sort ordering of parameterized paths, + * and a parameterized MergeAppend is going to be more expensive than the + * corresponding parameterized Append path. 
If we ever try harder to support + * parameterized mergejoin plans, it might be worth adding support for + * parameterized paths here to feed such joins. (See notes in + * optimizer/README for why that might not ever happen, though.) + */ +static void +generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys) +{ + ListCell *lcp; + List *partition_pathkeys = NIL; + List *partition_pathkeys_desc = NIL; + bool partition_pathkeys_partial = true; + bool partition_pathkeys_desc_partial = true; + + /* + * Some partitioned table setups may allow us to use an Append node + * instead of a MergeAppend. This is possible in cases such as RANGE + * partitioned tables where it's guaranteed that an earlier partition must + * contain rows which come earlier in the sort order. To detect whether + * this is relevant, build pathkey descriptions of the partition ordering, + * for both forward and reverse scans. + */ + if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) && + partitions_are_ordered(rel->boundinfo, rel->nparts)) + { + partition_pathkeys = build_partition_pathkeys(root, rel, + ForwardScanDirection, + &partition_pathkeys_partial); + + partition_pathkeys_desc = build_partition_pathkeys(root, rel, + BackwardScanDirection, + &partition_pathkeys_desc_partial); + + /* + * You might think we should truncate_useless_pathkeys here, but + * allowing partition keys which are a subset of the query's pathkeys + * can often be useful. For example, consider a table partitioned by + * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child + * paths that can produce the a, b, c ordering (perhaps via indexes on + * (a, b, c)) then it works to consider the appendrel output as + * ordered by a, b, c. + */ + } + + /* Now consider each interesting sort ordering */ + foreach(lcp, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(lcp); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + bool match_partition_order; + bool match_partition_order_desc; + + /* + * Determine if this sort ordering matches any partition pathkeys we + * have, for both ascending and descending partition order. If the + * partition pathkeys happen to be contained in pathkeys then it still + * works, as described above, providing that the partition pathkeys + * are complete and not just a prefix of the partition keys. (In such + * cases we'll be relying on the child paths to have sorted the + * lower-order columns of the required pathkeys.) + */ + match_partition_order = + pathkeys_contained_in(pathkeys, partition_pathkeys) || + (!partition_pathkeys_partial && + pathkeys_contained_in(partition_pathkeys, pathkeys)); + + match_partition_order_desc = !match_partition_order && + (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) || + (!partition_pathkeys_desc_partial && + pathkeys_contained_in(partition_pathkeys_desc, pathkeys))); + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total; + + /* Locate the right paths, if they are available. 
*/ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + STARTUP_COST, + false); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + TOTAL_COST, + false); + + /* + * If we can't find any paths with the right order just use the + * cheapest-total path; we'll have to sort it later. + */ + if (cheapest_startup == NULL || cheapest_total == NULL) + { + cheapest_startup = cheapest_total = + childrel->cheapest_total_path; + /* Assert we do have an unparameterized path for this child */ + Assert(cheapest_total->param_info == NULL); + } + + /* + * Notice whether we actually have different paths for the + * "cheapest" and "total" cases; frequently there will be no point + * in two create_merge_append_path() calls. + */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + /* + * Collect the appropriate child paths. The required logic varies + * for the Append and MergeAppend cases. + */ + if (match_partition_order) + { + /* + * We're going to make a plain Append path. We don't need + * most of what accumulate_append_subpath would do, but we do + * want to cut out child Appends or MergeAppends if they have + * just a single subpath (and hence aren't doing anything + * useful). + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lappend(startup_subpaths, cheapest_startup); + total_subpaths = lappend(total_subpaths, cheapest_total); + } + else if (match_partition_order_desc) + { + /* + * As above, but we need to reverse the order of the children, + * because nodeAppend.c doesn't know anything about reverse + * ordering and will scan the children in the order presented. + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lcons(cheapest_startup, startup_subpaths); + total_subpaths = lcons(cheapest_total, total_subpaths); + } + else + { + /* + * Otherwise, rely on accumulate_append_subpath to collect the + * child paths for the MergeAppend. + */ + accumulate_append_subpath(cheapest_startup, + &startup_subpaths, NULL); + accumulate_append_subpath(cheapest_total, + &total_subpaths, NULL); + } + } + + /* ... and build the Append or MergeAppend paths */ + if (match_partition_order || match_partition_order_desc) + { + /* We only need Append */ + add_path(rel, (Path *) create_append_path(root, + rel, + startup_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + -1)); + if (startup_neq_total) + add_path(rel, (Path *) create_append_path(root, + rel, + total_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + -1)); + } + else + { + /* We need MergeAppend */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys, + NULL)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL)); + } + } +} + +/* + * get_cheapest_parameterized_child_path + * Get cheapest path for this relation that has exactly the requested + * parameterization. + * + * Returns NULL if unable to create such a path. + */ +static Path * +get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *cheapest; + ListCell *lc; + + /* + * Look up the cheapest existing path with no more than the needed + * parameterization. 
If it has exactly the needed parameterization, we're + * done. + */ + cheapest = get_cheapest_path_for_pathkeys(rel->pathlist, + NIL, + required_outer, + TOTAL_COST, + false); + Assert(cheapest != NULL); + if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer)) + return cheapest; + + /* + * Otherwise, we can "reparameterize" an existing path to match the given + * parameterization, which effectively means pushing down additional + * joinquals to be checked within the path's scan. However, some existing + * paths might check the available joinquals already while others don't; + * therefore, it's not clear which existing path will be cheapest after + * reparameterization. We have to go through them all and find out. + */ + cheapest = NULL; + foreach(lc, rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* Can't use it if it needs more than requested parameterization */ + if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + continue; + + /* + * Reparameterization can only increase the path's cost, so if it's + * already more expensive than the current cheapest, forget it. + */ + if (cheapest != NULL && + compare_path_costs(cheapest, path, TOTAL_COST) <= 0) + continue; + + /* Reparameterize if needed, then recheck cost */ + if (!bms_equal(PATH_REQ_OUTER(path), required_outer)) + { + path = reparameterize_path(root, path, required_outer, 1.0); + if (path == NULL) + continue; /* failed to reparameterize this one */ + Assert(bms_equal(PATH_REQ_OUTER(path), required_outer)); + + if (cheapest != NULL && + compare_path_costs(cheapest, path, TOTAL_COST) <= 0) + continue; + } + + /* We have a new best path */ + cheapest = path; + } + + /* Return the best path, or NULL if we found no suitable candidate */ + return cheapest; +} + +/* + * accumulate_append_subpath + * Add a subpath to the list being built for an Append or MergeAppend. + * + * It's possible that the child is itself an Append or MergeAppend path, in + * which case we can "cut out the middleman" and just add its child paths to + * our own list. (We don't try to do this earlier because we need to apply + * both levels of transformation to the quals.) + * + * Note that if we omit a child MergeAppend in this way, we are effectively + * omitting a sort step, which seems fine: if the parent is to be an Append, + * its result would be unsorted anyway, while if the parent is to be a + * MergeAppend, there's no point in a separate sort on a child. + * + * Normally, either path is a partial path and subpaths is a list of partial + * paths, or else path is a non-partial plan and subpaths is a list of those. + * However, if path is a parallel-aware Append, then we add its partial path + * children to subpaths and the rest to special_subpaths. If the latter is + * NULL, we don't flatten the path at all (unless it contains only partial + * paths). 
+ */ +static void +accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) +{ + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + if (!apath->path.parallel_aware || apath->first_partial_path == 0) + { + *subpaths = list_concat(*subpaths, apath->subpaths); + return; + } + else if (special_subpaths != NULL) + { + List *new_special_subpaths; + + /* Split Parallel Append into partial and non-partial subpaths */ + *subpaths = list_concat(*subpaths, + list_copy_tail(apath->subpaths, + apath->first_partial_path)); + new_special_subpaths = + list_truncate(list_copy(apath->subpaths), + apath->first_partial_path); + *special_subpaths = list_concat(*special_subpaths, + new_special_subpaths); + return; + } + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + + *subpaths = list_concat(*subpaths, mpath->subpaths); + return; + } + + *subpaths = lappend(*subpaths, path); +} + +/* + * get_singleton_append_subpath + * Returns the single subpath of an Append/MergeAppend, or just + * return 'path' if it's not a single sub-path Append/MergeAppend. + * + * Note: 'path' must not be a parallel-aware path. + */ +static Path * +get_singleton_append_subpath(Path *path) +{ + Assert(!path->parallel_aware); + + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + if (list_length(apath->subpaths) == 1) + return (Path *) linitial(apath->subpaths); + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + + if (list_length(mpath->subpaths) == 1) + return (Path *) linitial(mpath->subpaths); + } + + return path; +} + +/* + * set_dummy_rel_pathlist + * Build a dummy path for a relation that's been excluded by constraints + * + * Rather than inventing a special "dummy" path type, we represent this as an + * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros). + * + * (See also mark_dummy_rel, which does basically the same thing, but is + * typically used to change a rel into dummy state after we already made + * paths for it.) + */ +static void +set_dummy_rel_pathlist(RelOptInfo *rel) +{ + /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ + rel->rows = 0; + rel->reltarget->width = 0; + + /* Discard any pre-existing paths; no further need for them */ + rel->pathlist = NIL; + rel->partial_pathlist = NIL; + + /* Set up the dummy path */ + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, + NIL, rel->lateral_relids, + 0, false, -1)); + + /* + * We set the cheapest-path fields immediately, just in case they were + * pointing at some discarded path. This is redundant when we're called + * from set_rel_size(), but not when called from elsewhere, and doing it + * twice is harmless anyway. 
+ */ + set_cheapest(rel); +} + +/* quick-and-dirty test to see if any joining is needed */ +static bool +has_multiple_baserels(PlannerInfo *root) +{ + int num_base_rels = 0; + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind == RELOPT_BASEREL) + if (++num_base_rels > 1) + return true; + } + return false; +} + +/* + * set_subquery_pathlist + * Generate SubqueryScan access paths for a subquery RTE + * + * We don't currently support generating parameterized paths for subqueries + * by pushing join clauses down into them; it seems too expensive to re-plan + * the subquery multiple times to consider different alternatives. + * (XXX that could stand to be reconsidered, now that we use Paths.) + * So the paths made here will be parameterized if the subquery contains + * LATERAL references, otherwise not. As long as that's true, there's no need + * for a separate set_subquery_size phase: just make the paths right away. + */ +static void +set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + Query *parse = root->parse; + Query *subquery = rte->subquery; + Relids required_outer; + pushdown_safety_info safetyInfo; + double tuple_fraction; + RelOptInfo *sub_final_rel; + ListCell *lc; + + /* + * Must copy the Query so that planning doesn't mess up the RTE contents + * (really really need to fix the planner to not scribble on its input, + * someday ... but see remove_unused_subquery_outputs to start with). + */ + subquery = copyObject(subquery); + + /* + * If it's a LATERAL subquery, it might contain some Vars of the current + * query level, requiring it to be treated as parameterized, even though + * we don't support pushing down join quals into subqueries. + */ + required_outer = rel->lateral_relids; + + /* + * Zero out result area for subquery_is_pushdown_safe, so that it can set + * flags as needed while recursing. In particular, we need a workspace + * for keeping track of unsafe-to-reference columns. unsafeColumns[i] + * will be set true if we find that output column i of the subquery is + * unsafe to use in a pushed-down qual. + */ + memset(&safetyInfo, 0, sizeof(safetyInfo)); + safetyInfo.unsafeColumns = (bool *) + palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); + + /* + * If the subquery has the "security_barrier" flag, it means the subquery + * originated from a view that must enforce row-level security. Then we + * must not push down quals that contain leaky functions. (Ideally this + * would be checked inside subquery_is_pushdown_safe, but since we don't + * currently pass the RTE to that function, we must do it here.) + */ + safetyInfo.unsafeLeaky = rte->security_barrier; + + /* + * If there are any restriction clauses that have been attached to the + * subquery relation, consider pushing them down to become WHERE or HAVING + * quals of the subquery itself. This transformation is useful because it + * may allow us to generate a better plan for the subquery than evaluating + * all the subquery output rows and then filtering them. + * + * There are several cases where we cannot push down clauses. Restrictions + * involving the subquery are checked by subquery_is_pushdown_safe(). + * Restrictions on individual clauses are checked by + * qual_is_pushdown_safe(). 
Also, we don't want to push down + * pseudoconstant clauses; better to have the gating node above the + * subquery. + * + * Non-pushed-down clauses will get evaluated as qpquals of the + * SubqueryScan node. + * + * XXX Are there any cases where we want to make a policy decision not to + * push down a pushable qual, because it'd result in a worse plan? + */ + if (rel->baserestrictinfo != NIL && + subquery_is_pushdown_safe(subquery, subquery, &safetyInfo)) + { + /* OK to consider pushing down individual quals */ + List *upperrestrictlist = NIL; + ListCell *l; + + foreach(l, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + if (!rinfo->pseudoconstant && + qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo)) + { + Node *clause = (Node *) rinfo->clause; + + /* Push it down */ + subquery_push_qual(subquery, rte, rti, clause); + } + else + { + /* Keep it in the upper query */ + upperrestrictlist = lappend(upperrestrictlist, rinfo); + } + } + rel->baserestrictinfo = upperrestrictlist; + /* We don't bother recomputing baserestrict_min_security */ + } + + pfree(safetyInfo.unsafeColumns); + + /* + * The upper query might not use all the subquery's output columns; if + * not, we can simplify. + */ + remove_unused_subquery_outputs(subquery, rel); + + /* + * We can safely pass the outer tuple_fraction down to the subquery if the + * outer level has no joining, aggregation, or sorting to do. Otherwise + * we'd better tell the subquery to plan for full retrieval. (XXX This + * could probably be made more intelligent ...) + */ + if (parse->hasAggs || + parse->groupClause || + parse->groupingSets || + parse->havingQual || + parse->distinctClause || + parse->sortClause || + has_multiple_baserels(root)) + tuple_fraction = 0.0; /* default case */ + else + tuple_fraction = root->tuple_fraction; + + /* plan_params should not be in use in current query level */ + Assert(root->plan_params == NIL); + + /* Generate a subroot and Paths for the subquery */ + rel->subroot = subquery_planner(root->glob, subquery, + root, + false, tuple_fraction); + + /* Isolate the params needed by this specific subplan */ + rel->subplan_params = root->plan_params; + root->plan_params = NIL; + + /* + * It's possible that constraint exclusion proved the subquery empty. If + * so, it's desirable to produce an unadorned dummy path so that we will + * recognize appropriate optimizations at this query level. + */ + sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL); + + if (IS_DUMMY_REL(sub_final_rel)) + { + set_dummy_rel_pathlist(rel); + return; + } + + /* + * Mark rel with estimated output rows, width, etc. Note that we have to + * do this before generating outer-query paths, else cost_subqueryscan is + * not happy. + */ + set_subquery_size_estimates(root, rel); + + /* + * For each Path that subquery_planner produced, make a SubqueryScanPath + * in the outer query. + */ + foreach(lc, sub_final_rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + List *pathkeys; + + /* Convert subpath's pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, + rel, + subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + + /* Generate outer path using this subpath */ + add_path(rel, (Path *) + create_subqueryscan_path(root, rel, subpath, + pathkeys, required_outer)); + } + + /* If outer rel allows parallelism, do same for partial paths. 
*/ + if (rel->consider_parallel && bms_is_empty(required_outer)) + { + /* If consider_parallel is false, there should be no partial paths. */ + Assert(sub_final_rel->consider_parallel || + sub_final_rel->partial_pathlist == NIL); + + /* Same for partial paths. */ + foreach(lc, sub_final_rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + List *pathkeys; + + /* Convert subpath's pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, + rel, + subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + + /* Generate outer path using this subpath */ + add_partial_path(rel, (Path *) + create_subqueryscan_path(root, rel, subpath, + pathkeys, + required_outer)); + } + } +} + +/* + * set_function_pathlist + * Build the (single) access path for a function RTE + */ +static void +set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + List *pathkeys = NIL; + + /* + * We don't support pushing join clauses into the quals of a function + * scan, but it could still have required parameterization due to LATERAL + * refs in the function expression. + */ + required_outer = rel->lateral_relids; + + /* + * The result is considered unordered unless ORDINALITY was used, in which + * case it is ordered by the ordinal column (the last one). See if we + * care, by checking for uses of that Var in equivalence classes. + */ + if (rte->funcordinality) + { + AttrNumber ordattno = rel->max_attr; + Var *var = NULL; + ListCell *lc; + + /* + * Is there a Var for it in rel's targetlist? If not, the query did + * not reference the ordinality column, or at least not in any way + * that would be interesting for sorting. + */ + foreach(lc, rel->reltarget->exprs) + { + Var *node = (Var *) lfirst(lc); + + /* checking varno/varlevelsup is just paranoia */ + if (IsA(node, Var) && + node->varattno == ordattno && + node->varno == rel->relid && + node->varlevelsup == 0) + { + var = node; + break; + } + } + + /* + * Try to build pathkeys for this Var with int8 sorting. We tell + * build_expression_pathkey not to build any new equivalence class; if + * the Var isn't already mentioned in some EC, it means that nothing + * cares about the ordering. + */ + if (var) + pathkeys = build_expression_pathkey(root, + (Expr *) var, + NULL, /* below outer joins */ + Int8LessOperator, + rel->relids, + false); + } + + /* Generate appropriate path */ + add_path(rel, create_functionscan_path(root, rel, + pathkeys, required_outer)); +} + +/* + * set_values_pathlist + * Build the (single) access path for a VALUES RTE + */ +static void +set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a values scan, + * but it could still have required parameterization due to LATERAL refs + * in the values expressions. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_valuesscan_path(root, rel, required_outer)); +} + +/* + * set_tablefunc_pathlist + * Build the (single) access path for a table func RTE + */ +static void +set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a tablefunc + * scan, but it could still have required parameterization due to LATERAL + * refs in the function expression. 
+ */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_tablefuncscan_path(root, rel, + required_outer)); +} + +/* + * set_cte_pathlist + * Build the (single) access path for a non-self-reference CTE RTE + * + * There's no need for a separate set_cte_size phase, since we don't + * support join-qual-parameterized paths for CTEs. + */ +static void +set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + int ndx; + ListCell *lc; + int plan_id; + Relids required_outer; + + /* + * Find the referenced CTE, and locate the plan previously made for it. + */ + levelsup = rte->ctelevelsup; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + + /* + * Note: cte_plan_ids can be shorter than cteList, if we are still working + * on planning the CTEs (ie, this is a side-reference from another CTE). + * So we mustn't use forboth here. + */ + ndx = 0; + foreach(lc, cteroot->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + ndx++; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find CTE \"%s\"", rte->ctename); + if (ndx >= list_length(cteroot->cte_plan_ids)) + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); + if (plan_id <= 0) + elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename); + cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan->plan_rows); + + /* + * We don't support pushing join clauses into the quals of a CTE scan, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_ctescan_path(root, rel, required_outer)); +} + +/* + * set_namedtuplestore_pathlist + * Build the (single) access path for a named tuplestore RTE + * + * There's no need for a separate set_namedtuplestore_size phase, since we + * don't support join-qual-parameterized paths for tuplestores. + */ +static void +set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relids required_outer; + + /* Mark rel with estimated output rows, width, etc */ + set_namedtuplestore_size_estimates(root, rel); + + /* + * We don't support pushing join clauses into the quals of a tuplestore + * scan, but it could still have required parameterization due to LATERAL + * refs in its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * set_result_pathlist + * Build the (single) access path for an RTE_RESULT RTE + * + * There's no need for a separate set_result_size phase, since we + * don't support join-qual-parameterized paths for these RTEs. 
+ */ +static void +set_result_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relids required_outer; + + /* Mark rel with estimated output rows, width, etc */ + set_result_size_estimates(root, rel); + + /* + * We don't support pushing join clauses into the quals of a Result scan, + * but it could still have required parameterization due to LATERAL refs + * in its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_resultscan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* + * set_worktable_pathlist + * Build the (single) access path for a self-reference CTE RTE + * + * There's no need for a separate set_worktable_size phase, since we don't + * support join-qual-parameterized paths for CTEs. + */ +static void +set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Path *ctepath; + PlannerInfo *cteroot; + Index levelsup; + Relids required_outer; + + /* + * We need to find the non-recursive term's path, which is in the plan + * level that's processing the recursive UNION, which is one level *below* + * where the CTE comes from. + */ + levelsup = rte->ctelevelsup; + if (levelsup == 0) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + levelsup--; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + ctepath = cteroot->non_recursive_path; + if (!ctepath) /* shouldn't happen */ + elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, ctepath->rows); + + /* + * We don't support pushing join clauses into the quals of a worktable + * scan, but it could still have required parameterization due to LATERAL + * refs in its tlist. (I'm not sure this is actually possible given the + * restrictions on recursive references, but it's easy enough to support.) + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_worktablescan_path(root, rel, required_outer)); +} + +/* + * generate_gather_paths + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. + * + * This must not be called until after we're done creating all partial paths + * for the specified relation. (Otherwise, add_partial_path might delete a + * path that some GatherPath or GatherMergePath has a reference to.) + * + * If we're generating paths for a scan or join relation, override_rows will + * be false, and we'll just use the relation's size estimate. When we're + * being called for a partially-grouped path, though, we need to override + * the rowcount estimate. (It's not clear that the particular value we're + * using here is actually best, but the underlying rel has no estimate so + * we must do something.) + */ +void +generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) +{ + Path *cheapest_partial_path; + Path *simple_gather_path; + ListCell *lc; + double rows; + double *rowsp = NULL; + + /* If there are no partial paths, there's nothing to do here. */ + if (rel->partial_pathlist == NIL) + return; + + /* Should we override the rel's rowcount estimate? 
*/ + if (override_rows) + rowsp = &rows; + + /* + * The output of Gather is always unsorted, so there's only one partial + * path of interest: the cheapest one. That will be the one at the front + * of partial_pathlist because of the way add_partial_path works. + */ + cheapest_partial_path = linitial(rel->partial_pathlist); + rows = + cheapest_partial_path->rows * cheapest_partial_path->parallel_workers; + simple_gather_path = (Path *) + create_gather_path(root, rel, cheapest_partial_path, rel->reltarget, + NULL, rowsp); + add_path(rel, simple_gather_path); + + /* + * For each useful ordering, we can consider an order-preserving Gather + * Merge. + */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + GatherMergePath *path; + + if (subpath->pathkeys == NIL) + continue; + + rows = subpath->rows * subpath->parallel_workers; + path = create_gather_merge_path(root, rel, subpath, rel->reltarget, + subpath->pathkeys, NULL, rowsp); + add_path(rel, &path->path); + } +} + +/* + * get_useful_pathkeys_for_relation + * Determine which orderings of a relation might be useful. + * + * Getting data in sorted order can be useful either because the requested + * order matches the final output ordering for the overall query we're + * planning, or because it enables an efficient merge join. Here, we try + * to figure out which pathkeys to consider. + * + * This allows us to do incremental sort on top of an index scan under a gather + * merge node, i.e. parallelized. + * + * If the require_parallel_safe is true, we also require the expressions to + * be parallel safe (which allows pushing the sort below Gather Merge). + * + * XXX At the moment this can only ever return a list with a single element, + * because it looks at query_pathkeys only. So we might return the pathkeys + * directly, but it seems plausible we'll want to consider other orderings + * in the future. For example, we might want to consider pathkeys useful for + * merge joins. + */ +static List * +get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel, + bool require_parallel_safe) +{ + List *useful_pathkeys_list = NIL; + + /* + * Considering query_pathkeys is always worth it, because it might allow + * us to avoid a total sort when we have a partially presorted path + * available or to push the total sort into the parallel portion of the + * query. + */ + if (root->query_pathkeys) + { + ListCell *lc; + int npathkeys = 0; /* useful pathkeys */ + + foreach(lc, root->query_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + + /* + * We can only build a sort for pathkeys that contain a + * safe-to-compute-early EC member computable from the current + * relation's reltarget, so ignore the remainder of the list as + * soon as we find a pathkey without such a member. + * + * It's still worthwhile to return any prefix of the pathkeys list + * that meets this requirement, as we may be able to do an + * incremental sort. + * + * If requested, ensure the sort expression is parallel-safe too. + */ + if (!relation_can_be_sorted_early(root, rel, pathkey_ec, + require_parallel_safe)) + break; + + npathkeys++; + } + + /* + * The whole query_pathkeys list matches, so append it directly, to + * allow comparing pathkeys easily by comparing list pointer. If we + * have to truncate the pathkeys, we gotta do a copy though. 
+ */ + if (npathkeys == list_length(root->query_pathkeys)) + useful_pathkeys_list = lappend(useful_pathkeys_list, + root->query_pathkeys); + else if (npathkeys > 0) + useful_pathkeys_list = lappend(useful_pathkeys_list, + list_truncate(list_copy(root->query_pathkeys), + npathkeys)); + } + + return useful_pathkeys_list; +} + +/* + * generate_useful_gather_paths + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. + * + * Unlike plain generate_gather_paths, this looks not only at pathkeys of input + * paths (aiming to preserve the ordering), but also considers ordering that + * might be useful for nodes above the gather merge node, and tries to add + * a sort (regular or incremental) to provide that. + */ +void +generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) +{ + ListCell *lc; + double rows; + double *rowsp = NULL; + List *useful_pathkeys_list = NIL; + Path *cheapest_partial_path = NULL; + + /* If there are no partial paths, there's nothing to do here. */ + if (rel->partial_pathlist == NIL) + return; + + /* Should we override the rel's rowcount estimate? */ + if (override_rows) + rowsp = &rows; + + /* generate the regular gather (merge) paths */ + generate_gather_paths(root, rel, override_rows); + + /* consider incremental sort for interesting orderings */ + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true); + + /* used for explicit (full) sort paths */ + cheapest_partial_path = linitial(rel->partial_pathlist); + + /* + * Consider sorted paths for each interesting ordering. We generate both + * incremental and full sort. + */ + foreach(lc, useful_pathkeys_list) + { + List *useful_pathkeys = lfirst(lc); + ListCell *lc2; + bool is_sorted; + int presorted_keys; + + foreach(lc2, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc2); + GatherMergePath *path; + + is_sorted = pathkeys_count_contained_in(useful_pathkeys, + subpath->pathkeys, + &presorted_keys); + + /* + * We don't need to consider the case where a subpath is already + * fully sorted because generate_gather_paths already creates a + * gather merge path for every subpath that has pathkeys present. + * + * But since the subpath is already sorted, we know we don't need + * to consider adding a sort (of either kind) on top of it, so + * we can continue here. + */ + if (is_sorted) + continue; + + /* + * Consider regular sort for the cheapest partial path (for each + * useful pathkeys). We know the path is not sorted, because we'd + * not get here otherwise. + * + * This is not redundant with the gather paths created in + * generate_gather_paths, because that doesn't generate ordered + * output. Here we add an explicit sort to match the useful + * ordering. + */ + if (cheapest_partial_path == subpath) + { + Path *tmp; + + tmp = (Path *) create_sort_path(root, + rel, + subpath, + useful_pathkeys, + -1.0); + + rows = tmp->rows * tmp->parallel_workers; + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + + /* Fall through */ + } + + /* + * Consider incremental sort, but only when the subpath is already + * partially sorted on a pathkey prefix. + */ + if (enable_incremental_sort && presorted_keys > 0) + { + Path *tmp; + + /* + * We should have already excluded pathkeys of length 1 + * because then presorted_keys > 0 would imply is_sorted was + * true.
+ */ + Assert(list_length(useful_pathkeys) != 1); + + tmp = (Path *) create_incremental_sort_path(root, + rel, + subpath, + useful_pathkeys, + presorted_keys, + -1); + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + } + } + } +} + +/* + * make_rel_from_joinlist + * Build access paths using a "joinlist" to guide the join path search. + * + * See comments for deconstruct_jointree() for definition of the joinlist + * data structure. + */ +static RelOptInfo * +make_rel_from_joinlist(PlannerInfo *root, List *joinlist) +{ + int levels_needed; + List *initial_rels; + ListCell *jl; + + /* + * Count the number of child joinlist nodes. This is the depth of the + * dynamic-programming algorithm we must employ to consider all ways of + * joining the child nodes. + */ + levels_needed = list_length(joinlist); + + if (levels_needed <= 0) + return NULL; /* nothing to do? */ + + /* + * Construct a list of rels corresponding to the child joinlist nodes. + * This may contain both base rels and rels constructed according to + * sub-joinlists. + */ + initial_rels = NIL; + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + RelOptInfo *thisrel; + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + thisrel = find_base_rel(root, varno); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + thisrel = make_rel_from_joinlist(root, (List *) jlnode); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + thisrel = NULL; /* keep compiler quiet */ + } + + initial_rels = lappend(initial_rels, thisrel); + } + + if (levels_needed == 1) + { + /* + * Single joinlist node, so we're done. + */ + return (RelOptInfo *) linitial(initial_rels); + } + else + { + /* + * Consider the different orders in which we could join the rels, + * using a plugin, GEQO, or the regular join search code. + * + * We put the initial_rels list into a PlannerInfo field because + * has_legal_joinclause() needs to look at it (ugly :-(). + */ + root->initial_rels = initial_rels; + + if (join_search_hook) + return (*join_search_hook) (root, levels_needed, initial_rels); + else if (enable_geqo && levels_needed >= geqo_threshold) + return geqo(root, levels_needed, initial_rels); + else + return standard_join_search(root, levels_needed, initial_rels); + } +} + +/* + * standard_join_search + * Find possible joinpaths for a query by successively finding ways + * to join component relations into join relations. + * + * 'levels_needed' is the number of iterations needed, ie, the number of + * independent jointree items in the query. This is > 1. + * + * 'initial_rels' is a list of RelOptInfo nodes for each independent + * jointree item. These are the components to be joined together. + * Note that levels_needed == list_length(initial_rels). + * + * Returns the final level of join relations, i.e., the relation that is + * the result of joining all the original relations together. + * At least one implementation path must be provided for this relation and + * all required sub-relations. + * + * To support loadable plugins that modify planner behavior by changing the + * join searching algorithm, we provide a hook variable that lets a plugin + * replace or supplement this function. 
Any such hook must return the same + * final join relation as the standard code would, but it might have a + * different set of implementation paths attached, and only the sub-joinrels + * needed for these paths need have been instantiated. + * + * Note to plugin authors: the functions invoked during standard_join_search() + * modify root->join_rel_list and root->join_rel_hash. If you want to do more + * than one join-order search, you'll probably need to save and restore the + * original states of those data structures. See geqo_eval() for an example. + */ +RelOptInfo * +standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) +{ + int lev; + RelOptInfo *rel; + + /* + * This function cannot be invoked recursively within any one planning + * problem, so join_rel_level[] can't be in use already. + */ + Assert(root->join_rel_level == NULL); + + /* + * We employ a simple "dynamic programming" algorithm: we first find all + * ways to build joins of two jointree items, then all ways to build joins + * of three items (from two-item joins and single items), then four-item + * joins, and so on until we have considered all ways to join all the + * items into one rel. + * + * root->join_rel_level[j] is a list of all the j-item rels. Initially we + * set root->join_rel_level[1] to represent all the single-jointree-item + * relations. + */ + root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); + + root->join_rel_level[1] = initial_rels; + + for (lev = 2; lev <= levels_needed; lev++) + { + ListCell *lc; + + /* + * Determine all possible pairs of relations to be joined at this + * level, and build paths for making each one from every available + * pair of lower-level relations. + */ + join_search_one_level(root, lev); + + /* + * Run generate_partitionwise_join_paths() and + * generate_useful_gather_paths() for each just-processed joinrel. We + * could not do this earlier because both regular and partial paths + * can get added to a particular joinrel at multiple times within + * join_search_one_level. + * + * After that, we're done creating paths for the joinrel, so run + * set_cheapest(). + */ + foreach(lc, root->join_rel_level[lev]) + { + rel = (RelOptInfo *) lfirst(lc); + + /* Create paths for partitionwise joins. */ + generate_partitionwise_join_paths(root, rel); + + /* + * Except for the topmost scan/join rel, consider gathering + * partial paths. We'll do the same for the topmost scan/join rel + * once we know the final targetlist (see grouping_planner). + */ + if (lev < levels_needed) + generate_useful_gather_paths(root, rel, false); + + /* Find and save the cheapest paths for this rel */ + set_cheapest(rel); + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, rel); +#endif + } + } + + /* + * We should have a single rel at the final level. + */ + if (root->join_rel_level[levels_needed] == NIL) + elog(ERROR, "failed to build any %d-way joins", levels_needed); + Assert(list_length(root->join_rel_level[levels_needed]) == 1); + + rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); + + root->join_rel_level = NULL; + + return rel; +} + +/***************************************************************************** + * PUSHING QUALS DOWN INTO SUBQUERIES + *****************************************************************************/ + +/* + * subquery_is_pushdown_safe - is a subquery safe for pushing down quals? + * + * subquery is the particular component query being checked. 
topquery + * is the top component of a set-operations tree (the same Query if no + * set-op is involved). + * + * Conditions checked here: + * + * 1. If the subquery has a LIMIT clause, we must not push down any quals, + * since that could change the set of rows returned. + * + * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push + * quals into it, because that could change the results. + * + * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it. + * This is because upper-level quals should semantically be evaluated only + * once per distinct row, not once per original row, and if the qual is + * volatile then extra evaluations could change the results. (This issue + * does not apply to other forms of aggregation such as GROUP BY, because + * when those are present we push into HAVING not WHERE, so that the quals + * are still applied after aggregation.) + * + * 4. If the subquery contains window functions, we cannot push volatile quals + * into it. The issue here is a bit different from DISTINCT: a volatile qual + * might succeed for some rows of a window partition and fail for others, + * thereby changing the partition contents and thus the window functions' + * results for rows that remain. + * + * 5. If the subquery contains any set-returning functions in its targetlist, + * we cannot push volatile quals into it. That would push them below the SRFs + * and thereby change the number of times they are evaluated. Also, a + * volatile qual could succeed for some SRF output rows and fail for others, + * a behavior that cannot occur if it's evaluated before SRF expansion. + * + * 6. If the subquery has nonempty grouping sets, we cannot push down any + * quals. The concern here is that a qual referencing a "constant" grouping + * column could get constant-folded, which would be improper because the value + * is potentially nullable by grouping-set expansion. This restriction could + * be removed if we had a parsetree representation that shows that such + * grouping columns are not really constant. (There are other ideas that + * could be used to relax this restriction, but that's the approach most + * likely to get taken in the future. Note that there's not much to be gained + * so long as subquery_planner can't move HAVING clauses to WHERE within such + * a subquery.) + * + * In addition, we make several checks on the subquery's output columns to see + * if it is safe to reference them in pushed-down quals. If output column k + * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k] + * to true, but we don't reject the subquery overall since column k might not + * be referenced by some/all quals. The unsafeColumns[] array will be + * consulted later by qual_is_pushdown_safe(). It's better to do it this way + * than to make the checks directly in qual_is_pushdown_safe(), because when + * the subquery involves set operations we have to check the output + * expressions in each arm of the set op. + * + * Note: pushing quals into a DISTINCT subquery is theoretically dubious: + * we're effectively assuming that the quals cannot distinguish values that + * the DISTINCT's equality operator sees as equal, yet there are many + * counterexamples to that assumption. However use of such a qual with a + * DISTINCT subquery would be unsafe anyway, since there's no guarantee which + * "equal" value will be chosen as the output value by the DISTINCT operation. + * So we don't worry too much about that. 
Another objection is that if the + * qual is expensive to evaluate, running it for each original row might cost + * more than we save by eliminating rows before the DISTINCT step. But it + * would be very hard to estimate that at this stage, and in practice pushdown + * seldom seems to make things worse, so we ignore that problem too. + * + * Note: likewise, pushing quals into a subquery with window functions is a + * bit dubious: the quals might remove some rows of a window partition while + * leaving others, causing changes in the window functions' results for the + * surviving rows. We insist that such a qual reference only partitioning + * columns, but again that only protects us if the qual does not distinguish + * values that the partitioning equality operator sees as equal. The risks + * here are perhaps larger than for DISTINCT, since no de-duplication of rows + * occurs and thus there is no theoretical problem with such a qual. But + * we'll do this anyway because the potential performance benefits are very + * large, and we've seen no field complaints about the longstanding comparable + * behavior with DISTINCT. + */ +static bool +subquery_is_pushdown_safe(Query *subquery, Query *topquery, + pushdown_safety_info *safetyInfo) +{ + SetOperationStmt *topop; + + /* Check point 1 */ + if (subquery->limitOffset != NULL || subquery->limitCount != NULL) + return false; + + /* Check point 6 */ + if (subquery->groupClause && subquery->groupingSets) + return false; + + /* Check points 3, 4, and 5 */ + if (subquery->distinctClause || + subquery->hasWindowFuncs || + subquery->hasTargetSRFs) + safetyInfo->unsafeVolatile = true; + + /* + * If we're at a leaf query, check for unsafe expressions in its target + * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in + * setop trees have only simple Vars in their tlists, so no need to check + * them.) + */ + if (subquery->setOperations == NULL) + check_output_expressions(subquery, safetyInfo); + + /* Are we at top level, or looking at a setop component? 
*/ + if (subquery == topquery) + { + /* Top level, so check any component queries */ + if (subquery->setOperations != NULL) + if (!recurse_pushdown_safe(subquery->setOperations, topquery, + safetyInfo)) + return false; + } + else + { + /* Setop component must not have more components (too weird) */ + if (subquery->setOperations != NULL) + return false; + /* Check whether setop component output types match top level */ + topop = castNode(SetOperationStmt, topquery->setOperations); + Assert(topop); + compare_tlist_datatypes(subquery->targetList, + topop->colTypes, + safetyInfo); + } + return true; +} + +/* + * Helper routine to recurse through setOperations tree + */ +static bool +recurse_pushdown_safe(Node *setOp, Query *topquery, + pushdown_safety_info *safetyInfo) +{ + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = rte->subquery; + + Assert(subquery != NULL); + return subquery_is_pushdown_safe(subquery, topquery, safetyInfo); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */ + if (op->op == SETOP_EXCEPT) + return false; + /* Else recurse */ + if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo)) + return false; + if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo)) + return false; + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } + return true; +} + +/* + * check_output_expressions - check subquery's output expressions for safety + * + * There are several cases in which it's unsafe to push down an upper-level + * qual if it references a particular output column of a subquery. We check + * each output column of the subquery and set unsafeColumns[k] to true if + * that column is unsafe for a pushed-down qual to reference. The conditions + * checked here are: + * + * 1. We must not push down any quals that refer to subselect outputs that + * return sets, else we'd introduce functions-returning-sets into the + * subquery's WHERE/HAVING quals. + * + * 2. We must not push down any quals that refer to subselect outputs that + * contain volatile functions, for fear of introducing strange results due + * to multiple evaluation of a volatile function. + * + * 3. If the subquery uses DISTINCT ON, we must not push down any quals that + * refer to non-DISTINCT output columns, because that could change the set + * of rows returned. (This condition is vacuous for DISTINCT, because then + * there are no non-DISTINCT output columns, so we needn't check. Note that + * subquery_is_pushdown_safe already reported that we can't use volatile + * quals if there's DISTINCT or DISTINCT ON.) + * + * 4. If the subquery has any window functions, we must not push down quals + * that reference any output columns that are not listed in all the subquery's + * window PARTITION BY clauses. We can push down quals that use only + * partitioning columns because they should succeed or fail identically for + * every row of any one window partition, and totally excluding some + * partitions will not change a window function's results for remaining + * partitions. (Again, this also requires nonvolatile quals, but + * subquery_is_pushdown_safe handles that.) 
+ */ +static void +check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo) +{ + ListCell *lc; + + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* We need not check further if output col is already known unsafe */ + if (safetyInfo->unsafeColumns[tle->resno]) + continue; + + /* Functions returning sets are unsafe (point 1) */ + if (subquery->hasTargetSRFs && + expression_returns_set((Node *) tle->expr)) + { + safetyInfo->unsafeColumns[tle->resno] = true; + continue; + } + + /* Volatile functions are unsafe (point 2) */ + if (contain_volatile_functions((Node *) tle->expr)) + { + safetyInfo->unsafeColumns[tle->resno] = true; + continue; + } + + /* If subquery uses DISTINCT ON, check point 3 */ + if (subquery->hasDistinctOn && + !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) + { + /* non-DISTINCT column, so mark it unsafe */ + safetyInfo->unsafeColumns[tle->resno] = true; + continue; + } + + /* If subquery uses window functions, check point 4 */ + if (subquery->hasWindowFuncs && + !targetIsInAllPartitionLists(tle, subquery)) + { + /* not present in all PARTITION BY clauses, so mark it unsafe */ + safetyInfo->unsafeColumns[tle->resno] = true; + continue; + } + } +} + +/* + * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can + * push quals into each component query, but the quals can only reference + * subquery columns that suffer no type coercions in the set operation. + * Otherwise there are possible semantic gotchas. So, we check the + * component queries to see if any of them have output types different from + * the top-level setop outputs. unsafeColumns[k] is set true if column k + * has different type in any component. + * + * We don't have to care about typmods here: the only allowed difference + * between set-op input and output typmods is input is a specific typmod + * and output is -1, and that does not require a coercion. + * + * tlist is a subquery tlist. + * colTypes is an OID list of the top-level setop's output column types. + * safetyInfo->unsafeColumns[] is the result array. + */ +static void +compare_tlist_datatypes(List *tlist, List *colTypes, + pushdown_safety_info *safetyInfo) +{ + ListCell *l; + ListCell *colType = list_head(colTypes); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + if (colType == NULL) + elog(ERROR, "wrong number of tlist entries"); + if (exprType((Node *) tle->expr) != lfirst_oid(colType)) + safetyInfo->unsafeColumns[tle->resno] = true; + colType = lnext(colTypes, colType); + } + if (colType != NULL) + elog(ERROR, "wrong number of tlist entries"); +} + +/* + * targetIsInAllPartitionLists + * True if the TargetEntry is listed in the PARTITION BY clause + * of every window defined in the query. + * + * It would be safe to ignore windows not actually used by any window + * function, but it's not easy to get that info at this stage; and it's + * unlikely to be useful to spend any extra cycles getting it, since + * unreferenced window definitions are probably infrequent in practice. 
+ */ +static bool +targetIsInAllPartitionLists(TargetEntry *tle, Query *query) +{ + ListCell *lc; + + foreach(lc, query->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(lc); + + if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause)) + return false; + } + return true; +} + +/* + * qual_is_pushdown_safe - is a particular rinfo safe to push down? + * + * rinfo is a restriction clause applying to the given subquery (whose RTE + * has index rti in the parent query). + * + * Conditions checked here: + * + * 1. rinfo's clause must not contain any SubPlans (mainly because it's + * unclear that it will work correctly: SubLinks will already have been + * transformed into SubPlans in the qual, but not in the subquery). Note that + * SubLinks that transform to initplans are safe, and will be accepted here + * because what we'll see in the qual is just a Param referencing the initplan + * output. + * + * 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile + * functions. + * + * 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky + * functions that are passed Var nodes, and therefore might reveal values from + * the subquery as side effects. + * + * 4. rinfo's clause must not refer to the whole-row output of the subquery + * (since there is no easy way to name that within the subquery itself). + * + * 5. rinfo's clause must not refer to any subquery output columns that were + * found to be unsafe to reference by subquery_is_pushdown_safe(). + */ +static bool +qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo, + pushdown_safety_info *safetyInfo) +{ + bool safe = true; + Node *qual = (Node *) rinfo->clause; + List *vars; + ListCell *vl; + + /* Refuse subselects (point 1) */ + if (contain_subplans(qual)) + return false; + + /* Refuse volatile quals if we found they'd be unsafe (point 2) */ + if (safetyInfo->unsafeVolatile && + contain_volatile_functions((Node *) rinfo)) + return false; + + /* Refuse leaky quals if told to (point 3) */ + if (safetyInfo->unsafeLeaky && + contain_leaked_vars(qual)) + return false; + + /* + * It would be unsafe to push down window function calls, but at least for + * the moment we could never see any in a qual anyhow. (The same applies + * to aggregates, which we check for in pull_var_clause below.) + */ + Assert(!contain_window_function(qual)); + + /* + * Examine all Vars used in clause. Since it's a restriction clause, all + * such Vars must refer to subselect output columns ... unless this is + * part of a LATERAL subquery, in which case there could be lateral + * references. + */ + vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS); + foreach(vl, vars) + { + Var *var = (Var *) lfirst(vl); + + /* + * XXX Punt if we find any PlaceHolderVars in the restriction clause. + * It's not clear whether a PHV could safely be pushed down, and even + * less clear whether such a situation could arise in any cases of + * practical interest anyway. So for the moment, just refuse to push + * down. + */ + if (!IsA(var, Var)) + { + safe = false; + break; + } + + /* + * Punt if we find any lateral references. It would be safe to push + * these down, but we'd have to convert them into outer references, + * which subquery_push_qual lacks the infrastructure to do. The case + * arises so seldom that it doesn't seem worth working hard on. 
+ */ + if (var->varno != rti) + { + safe = false; + break; + } + + /* Subqueries have no system columns */ + Assert(var->varattno >= 0); + + /* Check point 4 */ + if (var->varattno == 0) + { + safe = false; + break; + } + + /* Check point 5 */ + if (safetyInfo->unsafeColumns[var->varattno]) + { + safe = false; + break; + } + } + + list_free(vars); + + return safe; +} + +/* + * subquery_push_qual - push down a qual that we have determined is safe + */ +static void +subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) +{ + if (subquery->setOperations != NULL) + { + /* Recurse to push it separately to each component query */ + recurse_push_qual(subquery->setOperations, subquery, + rte, rti, qual); + } + else + { + /* + * We need to replace Vars in the qual (which must refer to outputs of + * the subquery) with copies of the subquery's targetlist expressions. + * Note that at this point, any uplevel Vars in the qual should have + * been replaced with Params, so they need no work. + * + * This step also ensures that when we are pushing into a setop tree, + * each component query gets its own copy of the qual. + */ + qual = ReplaceVarsFromTargetList(qual, rti, 0, rte, + subquery->targetList, + REPLACEVARS_REPORT_ERROR, 0, + &subquery->hasSubLinks); + + /* + * Now attach the qual to the proper place: normally WHERE, but if the + * subquery uses grouping or aggregation, put it in HAVING (since the + * qual really refers to the group-result rows). + */ + if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual) + subquery->havingQual = make_and_qual(subquery->havingQual, qual); + else + subquery->jointree->quals = + make_and_qual(subquery->jointree->quals, qual); + + /* + * We need not change the subquery's hasAggs or hasSubLinks flags, + * since we can't be pushing down any aggregates that weren't there + * before, and we don't push down subselects at all. + */ + } +} + +/* + * Helper routine to recurse through setOperations tree + */ +static void +recurse_push_qual(Node *setOp, Query *topquery, + RangeTblEntry *rte, Index rti, Node *qual) +{ + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = subrte->subquery; + + Assert(subquery != NULL); + subquery_push_qual(subquery, rte, rti, qual); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + recurse_push_qual(op->larg, topquery, rte, rti, qual); + recurse_push_qual(op->rarg, topquery, rte, rti, qual); + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } +} + +/***************************************************************************** + * SIMPLIFYING SUBQUERY TARGETLISTS + *****************************************************************************/ + +/* + * remove_unused_subquery_outputs + * Remove subquery targetlist items we don't need + * + * It's possible, even likely, that the upper query does not read all the + * output columns of the subquery. We can remove any such outputs that are + * not needed by the subquery itself (e.g., as sort/group columns) and do not + * affect semantics otherwise (e.g., volatile functions can't be removed). + * This is useful not only because we might be able to remove expensive-to- + * compute expressions, but because deletion of output columns might allow + * optimizations such as join removal to occur within the subquery. 
+ * + * To avoid affecting column numbering in the targetlist, we don't physically + * remove unused tlist entries, but rather replace their expressions with NULL + * constants. This is implemented by modifying subquery->targetList. + */ +static void +remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) +{ + Bitmapset *attrs_used = NULL; + ListCell *lc; + + /* + * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we + * could update all the child SELECTs' tlists, but it seems not worth the + * trouble presently. + */ + if (subquery->setOperations) + return; + + /* + * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our + * time: all its output columns must be used in the distinctClause. + */ + if (subquery->distinctClause && !subquery->hasDistinctOn) + return; + + /* + * Collect a bitmap of all the output column numbers used by the upper + * query. + * + * Add all the attributes needed for joins or final output. Note: we must + * look at rel's targetlist, not the attr_needed data, because attr_needed + * isn't computed for inheritance child rels, cf set_append_rel_size(). + * (XXX might be worth changing that sometime.) + */ + pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used); + + /* Add all the attributes used by un-pushed-down restriction clauses. */ + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used); + } + + /* + * If there's a whole-row reference to the subquery, we can't remove + * anything. + */ + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used)) + return; + + /* + * Run through the tlist and zap entries we don't need. It's okay to + * modify the tlist items in-place because set_subquery_pathlist made a + * copy of the subquery. + */ + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *texpr = (Node *) tle->expr; + + /* + * If it has a sortgroupref number, it's used in some sort/group + * clause so we'd better not remove it. Also, don't remove any + * resjunk columns, since their reason for being has nothing to do + * with anybody reading the subquery's output. (It's likely that + * resjunk columns in a sub-SELECT would always have ressortgroupref + * set, but even if they don't, it seems imprudent to remove them.) + */ + if (tle->ressortgroupref || tle->resjunk) + continue; + + /* + * If it's used by the upper query, we can't remove it. + */ + if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber, + attrs_used)) + continue; + + /* + * If it contains a set-returning function, we can't remove it since + * that could change the number of rows returned by the subquery. + */ + if (subquery->hasTargetSRFs && + expression_returns_set(texpr)) + continue; + + /* + * If it contains volatile functions, we daren't remove it for fear + * that the user is expecting their side-effects to happen. + */ + if (contain_volatile_functions(texpr)) + continue; + + /* + * OK, we don't need it. Replace the expression with a NULL constant. + * Preserve the exposed type of the expression, in case something + * looks at the rowtype of the subquery's result. 
+ */ + tle->expr = (Expr *) makeNullConst(exprType(texpr), + exprTypmod(texpr), + exprCollation(texpr)); + } +} + +/* + * create_partial_bitmap_paths + * Build partial bitmap heap path for the relation + */ +void +create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, + Path *bitmapqual) +{ + int parallel_workers; + double pages_fetched; + + /* Compute heap pages for bitmap heap scan */ + pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0, + NULL, NULL); + + parallel_workers = compute_parallel_worker(rel, pages_fetched, -1, + max_parallel_workers_per_gather); + + if (parallel_workers <= 0) + return; + + add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel, + bitmapqual, rel->lateral_relids, 1.0, parallel_workers)); +} + +/* + * Compute the number of parallel workers that should be used to scan a + * relation. We compute the parallel workers based on the size of the heap to + * be scanned and the size of the index to be scanned, then choose a minimum + * of those. + * + * "heap_pages" is the number of pages from the table that we expect to scan, or + * -1 if we don't expect to scan any. + * + * "index_pages" is the number of pages from the index that we expect to scan, or + * -1 if we don't expect to scan any. + * + * "max_workers" is caller's limit on the number of workers. This typically + * comes from a GUC. + */ +int +compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages, + int max_workers) +{ + int parallel_workers = 0; + + /* + * If the user has set the parallel_workers reloption, use that; otherwise + * select a default number of workers. + */ + if (rel->rel_parallel_workers != -1) + parallel_workers = rel->rel_parallel_workers; + else + { + /* + * If the number of pages being scanned is insufficient to justify a + * parallel scan, just return zero ... unless it's an inheritance + * child. In that case, we want to generate a parallel path here + * anyway. It might not be worthwhile just for this relation, but + * when combined with all of its inheritance siblings it may well pay + * off. + */ + if (rel->reloptkind == RELOPT_BASEREL && + ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) || + (index_pages >= 0 && index_pages < min_parallel_index_scan_size))) + return 0; + + if (heap_pages >= 0) + { + int heap_parallel_threshold; + int heap_parallel_workers = 1; + + /* + * Select the number of workers based on the log of the size of + * the relation. This probably needs to be a good deal more + * sophisticated, but we need something here for now. Note that + * the upper limit of the min_parallel_table_scan_size GUC is + * chosen to prevent overflow here. 
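+ *
+ * As an illustration, assuming the default min_parallel_table_scan_size
+ * of 1024 pages (8MB): a heap of 1024 pages gets one worker, 3072 pages
+ * get two, 9216 pages get three, and so on; each tripling of the scanned
+ * size adds one more worker, before the caller's max_workers cap is
+ * applied below.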
+ */ + heap_parallel_threshold = Max(min_parallel_table_scan_size, 1); + while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3)) + { + heap_parallel_workers++; + heap_parallel_threshold *= 3; + if (heap_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + parallel_workers = heap_parallel_workers; + } + + if (index_pages >= 0) + { + int index_parallel_workers = 1; + int index_parallel_threshold; + + /* same calculation as for heap_pages above */ + index_parallel_threshold = Max(min_parallel_index_scan_size, 1); + while (index_pages >= (BlockNumber) (index_parallel_threshold * 3)) + { + index_parallel_workers++; + index_parallel_threshold *= 3; + if (index_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + if (parallel_workers > 0) + parallel_workers = Min(parallel_workers, index_parallel_workers); + else + parallel_workers = index_parallel_workers; + } + } + + /* In no case use more than caller supplied maximum number of workers */ + parallel_workers = Min(parallel_workers, max_workers); + + return parallel_workers; +} + +/* + * generate_partitionwise_join_paths + * Create paths representing partitionwise join for given partitioned + * join relation. + * + * This must not be called until after we are done adding paths for all + * child-joins. Otherwise, add_path might delete a path to which some path + * generated here has a reference. + */ +void +generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel) +{ + List *live_children = NIL; + int cnt_parts; + int num_parts; + RelOptInfo **part_rels; + + /* Handle only join relations here. */ + if (!IS_JOIN_REL(rel)) + return; + + /* We've nothing to do if the relation is not partitioned. */ + if (!IS_PARTITIONED_REL(rel)) + return; + + /* The relation should have consider_partitionwise_join set. */ + Assert(rel->consider_partitionwise_join); + + /* Guard against stack overflow due to overly deep partition hierarchy. */ + check_stack_depth(); + + num_parts = rel->nparts; + part_rels = rel->part_rels; + + /* Collect non-dummy child-joins. */ + for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++) + { + RelOptInfo *child_rel = part_rels[cnt_parts]; + + /* If it's been pruned entirely, it's certainly dummy. */ + if (child_rel == NULL) + continue; + + /* Add partitionwise join paths for partitioned child-joins. */ + generate_partitionwise_join_paths(root, child_rel); + + set_cheapest(child_rel); + + /* Dummy children will not be scanned, so ignore those. */ + if (IS_DUMMY_REL(child_rel)) + continue; + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, child_rel); +#endif + + live_children = lappend(live_children, child_rel); + } + + /* If all child-joins are dummy, parent join is also dummy. */ + if (!live_children) + { + mark_dummy_rel(rel); + return; + } + + /* Build additional paths for this rel from child-join paths. 
*/ + add_paths_to_append_rel(root, rel, live_children); + list_free(live_children); +} + + +/***************************************************************************** + * DEBUG SUPPORT + *****************************************************************************/ + +#ifdef OPTIMIZER_DEBUG + +static void +print_relids(PlannerInfo *root, Relids relids) +{ + int x; + bool first = true; + + x = -1; + while ((x = bms_next_member(relids, x)) >= 0) + { + if (!first) + printf(" "); + if (x < root->simple_rel_array_size && + root->simple_rte_array[x]) + printf("%s", root->simple_rte_array[x]->eref->aliasname); + else + printf("%d", x); + first = false; + } +} + +static void +print_restrictclauses(PlannerInfo *root, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *c = lfirst(l); + + print_expr((Node *) c->clause, root->parse->rtable); + if (lnext(clauses, l)) + printf(", "); + } +} + +static void +print_path(PlannerInfo *root, Path *path, int indent) +{ + const char *ptype; + bool join = false; + Path *subpath = NULL; + int i; + + switch (nodeTag(path)) + { + case T_Path: + switch (path->pathtype) + { + case T_SeqScan: + ptype = "SeqScan"; + break; + case T_SampleScan: + ptype = "SampleScan"; + break; + case T_FunctionScan: + ptype = "FunctionScan"; + break; + case T_TableFuncScan: + ptype = "TableFuncScan"; + break; + case T_ValuesScan: + ptype = "ValuesScan"; + break; + case T_CteScan: + ptype = "CteScan"; + break; + case T_NamedTuplestoreScan: + ptype = "NamedTuplestoreScan"; + break; + case T_Result: + ptype = "Result"; + break; + case T_WorkTableScan: + ptype = "WorkTableScan"; + break; + default: + ptype = "???Path"; + break; + } + break; + case T_IndexPath: + ptype = "IdxScan"; + break; + case T_BitmapHeapPath: + ptype = "BitmapHeapScan"; + break; + case T_BitmapAndPath: + ptype = "BitmapAndPath"; + break; + case T_BitmapOrPath: + ptype = "BitmapOrPath"; + break; + case T_TidPath: + ptype = "TidScan"; + break; + case T_SubqueryScanPath: + ptype = "SubqueryScan"; + break; + case T_ForeignPath: + ptype = "ForeignScan"; + break; + case T_CustomPath: + ptype = "CustomScan"; + break; + case T_NestPath: + ptype = "NestLoop"; + join = true; + break; + case T_MergePath: + ptype = "MergeJoin"; + join = true; + break; + case T_HashPath: + ptype = "HashJoin"; + join = true; + break; + case T_AppendPath: + ptype = "Append"; + break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; + case T_GroupResultPath: + ptype = "GroupResult"; + break; + case T_MaterialPath: + ptype = "Material"; + subpath = ((MaterialPath *) path)->subpath; + break; + case T_MemoizePath: + ptype = "Memoize"; + subpath = ((MemoizePath *) path)->subpath; + break; + case T_UniquePath: + ptype = "Unique"; + subpath = ((UniquePath *) path)->subpath; + break; + case T_GatherPath: + ptype = "Gather"; + subpath = ((GatherPath *) path)->subpath; + break; + case T_GatherMergePath: + ptype = "GatherMerge"; + subpath = ((GatherMergePath *) path)->subpath; + break; + case T_ProjectionPath: + ptype = "Projection"; + subpath = ((ProjectionPath *) path)->subpath; + break; + case T_ProjectSetPath: + ptype = "ProjectSet"; + subpath = ((ProjectSetPath *) path)->subpath; + break; + case T_SortPath: + ptype = "Sort"; + subpath = ((SortPath *) path)->subpath; + break; + case T_IncrementalSortPath: + ptype = "IncrementalSort"; + subpath = ((SortPath *) path)->subpath; + break; + case T_GroupPath: + ptype = "Group"; + subpath = ((GroupPath *) path)->subpath; + break; + case T_UpperUniquePath: + ptype = 
"UpperUnique"; + subpath = ((UpperUniquePath *) path)->subpath; + break; + case T_AggPath: + ptype = "Agg"; + subpath = ((AggPath *) path)->subpath; + break; + case T_GroupingSetsPath: + ptype = "GroupingSets"; + subpath = ((GroupingSetsPath *) path)->subpath; + break; + case T_MinMaxAggPath: + ptype = "MinMaxAgg"; + break; + case T_WindowAggPath: + ptype = "WindowAgg"; + subpath = ((WindowAggPath *) path)->subpath; + break; + case T_SetOpPath: + ptype = "SetOp"; + subpath = ((SetOpPath *) path)->subpath; + break; + case T_RecursiveUnionPath: + ptype = "RecursiveUnion"; + break; + case T_LockRowsPath: + ptype = "LockRows"; + subpath = ((LockRowsPath *) path)->subpath; + break; + case T_ModifyTablePath: + ptype = "ModifyTable"; + break; + case T_LimitPath: + ptype = "Limit"; + subpath = ((LimitPath *) path)->subpath; + break; + default: + ptype = "???Path"; + break; + } + + for (i = 0; i < indent; i++) + printf("\t"); + printf("%s", ptype); + + if (path->parent) + { + printf("("); + print_relids(root, path->parent->relids); + printf(")"); + } + if (path->param_info) + { + printf(" required_outer ("); + print_relids(root, path->param_info->ppi_req_outer); + printf(")"); + } + printf(" rows=%.0f cost=%.2f..%.2f\n", + path->rows, path->startup_cost, path->total_cost); + + if (path->pathkeys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" pathkeys: "); + print_pathkeys(path->pathkeys, root->parse->rtable); + } + + if (join) + { + JoinPath *jp = (JoinPath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" clauses: "); + print_restrictclauses(root, jp->joinrestrictinfo); + printf("\n"); + + if (IsA(path, MergePath)) + { + MergePath *mp = (MergePath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d materializeinner=%d\n", + ((mp->outersortkeys) ? 1 : 0), + ((mp->innersortkeys) ? 1 : 0), + ((mp->materialize_inner) ? 
1 : 0)); + } + + print_path(root, jp->outerjoinpath, indent + 1); + print_path(root, jp->innerjoinpath, indent + 1); + } + + if (subpath) + print_path(root, subpath, indent + 1); +} + +void +debug_print_rel(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *l; + + printf("RELOPTINFO ("); + print_relids(root, rel->relids); + printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width); + + if (rel->baserestrictinfo) + { + printf("\tbaserestrictinfo: "); + print_restrictclauses(root, rel->baserestrictinfo); + printf("\n"); + } + + if (rel->joininfo) + { + printf("\tjoininfo: "); + print_restrictclauses(root, rel->joininfo); + printf("\n"); + } + + printf("\tpath list:\n"); + foreach(l, rel->pathlist) + print_path(root, lfirst(l), 1); + if (rel->cheapest_parameterized_paths) + { + printf("\n\tcheapest parameterized paths:\n"); + foreach(l, rel->cheapest_parameterized_paths) + print_path(root, lfirst(l), 1); + } + if (rel->cheapest_startup_path) + { + printf("\n\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + } + if (rel->cheapest_total_path) + { + printf("\n\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); + } + printf("\n"); + fflush(stdout); +} + +#endif /* OPTIMIZER_DEBUG */ diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c new file mode 100644 index 0000000..d263ecf --- /dev/null +++ b/src/backend/optimizer/path/clausesel.c @@ -0,0 +1,1000 @@ +/*------------------------------------------------------------------------- + * + * clausesel.c + * Routines to compute clause selectivities + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/clausesel.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/plancat.h" +#include "statistics/statistics.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" + +/* + * Data structure for accumulating info about possible range-query + * clause pairs in clauselist_selectivity. + */ +typedef struct RangeQueryClause +{ + struct RangeQueryClause *next; /* next in linked list */ + Node *var; /* The common variable of the clauses */ + bool have_lobound; /* found a low-bound clause yet? */ + bool have_hibound; /* found a high-bound clause yet? */ + Selectivity lobound; /* Selectivity of a var > something clause */ + Selectivity hibound; /* Selectivity of a var < something clause */ +} RangeQueryClause; + +static void addRangeClause(RangeQueryClause **rqlist, Node *clause, + bool varonleft, bool isLTsel, Selectivity s2); +static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root, + List *clauses); +static Selectivity clauselist_selectivity_or(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + bool use_extended_stats); + +/**************************************************************************** + * ROUTINES TO COMPUTE SELECTIVITIES + ****************************************************************************/ + +/* + * clauselist_selectivity - + * Compute the selectivity of an implicitly-ANDed list of boolean + * expression clauses. 
The list can be empty, in which case 1.0 + * must be returned. List elements may be either RestrictInfos + * or bare expression clauses --- the former is preferred since + * it allows caching of results. + * + * See clause_selectivity() for the meaning of the additional parameters. + * + * The basic approach is to apply extended statistics first, on as many + * clauses as possible, in order to capture cross-column dependencies etc. + * The remaining clauses are then estimated by taking the product of their + * selectivities, but that's only right if they have independent + * probabilities, and in reality they are often NOT independent even if they + * only refer to a single column. So, we want to be smarter where we can. + * + * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses + * are recognized as possible range query components if they are restriction + * opclauses whose operators have scalarltsel or a related function as their + * restriction selectivity estimator. We pair up clauses of this form that + * refer to the same variable. An unpairable clause of this kind is simply + * multiplied into the selectivity product in the normal way. But when we + * find a pair, we know that the selectivities represent the relative + * positions of the low and high bounds within the column's range, so instead + * of figuring the selectivity as hisel * losel, we can figure it as hisel + + * losel - 1. (To visualize this, see that hisel is the fraction of the range + * below the high bound, while losel is the fraction above the low bound; so + * hisel can be interpreted directly as a 0..1 value but we need to convert + * losel to 1-losel before interpreting it as a value. Then the available + * range is 1-losel to hisel. However, this calculation double-excludes + * nulls, so really we need hisel + losel + null_frac - 1.) + * + * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation + * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation + * yields an impossible (negative) result. + * + * A free side-effect is that we can recognize redundant inequalities such + * as "x < 4 AND x < 5"; only the tighter constraint will be counted. + * + * Of course this is all very dependent on the behavior of the inequality + * selectivity functions; perhaps some day we can generalize the approach. + */ +Selectivity +clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo) +{ + return clauselist_selectivity_ext(root, clauses, varRelid, + jointype, sjinfo, true); +} + +/* + * clauselist_selectivity_ext - + * Extended version of clauselist_selectivity(). If "use_extended_stats" + * is false, all extended statistics will be ignored, and only per-column + * statistics will be used. + */ +Selectivity +clauselist_selectivity_ext(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + bool use_extended_stats) +{ + Selectivity s1 = 1.0; + RelOptInfo *rel; + Bitmapset *estimatedclauses = NULL; + RangeQueryClause *rqlist = NULL; + ListCell *l; + int listidx; + + /* + * If there's exactly one clause, just go directly to + * clause_selectivity_ext(). None of what we might do below is relevant. + */ + if (list_length(clauses) == 1) + return clause_selectivity_ext(root, (Node *) linitial(clauses), + varRelid, jointype, sjinfo, + use_extended_stats); + + /* + * Determine if these clauses reference a single relation. 
If so, and if + * it has extended statistics, try to apply those. + */ + rel = find_single_rel_for_clauses(root, clauses); + if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) + { + /* + * Estimate as many clauses as possible using extended statistics. + * + * 'estimatedclauses' is populated with the 0-based list position + * index of clauses estimated here, and that should be ignored below. + */ + s1 = statext_clauselist_selectivity(root, clauses, varRelid, + jointype, sjinfo, rel, + &estimatedclauses, false); + } + + /* + * Apply normal selectivity estimates for remaining clauses. We'll be + * careful to skip any clauses which were already estimated above. + * + * Anything that doesn't look like a potential rangequery clause gets + * multiplied into s1 and forgotten. Anything that does gets inserted into + * an rqlist entry. + */ + listidx = -1; + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + RestrictInfo *rinfo; + Selectivity s2; + + listidx++; + + /* + * Skip this clause if it's already been estimated by some other + * statistics above. + */ + if (bms_is_member(listidx, estimatedclauses)) + continue; + + /* Compute the selectivity of this clause in isolation */ + s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo, + use_extended_stats); + + /* + * Check for being passed a RestrictInfo. + * + * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or + * 0.0; just use that rather than looking for range pairs. + */ + if (IsA(clause, RestrictInfo)) + { + rinfo = (RestrictInfo *) clause; + if (rinfo->pseudoconstant) + { + s1 = s1 * s2; + continue; + } + clause = (Node *) rinfo->clause; + } + else + rinfo = NULL; + + /* + * See if it looks like a restriction clause with a pseudoconstant on + * one side. (Anything more complicated than that might not behave in + * the simple way we are expecting.) Most of the tests here can be + * done more efficiently with rinfo than without. + */ + if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) + { + OpExpr *expr = (OpExpr *) clause; + bool varonleft = true; + bool ok; + + if (rinfo) + { + ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && + (is_pseudo_constant_clause_relids(lsecond(expr->args), + rinfo->right_relids) || + (varonleft = false, + is_pseudo_constant_clause_relids(linitial(expr->args), + rinfo->left_relids))); + } + else + { + ok = (NumRelids(root, clause) == 1) && + (is_pseudo_constant_clause(lsecond(expr->args)) || + (varonleft = false, + is_pseudo_constant_clause(linitial(expr->args)))); + } + + if (ok) + { + /* + * If it's not a "<"/"<="/">"/">=" operator, just merge the + * selectivity in generically. But if it's the right oprrest, + * add the clause to rqlist for later processing. + */ + switch (get_oprrest(expr->opno)) + { + case F_SCALARLTSEL: + case F_SCALARLESEL: + addRangeClause(&rqlist, clause, + varonleft, true, s2); + break; + case F_SCALARGTSEL: + case F_SCALARGESEL: + addRangeClause(&rqlist, clause, + varonleft, false, s2); + break; + default: + /* Just merge the selectivity in generically */ + s1 = s1 * s2; + break; + } + continue; /* drop to loop bottom */ + } + } + + /* Not the right form, so treat it generically. */ + s1 = s1 * s2; + } + + /* + * Now scan the rangequery pair list. 
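+ * For each complete pair the bounds combine as hibound + lobound - 1
+ * (plus the null fraction, which both bounds excluded); e.g. if "x < 42"
+ * was estimated at 0.7 and "x > 34" at 0.4, the pair comes out at about
+ * 0.7 + 0.4 - 1 = 0.1.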
+ */ + while (rqlist != NULL) + { + RangeQueryClause *rqnext; + + if (rqlist->have_lobound && rqlist->have_hibound) + { + /* Successfully matched a pair of range clauses */ + Selectivity s2; + + /* + * Exact equality to the default value probably means the + * selectivity function punted. This is not airtight but should + * be good enough. + */ + if (rqlist->hibound == DEFAULT_INEQ_SEL || + rqlist->lobound == DEFAULT_INEQ_SEL) + { + s2 = DEFAULT_RANGE_INEQ_SEL; + } + else + { + s2 = rqlist->hibound + rqlist->lobound - 1.0; + + /* Adjust for double-exclusion of NULLs */ + s2 += nulltestsel(root, IS_NULL, rqlist->var, + varRelid, jointype, sjinfo); + + /* + * A zero or slightly negative s2 should be converted into a + * small positive value; we probably are dealing with a very + * tight range and got a bogus result due to roundoff errors. + * However, if s2 is very negative, then we probably have + * default selectivity estimates on one or both sides of the + * range that we failed to recognize above for some reason. + */ + if (s2 <= 0.0) + { + if (s2 < -0.01) + { + /* + * No data available --- use a default estimate that + * is small, but not real small. + */ + s2 = DEFAULT_RANGE_INEQ_SEL; + } + else + { + /* + * It's just roundoff error; use a small positive + * value + */ + s2 = 1.0e-10; + } + } + } + /* Merge in the selectivity of the pair of clauses */ + s1 *= s2; + } + else + { + /* Only found one of a pair, merge it in generically */ + if (rqlist->have_lobound) + s1 *= rqlist->lobound; + else + s1 *= rqlist->hibound; + } + /* release storage and advance */ + rqnext = rqlist->next; + pfree(rqlist); + rqlist = rqnext; + } + + return s1; +} + +/* + * clauselist_selectivity_or - + * Compute the selectivity of an implicitly-ORed list of boolean + * expression clauses. The list can be empty, in which case 0.0 + * must be returned. List elements may be either RestrictInfos + * or bare expression clauses --- the former is preferred since + * it allows caching of results. + * + * See clause_selectivity() for the meaning of the additional parameters. + * + * The basic approach is to apply extended statistics first, on as many + * clauses as possible, in order to capture cross-column dependencies etc. + * The remaining clauses are then estimated as if they were independent. + */ +static Selectivity +clauselist_selectivity_or(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + bool use_extended_stats) +{ + Selectivity s1 = 0.0; + RelOptInfo *rel; + Bitmapset *estimatedclauses = NULL; + ListCell *lc; + int listidx; + + /* + * Determine if these clauses reference a single relation. If so, and if + * it has extended statistics, try to apply those. + */ + rel = find_single_rel_for_clauses(root, clauses); + if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) + { + /* + * Estimate as many clauses as possible using extended statistics. + * + * 'estimatedclauses' is populated with the 0-based list position + * index of clauses estimated here, and that should be ignored below. + */ + s1 = statext_clauselist_selectivity(root, clauses, varRelid, + jointype, sjinfo, rel, + &estimatedclauses, true); + } + + /* + * Estimate the remaining clauses as if they were independent. + * + * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account + * for the probable overlap of selected tuple sets. + * + * XXX is this too conservative? 
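+ *
+ * For example, two remaining clauses each estimated at 0.3 combine to
+ * 0.3 + 0.3 - 0.3 * 0.3 = 0.51 under this independence assumption.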
+ */ + listidx = -1; + foreach(lc, clauses) + { + Selectivity s2; + + listidx++; + + /* + * Skip this clause if it's already been estimated by some other + * statistics above. + */ + if (bms_is_member(listidx, estimatedclauses)) + continue; + + s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid, + jointype, sjinfo, use_extended_stats); + + s1 = s1 + s2 - s1 * s2; + } + + return s1; +} + +/* + * addRangeClause --- add a new range clause for clauselist_selectivity + * + * Here is where we try to match up pairs of range-query clauses + */ +static void +addRangeClause(RangeQueryClause **rqlist, Node *clause, + bool varonleft, bool isLTsel, Selectivity s2) +{ + RangeQueryClause *rqelem; + Node *var; + bool is_lobound; + + if (varonleft) + { + var = get_leftop((Expr *) clause); + is_lobound = !isLTsel; /* x < something is high bound */ + } + else + { + var = get_rightop((Expr *) clause); + is_lobound = isLTsel; /* something < x is low bound */ + } + + for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) + { + /* + * We use full equal() here because the "var" might be a function of + * one or more attributes of the same relation... + */ + if (!equal(var, rqelem->var)) + continue; + /* Found the right group to put this clause in */ + if (is_lobound) + { + if (!rqelem->have_lobound) + { + rqelem->have_lobound = true; + rqelem->lobound = s2; + } + else + { + + /*------ + * We have found two similar clauses, such as + * x < y AND x <= z. + * Keep only the more restrictive one. + *------ + */ + if (rqelem->lobound > s2) + rqelem->lobound = s2; + } + } + else + { + if (!rqelem->have_hibound) + { + rqelem->have_hibound = true; + rqelem->hibound = s2; + } + else + { + + /*------ + * We have found two similar clauses, such as + * x > y AND x >= z. + * Keep only the more restrictive one. + *------ + */ + if (rqelem->hibound > s2) + rqelem->hibound = s2; + } + } + return; + } + + /* No matching var found, so make a new clause-pair data structure */ + rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); + rqelem->var = var; + if (is_lobound) + { + rqelem->have_lobound = true; + rqelem->have_hibound = false; + rqelem->lobound = s2; + } + else + { + rqelem->have_lobound = false; + rqelem->have_hibound = true; + rqelem->hibound = s2; + } + rqelem->next = *rqlist; + *rqlist = rqelem; +} + +/* + * find_single_rel_for_clauses + * Examine each clause in 'clauses' and determine if all clauses + * reference only a single relation. If so return that relation, + * otherwise return NULL. + */ +static RelOptInfo * +find_single_rel_for_clauses(PlannerInfo *root, List *clauses) +{ + int lastrelid = 0; + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + int relid; + + /* + * If we have a list of bare clauses rather than RestrictInfos, we + * could pull out their relids the hard way with pull_varnos(). + * However, currently the extended-stats machinery won't do anything + * with non-RestrictInfo clauses anyway, so there's no point in + * spending extra cycles; just fail if that's what we have. + * + * An exception to that rule is if we have a bare BoolExpr AND clause. + * We treat this as a special case because the restrictinfo machinery + * doesn't build RestrictInfos on top of AND clauses. 
+ */ + if (is_andclause(rinfo)) + { + RelOptInfo *rel; + + rel = find_single_rel_for_clauses(root, + ((BoolExpr *) rinfo)->args); + + if (rel == NULL) + return NULL; + if (lastrelid == 0) + lastrelid = rel->relid; + else if (rel->relid != lastrelid) + return NULL; + + continue; + } + + if (!IsA(rinfo, RestrictInfo)) + return NULL; + + if (bms_is_empty(rinfo->clause_relids)) + continue; /* we can ignore variable-free clauses */ + if (!bms_get_singleton_member(rinfo->clause_relids, &relid)) + return NULL; /* multiple relations in this clause */ + if (lastrelid == 0) + lastrelid = relid; /* first clause referencing a relation */ + else if (relid != lastrelid) + return NULL; /* relation not same as last one */ + } + + if (lastrelid != 0) + return find_base_rel(root, lastrelid); + + return NULL; /* no clauses */ +} + +/* + * bms_is_subset_singleton + * + * Same result as bms_is_subset(s, bms_make_singleton(x)), + * but a little faster and doesn't leak memory. + * + * Is this of use anywhere else? If so move to bitmapset.c ... + */ +static bool +bms_is_subset_singleton(const Bitmapset *s, int x) +{ + switch (bms_membership(s)) + { + case BMS_EMPTY_SET: + return true; + case BMS_SINGLETON: + return bms_is_member(x, s); + case BMS_MULTIPLE: + return false; + } + /* can't get here... */ + return false; +} + +/* + * treat_as_join_clause - + * Decide whether an operator clause is to be handled by the + * restriction or join estimator. Subroutine for clause_selectivity(). + */ +static inline bool +treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, + int varRelid, SpecialJoinInfo *sjinfo) +{ + if (varRelid != 0) + { + /* + * Caller is forcing restriction mode (eg, because we are examining an + * inner indexscan qual). + */ + return false; + } + else if (sjinfo == NULL) + { + /* + * It must be a restriction clause, since it's being evaluated at a + * scan node. + */ + return false; + } + else + { + /* + * Otherwise, it's a join if there's more than one relation used. We + * can optimize this calculation if an rinfo was passed. + * + * XXX Since we know the clause is being evaluated at a join, the + * only way it could be single-relation is if it was delayed by outer + * joins. Although we can make use of the restriction qual estimators + * anyway, it seems likely that we ought to account for the + * probability of injected nulls somehow. + */ + if (rinfo) + return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); + else + return (NumRelids(root, clause) > 1); + } +} + + +/* + * clause_selectivity - + * Compute the selectivity of a general boolean expression clause. + * + * The clause can be either a RestrictInfo or a plain expression. If it's + * a RestrictInfo, we try to cache the selectivity for possible re-use, + * so passing RestrictInfos is preferred. + * + * varRelid is either 0 or a rangetable index. + * + * When varRelid is not 0, only variables belonging to that relation are + * considered in computing selectivity; other vars are treated as constants + * of unknown values. This is appropriate for estimating the selectivity of + * a join clause that is being used as a restriction clause in a scan of a + * nestloop join's inner relation --- varRelid should then be the ID of the + * inner relation. + * + * When varRelid is 0, all variables are treated as variables. This + * is appropriate for ordinary join clauses and restriction clauses. + * + * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER + * if the clause isn't a join clause. 
+ * + * sjinfo is NULL for a non-join clause, otherwise it provides additional + * context information about the join being performed. There are some + * special cases: + * 1. For a special (not INNER) join, sjinfo is always a member of + * root->join_info_list. + * 2. For an INNER join, sjinfo is just a transient struct, and only the + * relids and jointype fields in it can be trusted. + * It is possible for jointype to be different from sjinfo->jointype. + * This indicates we are considering a variant join: either with + * the LHS and RHS switched, or with one input unique-ified. + * + * Note: when passing nonzero varRelid, it's normally appropriate to set + * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a + * join clause; because we aren't treating it as a join clause. + */ +Selectivity +clause_selectivity(PlannerInfo *root, + Node *clause, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo) +{ + return clause_selectivity_ext(root, clause, varRelid, + jointype, sjinfo, true); +} + +/* + * clause_selectivity_ext - + * Extended version of clause_selectivity(). If "use_extended_stats" is + * false, all extended statistics will be ignored, and only per-column + * statistics will be used. + */ +Selectivity +clause_selectivity_ext(PlannerInfo *root, + Node *clause, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + bool use_extended_stats) +{ + Selectivity s1 = 0.5; /* default for any unhandled clause type */ + RestrictInfo *rinfo = NULL; + bool cacheable = false; + + if (clause == NULL) /* can this still happen? */ + return s1; + + if (IsA(clause, RestrictInfo)) + { + rinfo = (RestrictInfo *) clause; + + /* + * If the clause is marked pseudoconstant, then it will be used as a + * gating qual and should not affect selectivity estimates; hence + * return 1.0. The only exception is that a constant FALSE may be + * taken as having selectivity 0.0, since it will surely mean no rows + * out of the plan. This case is simple enough that we need not + * bother caching the result. + */ + if (rinfo->pseudoconstant) + { + if (!IsA(rinfo->clause, Const)) + return (Selectivity) 1.0; + } + + /* + * If the clause is marked redundant, always return 1.0. + */ + if (rinfo->norm_selec > 1) + return (Selectivity) 1.0; + + /* + * If possible, cache the result of the selectivity calculation for + * the clause. We can cache if varRelid is zero or the clause + * contains only vars of that relid --- otherwise varRelid will affect + * the result, so mustn't cache. Outer join quals might be examined + * with either their join's actual jointype or JOIN_INNER, so we need + * two cache variables to remember both cases. Note: we assume the + * result won't change if we are switching the input relations or + * considering a unique-ified case, so we only need one cache variable + * for all non-JOIN_INNER cases. + */ + if (varRelid == 0 || + bms_is_subset_singleton(rinfo->clause_relids, varRelid)) + { + /* Cacheable --- do we already have the result? */ + if (jointype == JOIN_INNER) + { + if (rinfo->norm_selec >= 0) + return rinfo->norm_selec; + } + else + { + if (rinfo->outer_selec >= 0) + return rinfo->outer_selec; + } + cacheable = true; + } + + /* + * Proceed with examination of contained clause. If the clause is an + * OR-clause, we want to look at the variant with sub-RestrictInfos, + * so that per-subclause selectivities can be cached. 
+ */ + if (rinfo->orclause) + clause = (Node *) rinfo->orclause; + else + clause = (Node *) rinfo->clause; + } + + if (IsA(clause, Var)) + { + Var *var = (Var *) clause; + + /* + * We probably shouldn't ever see an uplevel Var here, but if we do, + * return the default selectivity... + */ + if (var->varlevelsup == 0 && + (varRelid == 0 || varRelid == (int) var->varno)) + { + /* Use the restriction selectivity function for a bool Var */ + s1 = boolvarsel(root, (Node *) var, varRelid); + } + } + else if (IsA(clause, Const)) + { + /* bool constant is pretty easy... */ + Const *con = (Const *) clause; + + s1 = con->constisnull ? 0.0 : + DatumGetBool(con->constvalue) ? 1.0 : 0.0; + } + else if (IsA(clause, Param)) + { + /* see if we can replace the Param */ + Node *subst = estimate_expression_value(root, clause); + + if (IsA(subst, Const)) + { + /* bool constant is pretty easy... */ + Const *con = (Const *) subst; + + s1 = con->constisnull ? 0.0 : + DatumGetBool(con->constvalue) ? 1.0 : 0.0; + } + else + { + /* XXX any way to do better than default? */ + } + } + else if (is_notclause(clause)) + { + /* inverse of the selectivity of the underlying clause */ + s1 = 1.0 - clause_selectivity_ext(root, + (Node *) get_notclausearg((Expr *) clause), + varRelid, + jointype, + sjinfo, + use_extended_stats); + } + else if (is_andclause(clause)) + { + /* share code with clauselist_selectivity() */ + s1 = clauselist_selectivity_ext(root, + ((BoolExpr *) clause)->args, + varRelid, + jointype, + sjinfo, + use_extended_stats); + } + else if (is_orclause(clause)) + { + /* + * Almost the same thing as clauselist_selectivity, but with the + * clauses connected by OR. + */ + s1 = clauselist_selectivity_or(root, + ((BoolExpr *) clause)->args, + varRelid, + jointype, + sjinfo, + use_extended_stats); + } + else if (is_opclause(clause) || IsA(clause, DistinctExpr)) + { + OpExpr *opclause = (OpExpr *) clause; + Oid opno = opclause->opno; + + if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo)) + { + /* Estimate selectivity for a join clause. */ + s1 = join_selectivity(root, opno, + opclause->args, + opclause->inputcollid, + jointype, + sjinfo); + } + else + { + /* Estimate selectivity for a restriction clause. */ + s1 = restriction_selectivity(root, opno, + opclause->args, + opclause->inputcollid, + varRelid); + } + + /* + * DistinctExpr has the same representation as OpExpr, but the + * contained operator is "=" not "<>", so we must negate the result. + * This estimation method doesn't give the right behavior for nulls, + * but it's better than doing nothing. 
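+ * For example, if the "=" estimate is 0.005, the IS DISTINCT FROM clause
+ * is estimated at 1 - 0.005 = 0.995.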
+ */ + if (IsA(clause, DistinctExpr)) + s1 = 1.0 - s1; + } + else if (is_funcclause(clause)) + { + FuncExpr *funcclause = (FuncExpr *) clause; + + /* Try to get an estimate from the support function, if any */ + s1 = function_selectivity(root, + funcclause->funcid, + funcclause->args, + funcclause->inputcollid, + treat_as_join_clause(root, clause, rinfo, + varRelid, sjinfo), + varRelid, + jointype, + sjinfo); + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + /* Use node specific selectivity calculation function */ + s1 = scalararraysel(root, + (ScalarArrayOpExpr *) clause, + treat_as_join_clause(root, clause, rinfo, + varRelid, sjinfo), + varRelid, + jointype, + sjinfo); + } + else if (IsA(clause, RowCompareExpr)) + { + /* Use node specific selectivity calculation function */ + s1 = rowcomparesel(root, + (RowCompareExpr *) clause, + varRelid, + jointype, + sjinfo); + } + else if (IsA(clause, NullTest)) + { + /* Use node specific selectivity calculation function */ + s1 = nulltestsel(root, + ((NullTest *) clause)->nulltesttype, + (Node *) ((NullTest *) clause)->arg, + varRelid, + jointype, + sjinfo); + } + else if (IsA(clause, BooleanTest)) + { + /* Use node specific selectivity calculation function */ + s1 = booltestsel(root, + ((BooleanTest *) clause)->booltesttype, + (Node *) ((BooleanTest *) clause)->arg, + varRelid, + jointype, + sjinfo); + } + else if (IsA(clause, CurrentOfExpr)) + { + /* CURRENT OF selects at most one row of its table */ + CurrentOfExpr *cexpr = (CurrentOfExpr *) clause; + RelOptInfo *crel = find_base_rel(root, cexpr->cvarno); + + if (crel->tuples > 0) + s1 = 1.0 / crel->tuples; + } + else if (IsA(clause, RelabelType)) + { + /* Not sure this case is needed, but it can't hurt */ + s1 = clause_selectivity_ext(root, + (Node *) ((RelabelType *) clause)->arg, + varRelid, + jointype, + sjinfo, + use_extended_stats); + } + else if (IsA(clause, CoerceToDomain)) + { + /* Not sure this case is needed, but it can't hurt */ + s1 = clause_selectivity_ext(root, + (Node *) ((CoerceToDomain *) clause)->arg, + varRelid, + jointype, + sjinfo, + use_extended_stats); + } + else + { + /* + * For anything else, see if we can consider it as a boolean variable. + * This only works if it's an immutable expression in Vars of a single + * relation; but there's no point in us checking that here because + * boolvarsel() will do it internally, and return a suitable default + * selectivity if not. 
+ */ + s1 = boolvarsel(root, clause, varRelid); + } + + /* Cache the result if possible */ + if (cacheable) + { + if (jointype == JOIN_INNER) + rinfo->norm_selec = s1; + else + rinfo->outer_selec = s1; + } + +#ifdef SELECTIVITY_DEBUG + elog(DEBUG4, "clause_selectivity: s1 %f", s1); +#endif /* SELECTIVITY_DEBUG */ + + return s1; +} diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c new file mode 100644 index 0000000..006f91f --- /dev/null +++ b/src/backend/optimizer/path/costsize.c @@ -0,0 +1,6176 @@ +/*------------------------------------------------------------------------- + * + * costsize.c + * Routines to compute (and set) relation sizes and path costs + * + * Path costs are measured in arbitrary units established by these basic + * parameters: + * + * seq_page_cost Cost of a sequential page fetch + * random_page_cost Cost of a non-sequential page fetch + * cpu_tuple_cost Cost of typical CPU time to process a tuple + * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple + * cpu_operator_cost Cost of CPU time to execute an operator or function + * parallel_tuple_cost Cost of CPU time to pass a tuple from worker to leader backend + * parallel_setup_cost Cost of setting up shared memory for parallelism + * + * We expect that the kernel will typically do some amount of read-ahead + * optimization; this in conjunction with seek costs means that seq_page_cost + * is normally considerably less than random_page_cost. (However, if the + * database is fully cached in RAM, it is reasonable to set them equal.) + * + * We also use a rough estimate "effective_cache_size" of the number of + * disk pages in Postgres + OS-level disk cache. (We can't simply use + * NBuffers for this purpose because that would ignore the effects of + * the kernel's disk cache.) + * + * Obviously, taking constants for these values is an oversimplification, + * but it's tough enough to get any useful estimates even at this level of + * detail. Note that all of these parameters are user-settable, in case + * the default values are drastically off for a particular platform. + * + * seq_page_cost and random_page_cost can also be overridden for an individual + * tablespace, in case some data is on a fast disk and other data is on a slow + * disk. Per-tablespace overrides never apply to temporary work files such as + * an external sort or a materialize node that overflows work_mem. + * + * We compute two separate costs for each path: + * total_cost: total estimated cost to fetch all tuples + * startup_cost: cost that is expended before first tuple is fetched + * In some scenarios, such as when there is a LIMIT or we are implementing + * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the + * path's result. A caller can estimate the cost of fetching a partial + * result by interpolating between startup_cost and total_cost. In detail: + * actual_cost = startup_cost + + * (total_cost - startup_cost) * tuples_to_fetch / path->rows; + * Note that a base relation's rows count (and, by extension, plan_rows for + * plan nodes below the LIMIT node) are set without regard to any LIMIT, so + * that this equation works properly. (Note: while path->rows is never zero + * for ordinary relations, it is zero for paths for provably-empty relations, + * so beware of division-by-zero.) The LIMIT is applied as a top-level + * plan node. 
+ * + * For largely historical reasons, most of the routines in this module use + * the passed result Path only to store their results (rows, startup_cost and + * total_cost) into. All the input data they need is passed as separate + * parameters, even though much of it could be extracted from the Path. + * An exception is made for the cost_XXXjoin() routines, which expect all + * the other fields of the passed XXXPath to be filled in, and similarly + * cost_index() assumes the passed IndexPath is valid except for its output + * values. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/costsize.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <math.h> + +#include "access/amapi.h" +#include "access/htup_details.h" +#include "access/tsmapi.h" +#include "executor/executor.h" +#include "executor/nodeAgg.h" +#include "executor/nodeHash.h" +#include "executor/nodeMemoize.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/placeholder.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" +#include "utils/spccache.h" +#include "utils/tuplesort.h" + + +#define LOG2(x) (log(x) / 0.693147180559945) + +/* + * Append and MergeAppend nodes are less expensive than some other operations + * which use cpu_tuple_cost; instead of adding a separate GUC, estimate the + * per-tuple cost as cpu_tuple_cost multiplied by this value. + */ +#define APPEND_CPU_COST_MULTIPLIER 0.5 + +/* + * Maximum value for row estimates. We cap row estimates to this to help + * ensure that costs based on these estimates remain within the range of what + * double can represent. add_path() wouldn't act sanely given infinite or NaN + * cost values. 
+ */ +#define MAXIMUM_ROWCOUNT 1e100 + +double seq_page_cost = DEFAULT_SEQ_PAGE_COST; +double random_page_cost = DEFAULT_RANDOM_PAGE_COST; +double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST; +double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST; +double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST; +double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST; +double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST; + +int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE; + +Cost disable_cost = 1.0e10; + +int max_parallel_workers_per_gather = 2; + +bool enable_seqscan = true; +bool enable_indexscan = true; +bool enable_indexonlyscan = true; +bool enable_bitmapscan = true; +bool enable_tidscan = true; +bool enable_sort = true; +bool enable_incremental_sort = true; +bool enable_hashagg = true; +bool enable_nestloop = true; +bool enable_material = true; +bool enable_memoize = true; +bool enable_mergejoin = true; +bool enable_hashjoin = true; +bool enable_gathermerge = true; +bool enable_partitionwise_join = false; +bool enable_partitionwise_aggregate = false; +bool enable_parallel_append = true; +bool enable_parallel_hash = true; +bool enable_partition_pruning = true; +bool enable_async_append = true; + +typedef struct +{ + PlannerInfo *root; + QualCost total; +} cost_qual_eval_context; + +static List *extract_nonindex_conditions(List *qual_clauses, List *indexclauses); +static MergeScanSelCache *cached_scansel(PlannerInfo *root, + RestrictInfo *rinfo, + PathKey *pathkey); +static void cost_rescan(PlannerInfo *root, Path *path, + Cost *rescan_startup_cost, Cost *rescan_total_cost); +static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context); +static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info, + QualCost *qpqual_cost); +static bool has_indexed_join_quals(NestPath *joinpath); +static double approx_tuple_count(PlannerInfo *root, JoinPath *path, + List *quals); +static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + double outer_rows, + double inner_rows, + SpecialJoinInfo *sjinfo, + List *restrictlist); +static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root, + Relids outer_relids, + Relids inner_relids, + SpecialJoinInfo *sjinfo, + List **restrictlist); +static Cost append_nonpartial_cost(List *subpaths, int numpaths, + int parallel_workers); +static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); +static double relation_byte_size(double tuples, int width); +static double page_size(double tuples, int width); +static double get_parallel_divisor(Path *path); + + +/* + * clamp_row_est + * Force a row-count estimate to a sane value. + */ +double +clamp_row_est(double nrows) +{ + /* + * Avoid infinite and NaN row estimates. Costs derived from such values + * are going to be useless. Also force the estimate to be at least one + * row, to make explain output look better and to avoid possible + * divide-by-zero when interpolating costs. Make it an integer, too. + */ + if (nrows > MAXIMUM_ROWCOUNT || isnan(nrows)) + nrows = MAXIMUM_ROWCOUNT; + else if (nrows <= 1.0) + nrows = 1.0; + else + nrows = rint(nrows); + + return nrows; +} + + +/* + * cost_seqscan + * Determines and returns the cost of scanning a relation sequentially. 
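+ * (Roughly, the estimate below is spc_seq_page_cost * pages for disk I/O
+ * plus cpu_tuple_cost and qual-evaluation cost per tuple scanned, plus
+ * tlist evaluation cost per output row.)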
+ * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_seqscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost cpu_run_cost; + Cost disk_run_cost; + double spc_seq_page_cost; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + if (!enable_seqscan) + startup_cost += disable_cost; + + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + NULL, + &spc_seq_page_cost); + + /* + * disk costs + */ + disk_run_cost = spc_seq_page_cost * baserel->pages; + + /* CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + cpu_run_cost = cpu_per_tuple * baserel->tuples; + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows; + + /* Adjust costing for parallelism, if used. */ + if (path->parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(path); + + /* The CPU cost is divided among all the workers. */ + cpu_run_cost /= parallel_divisor; + + /* + * It may be possible to amortize some of the I/O cost, but probably + * not very much, because most operating systems already do aggressive + * prefetching. For now, we assume that the disk run cost can't be + * amortized at all. + */ + + /* + * In the case of a parallel plan, the row count needs to represent + * the number of tuples processed per worker. + */ + path->rows = clamp_row_est(path->rows / parallel_divisor); + } + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + cpu_run_cost + disk_run_cost; +} + +/* + * cost_samplescan + * Determines and returns the cost of scanning a relation using sampling. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_samplescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + RangeTblEntry *rte; + TableSampleClause *tsc; + TsmRoutine *tsm; + double spc_seq_page_cost, + spc_random_page_cost, + spc_page_cost; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations with tablesample clauses */ + Assert(baserel->relid > 0); + rte = planner_rt_fetch(baserel->relid, root); + Assert(rte->rtekind == RTE_RELATION); + tsc = rte->tablesample; + Assert(tsc != NULL); + tsm = GetTsmRoutine(tsc->tsmhandler); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* if NextSampleBlock is used, assume random access, else sequential */ + spc_page_cost = (tsm->NextSampleBlock != NULL) ? 
+ spc_random_page_cost : spc_seq_page_cost; + + /* + * disk costs (recall that baserel->pages has already been set to the + * number of pages the sampling method will visit) + */ + run_cost += spc_page_cost * baserel->pages; + + /* + * CPU costs (recall that baserel->tuples has already been set to the + * number of tuples the sampling method will select). Note that we ignore + * execution cost of the TABLESAMPLE parameter expressions; they will be + * evaluated only once per scan, and in most usages they'll likely be + * simple constants anyway. We also don't charge anything for the + * calculations the sampling method might do internally. + */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_gather + * Determines and returns the cost of gather path. + * + * 'rel' is the relation to be operated upon + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + * 'rows' may be used to point to a row estimate; if non-NULL, it overrides + * both 'rel' and 'param_info'. This is useful when the path doesn't exactly + * correspond to any particular RelOptInfo. + */ +void +cost_gather(GatherPath *path, PlannerInfo *root, + RelOptInfo *rel, ParamPathInfo *param_info, + double *rows) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + + /* Mark the path with the correct row estimate */ + if (rows) + path->path.rows = *rows; + else if (param_info) + path->path.rows = param_info->ppi_rows; + else + path->path.rows = rel->rows; + + startup_cost = path->subpath->startup_cost; + + run_cost = path->subpath->total_cost - path->subpath->startup_cost; + + /* Parallel setup and communication cost. */ + startup_cost += parallel_setup_cost; + run_cost += parallel_tuple_cost * path->path.rows; + + path->path.startup_cost = startup_cost; + path->path.total_cost = (startup_cost + run_cost); +} + +/* + * cost_gather_merge + * Determines and returns the cost of gather merge path. + * + * GatherMerge merges several pre-sorted input streams, using a heap that at + * any given instant holds the next tuple from each stream. If there are N + * streams, we need about N*log2(N) tuple comparisons to construct the heap at + * startup, and then for each output tuple, about log2(N) comparisons to + * replace the top heap entry with the next tuple from the same stream. + */ +void +cost_gather_merge(GatherMergePath *path, PlannerInfo *root, + RelOptInfo *rel, ParamPathInfo *param_info, + Cost input_startup_cost, Cost input_total_cost, + double *rows) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost comparison_cost; + double N; + double logN; + + /* Mark the path with the correct row estimate */ + if (rows) + path->path.rows = *rows; + else if (param_info) + path->path.rows = param_info->ppi_rows; + else + path->path.rows = rel->rows; + + if (!enable_gathermerge) + startup_cost += disable_cost; + + /* + * Add one to the number of workers to account for the leader. This might + * be overgenerous since the leader will do less work than other workers + * in typical cases, but we'll go with it for now. 
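+ *
+ * For example, with 3 workers plus the leader, N = 4: heap construction
+ * costs about 4 * log2(4) = 8 comparisons, and each output tuple needs
+ * about log2(4) = 2 comparisons to replace the top heap entry.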
+ */ + Assert(path->num_workers > 0); + N = (double) path->num_workers + 1; + logN = LOG2(N); + + /* Assumed cost per tuple comparison */ + comparison_cost = 2.0 * cpu_operator_cost; + + /* Heap creation cost */ + startup_cost += comparison_cost * N * logN; + + /* Per-tuple heap maintenance cost */ + run_cost += path->path.rows * comparison_cost * logN; + + /* small cost for heap management, like cost_merge_append */ + run_cost += cpu_operator_cost * path->path.rows; + + /* + * Parallel setup and communication cost. Since Gather Merge, unlike + * Gather, requires us to block until a tuple is available from every + * worker, we bump the IPC cost up a little bit as compared with Gather. + * For lack of a better idea, charge an extra 5%. + */ + startup_cost += parallel_setup_cost; + run_cost += parallel_tuple_cost * path->path.rows * 1.05; + + path->path.startup_cost = startup_cost + input_startup_cost; + path->path.total_cost = (startup_cost + run_cost + input_total_cost); +} + +/* + * cost_index + * Determines and returns the cost of scanning a relation using an index. + * + * 'path' describes the indexscan under consideration, and is complete + * except for the fields to be set by this routine + * 'loop_count' is the number of repetitions of the indexscan to factor into + * estimates of caching behavior + * + * In addition to rows, startup_cost and total_cost, cost_index() sets the + * path's indextotalcost and indexselectivity fields. These values will be + * needed if the IndexPath is used in a BitmapIndexScan. + * + * NOTE: path->indexquals must contain only clauses usable as index + * restrictions. Any additional quals evaluated as qpquals may reduce the + * number of returned tuples, but they won't reduce the number of tuples + * we have to fetch from the table, so they don't reduce the scan cost. + */ +void +cost_index(IndexPath *path, PlannerInfo *root, double loop_count, + bool partial_path) +{ + IndexOptInfo *index = path->indexinfo; + RelOptInfo *baserel = index->rel; + bool indexonly = (path->path.pathtype == T_IndexOnlyScan); + amcostestimate_function amcostestimate; + List *qpquals; + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_run_cost = 0; + Cost indexStartupCost; + Cost indexTotalCost; + Selectivity indexSelectivity; + double indexCorrelation, + csquared; + double spc_seq_page_cost, + spc_random_page_cost; + Cost min_IO_cost, + max_IO_cost; + QualCost qpqual_cost; + Cost cpu_per_tuple; + double tuples_fetched; + double pages_fetched; + double rand_heap_pages; + double index_pages; + + /* Should only be applied to base relations */ + Assert(IsA(baserel, RelOptInfo) && + IsA(index, IndexOptInfo)); + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* + * Mark the path with the correct row estimate, and identify which quals + * will need to be enforced as qpquals. We need not check any quals that + * are implied by the index's predicate, so we can use indrestrictinfo not + * baserestrictinfo as the list of relevant restriction clauses for the + * rel. 
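+ *
+ * For a hypothetical illustration: with an index on (a) and restriction
+ * clauses a = 1 AND b = 2, the a = 1 clause is enforced by the index
+ * machinery while b = 2 ends up in qpquals, so its evaluation cost is
+ * charged per fetched heap tuple further below.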
+ */ + if (path->path.param_info) + { + path->path.rows = path->path.param_info->ppi_rows; + /* qpquals come from the rel's restriction clauses and ppi_clauses */ + qpquals = list_concat(extract_nonindex_conditions(path->indexinfo->indrestrictinfo, + path->indexclauses), + extract_nonindex_conditions(path->path.param_info->ppi_clauses, + path->indexclauses)); + } + else + { + path->path.rows = baserel->rows; + /* qpquals come from just the rel's restriction clauses */ + qpquals = extract_nonindex_conditions(path->indexinfo->indrestrictinfo, + path->indexclauses); + } + + if (!enable_indexscan) + startup_cost += disable_cost; + /* we don't need to check enable_indexonlyscan; indxpath.c does that */ + + /* + * Call index-access-method-specific code to estimate the processing cost + * for scanning the index, as well as the selectivity of the index (ie, + * the fraction of main-table tuples we will have to retrieve) and its + * correlation to the main-table tuple order. We need a cast here because + * pathnodes.h uses a weak function type to avoid including amapi.h. + */ + amcostestimate = (amcostestimate_function) index->amcostestimate; + amcostestimate(root, path, loop_count, + &indexStartupCost, &indexTotalCost, + &indexSelectivity, &indexCorrelation, + &index_pages); + + /* + * Save amcostestimate's results for possible use in bitmap scan planning. + * We don't bother to save indexStartupCost or indexCorrelation, because a + * bitmap scan doesn't care about either. + */ + path->indextotalcost = indexTotalCost; + path->indexselectivity = indexSelectivity; + + /* all costs for touching index itself included here */ + startup_cost += indexStartupCost; + run_cost += indexTotalCost - indexStartupCost; + + /* estimate number of main-table tuples fetched */ + tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + + /* fetch estimated page costs for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /*---------- + * Estimate number of main-table pages fetched, and compute I/O cost. + * + * When the index ordering is uncorrelated with the table ordering, + * we use an approximation proposed by Mackert and Lohman (see + * index_pages_fetched() for details) to compute the number of pages + * fetched, and then charge spc_random_page_cost per page fetched. + * + * When the index ordering is exactly correlated with the table ordering + * (just after a CLUSTER, for example), the number of pages fetched should + * be exactly selectivity * table_size. What's more, all but the first + * will be sequential fetches, not the random fetches that occur in the + * uncorrelated case. So if the number of pages is more than 1, we + * ought to charge + * spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost + * For partially-correlated indexes, we ought to charge somewhere between + * these two estimates. We currently interpolate linearly between the + * estimates based on the correlation squared (XXX is that appropriate?). + * + * If it's an index-only scan, then we will not need to fetch any heap + * pages for which the visibility map shows all tuples are visible. + * Hence, reduce the estimated number of heap fetches accordingly. + * We use the measured fraction of the entire heap that is all-visible, + * which might not be particularly relevant to the subset of the heap + * that this query will fetch; but it's not clear how to do better. 
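+ *
+ * A hypothetical instance of the interpolation: with an estimated index
+ * correlation of 0.5, csquared is 0.25, so the charged I/O cost is
+ * max_IO_cost + 0.25 * (min_IO_cost - max_IO_cost), i.e. 75% of the
+ * uncorrelated estimate plus 25% of the fully correlated one.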
+ *---------- + */ + if (loop_count > 1) + { + /* + * For repeated indexscans, the appropriate estimate for the + * uncorrelated case is to scale up the number of tuples fetched in + * the Mackert and Lohman formula by the number of scans, so that we + * estimate the number of pages fetched by all the scans; then + * pro-rate the costs for one scan. In this case we assume all the + * fetches are random accesses. + */ + pages_fetched = index_pages_fetched(tuples_fetched * loop_count, + baserel->pages, + (double) index->pages, + root); + + if (indexonly) + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + + rand_heap_pages = pages_fetched; + + max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count; + + /* + * In the perfectly correlated case, the number of pages touched by + * each scan is selectivity * table_size, and we can use the Mackert + * and Lohman formula at the page level to estimate how much work is + * saved by caching across scans. We still assume all the fetches are + * random, though, which is an overestimate that's hard to correct for + * without double-counting the cache effects. (But in most cases + * where such a plan is actually interesting, only one page would get + * fetched per scan anyway, so it shouldn't matter much.) + */ + pages_fetched = ceil(indexSelectivity * (double) baserel->pages); + + pages_fetched = index_pages_fetched(pages_fetched * loop_count, + baserel->pages, + (double) index->pages, + root); + + if (indexonly) + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + + min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count; + } + else + { + /* + * Normal case: apply the Mackert and Lohman formula, and then + * interpolate between that and the correlation-derived result. + */ + pages_fetched = index_pages_fetched(tuples_fetched, + baserel->pages, + (double) index->pages, + root); + + if (indexonly) + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + + rand_heap_pages = pages_fetched; + + /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ + max_IO_cost = pages_fetched * spc_random_page_cost; + + /* min_IO_cost is for the perfectly correlated case (csquared=1) */ + pages_fetched = ceil(indexSelectivity * (double) baserel->pages); + + if (indexonly) + pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + + if (pages_fetched > 0) + { + min_IO_cost = spc_random_page_cost; + if (pages_fetched > 1) + min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost; + } + else + min_IO_cost = 0; + } + + if (partial_path) + { + /* + * For index only scans compute workers based on number of index pages + * fetched; the number of heap pages we fetch might be so small as to + * effectively rule out parallelism, which we don't want to do. + */ + if (indexonly) + rand_heap_pages = -1; + + /* + * Estimate the number of parallel workers required to scan index. Use + * the number of heap pages computed considering heap fetches won't be + * sequential as for parallel scans the pages are accessed in random + * order. + */ + path->path.parallel_workers = compute_parallel_worker(baserel, + rand_heap_pages, + index_pages, + max_parallel_workers_per_gather); + + /* + * Fall out if workers can't be assigned for parallel scan, because in + * such a case this path will be rejected. So there is no benefit in + * doing extra computation. 
+ */ + if (path->path.parallel_workers <= 0) + return; + + path->path.parallel_aware = true; + } + + /* + * Now interpolate based on estimated index order correlation to get total + * disk I/O cost for main table accesses. + */ + csquared = indexCorrelation * indexCorrelation; + + run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + + /* + * Estimate CPU costs per tuple. + * + * What we want here is cpu_tuple_cost plus the evaluation costs of any + * qual clauses that we have to evaluate as qpquals. + */ + cost_qual_eval(&qpqual_cost, qpquals, root); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + + cpu_run_cost += cpu_per_tuple * tuples_fetched; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->path.pathtarget->cost.startup; + cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + cpu_run_cost /= parallel_divisor; + } + + run_cost += cpu_run_cost; + + path->path.startup_cost = startup_cost; + path->path.total_cost = startup_cost + run_cost; +} + +/* + * extract_nonindex_conditions + * + * Given a list of quals to be enforced in an indexscan, extract the ones that + * will have to be applied as qpquals (ie, the index machinery won't handle + * them). Here we detect only whether a qual clause is directly redundant + * with some indexclause. If the index path is chosen for use, createplan.c + * will try a bit harder to get rid of redundant qual conditions; specifically + * it will see if quals can be proven to be implied by the indexquals. But + * it does not seem worth the cycles to try to factor that in at this stage, + * since we're only trying to estimate qual eval costs. Otherwise this must + * match the logic in create_indexscan_plan(). + * + * qual_clauses, and the result, are lists of RestrictInfos. + * indexclauses is a list of IndexClauses. + */ +static List * +extract_nonindex_conditions(List *qual_clauses, List *indexclauses) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, qual_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (is_redundant_with_indexclauses(rinfo, indexclauses)) + continue; /* dup or derived from same EquivalenceClass */ + /* ... skip the predicate proof attempt createplan.c will try ... */ + result = lappend(result, rinfo); + } + return result; +} + +/* + * index_pages_fetched + * Estimate the number of pages actually fetched after accounting for + * cache effects. + * + * We use an approximation proposed by Mackert and Lohman, "Index Scans + * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions + * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424. 
+ * The Mackert and Lohman approximation is that the number of pages + * fetched is + * PF = + * min(2TNs/(2T+Ns), T) when T <= b + * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b) + * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b) + * where + * T = # pages in table + * N = # tuples in table + * s = selectivity = fraction of table to be scanned + * b = # buffer pages available (we include kernel space here) + * + * We assume that effective_cache_size is the total number of buffer pages + * available for the whole query, and pro-rate that space across all the + * tables in the query and the index currently under consideration. (This + * ignores space needed for other indexes used by the query, but since we + * don't know which indexes will get used, we can't estimate that very well; + * and in any case counting all the tables may well be an overestimate, since + * depending on the join plan not all the tables may be scanned concurrently.) + * + * The product Ns is the number of tuples fetched; we pass in that + * product rather than calculating it here. "pages" is the number of pages + * in the object under consideration (either an index or a table). + * "index_pages" is the amount to add to the total table space, which was + * computed for us by make_one_rel. + * + * Caller is expected to have ensured that tuples_fetched is greater than zero + * and rounded to integer (see clamp_row_est). The result will likewise be + * greater than zero and integral. + */ +double +index_pages_fetched(double tuples_fetched, BlockNumber pages, + double index_pages, PlannerInfo *root) +{ + double pages_fetched; + double total_pages; + double T, + b; + + /* T is # pages in table, but don't allow it to be zero */ + T = (pages > 1) ? (double) pages : 1.0; + + /* Compute number of pages assumed to be competing for cache space */ + total_pages = root->total_table_pages + index_pages; + total_pages = Max(total_pages, 1.0); + Assert(T <= total_pages); + + /* b is pro-rated share of effective_cache_size */ + b = (double) effective_cache_size * T / total_pages; + + /* force it positive and integral */ + if (b <= 1.0) + b = 1.0; + else + b = ceil(b); + + /* This part is the Mackert and Lohman formula */ + if (T <= b) + { + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + if (pages_fetched >= T) + pages_fetched = T; + else + pages_fetched = ceil(pages_fetched); + } + else + { + double lim; + + lim = (2.0 * T * b) / (2.0 * T - b); + if (tuples_fetched <= lim) + { + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + } + else + { + pages_fetched = + b + (tuples_fetched - lim) * (T - b) / T; + } + pages_fetched = ceil(pages_fetched); + } + return pages_fetched; +} + +/* + * get_indexpath_pages + * Determine the total size of the indexes used in a bitmap index path. + * + * Note: if the same index is used more than once in a bitmap tree, we will + * count it multiple times, which perhaps is the wrong thing ... but it's + * not completely clear, and detecting duplicates is difficult, so ignore it + * for now. 
+ */ +static double +get_indexpath_pages(Path *bitmapqual) +{ + double result = 0; + ListCell *l; + + if (IsA(bitmapqual, BitmapAndPath)) + { + BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; + + foreach(l, apath->bitmapquals) + { + result += get_indexpath_pages((Path *) lfirst(l)); + } + } + else if (IsA(bitmapqual, BitmapOrPath)) + { + BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; + + foreach(l, opath->bitmapquals) + { + result += get_indexpath_pages((Path *) lfirst(l)); + } + } + else if (IsA(bitmapqual, IndexPath)) + { + IndexPath *ipath = (IndexPath *) bitmapqual; + + result = (double) ipath->indexinfo->pages; + } + else + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); + + return result; +} + +/* + * cost_bitmap_heap_scan + * Determines and returns the cost of scanning a relation using a bitmap + * index-then-heap plan. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths + * 'loop_count' is the number of repetitions of the indexscan to factor into + * estimates of caching behavior + * + * Note: the component IndexPaths in bitmapqual should have been costed + * using the same loop_count. + */ +void +cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info, + Path *bitmapqual, double loop_count) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost indexTotalCost; + QualCost qpqual_cost; + Cost cpu_per_tuple; + Cost cost_per_page; + Cost cpu_run_cost; + double tuples_fetched; + double pages_fetched; + double spc_seq_page_cost, + spc_random_page_cost; + double T; + + /* Should only be applied to base relations */ + Assert(IsA(baserel, RelOptInfo)); + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + if (!enable_bitmapscan) + startup_cost += disable_cost; + + pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual, + loop_count, &indexTotalCost, + &tuples_fetched); + + startup_cost += indexTotalCost; + T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + + /* Fetch estimated page costs for tablespace containing table. */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* + * For small numbers of pages we should charge spc_random_page_cost + * apiece, while if nearly all the table's pages are being read, it's more + * appropriate to charge spc_seq_page_cost apiece. The effect is + * nonlinear, too. For lack of a better idea, interpolate like this to + * determine the cost per page. + */ + if (pages_fetched >= 2.0) + cost_per_page = spc_random_page_cost - + (spc_random_page_cost - spc_seq_page_cost) + * sqrt(pages_fetched / T); + else + cost_per_page = spc_random_page_cost; + + run_cost += pages_fetched * cost_per_page; + + /* + * Estimate CPU costs per tuple. + * + * Often the indexquals don't need to be rechecked at each tuple ... but + * not always, especially not if there are enough tuples involved that the + * bitmaps become lossy. For the moment, just assume they will be + * rechecked always. This means we charge the full freight for all the + * scan clauses. 
+ */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + cpu_run_cost = cpu_per_tuple * tuples_fetched; + + /* Adjust costing for parallelism, if used. */ + if (path->parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(path); + + /* The CPU cost is divided among all the workers. */ + cpu_run_cost /= parallel_divisor; + + path->rows = clamp_row_est(path->rows / parallel_divisor); + } + + + run_cost += cpu_run_cost; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_bitmap_tree_node + * Extract cost and selectivity from a bitmap tree node (index/and/or) + */ +void +cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec) +{ + if (IsA(path, IndexPath)) + { + *cost = ((IndexPath *) path)->indextotalcost; + *selec = ((IndexPath *) path)->indexselectivity; + + /* + * Charge a small amount per retrieved tuple to reflect the costs of + * manipulating the bitmap. This is mostly to make sure that a bitmap + * scan doesn't look to be the same cost as an indexscan to retrieve a + * single tuple. + */ + *cost += 0.1 * cpu_operator_cost * path->rows; + } + else if (IsA(path, BitmapAndPath)) + { + *cost = path->total_cost; + *selec = ((BitmapAndPath *) path)->bitmapselectivity; + } + else if (IsA(path, BitmapOrPath)) + { + *cost = path->total_cost; + *selec = ((BitmapOrPath *) path)->bitmapselectivity; + } + else + { + elog(ERROR, "unrecognized node type: %d", nodeTag(path)); + *cost = *selec = 0; /* keep compiler quiet */ + } +} + +/* + * cost_bitmap_and_node + * Estimate the cost of a BitmapAnd node + * + * Note that this considers only the costs of index scanning and bitmap + * creation, not the eventual heap access. In that sense the object isn't + * truly a Path, but it has enough path-like properties (costs in particular) + * to warrant treating it as one. We don't bother to set the path rows field, + * however. + */ +void +cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root) +{ + Cost totalCost; + Selectivity selec; + ListCell *l; + + /* + * We estimate AND selectivity on the assumption that the inputs are + * independent. This is probably often wrong, but we don't have the info + * to do better. + * + * The runtime cost of the BitmapAnd itself is estimated at 100x + * cpu_operator_cost for each tbm_intersect needed. Probably too small, + * definitely too simplistic? + */ + totalCost = 0.0; + selec = 1.0; + foreach(l, path->bitmapquals) + { + Path *subpath = (Path *) lfirst(l); + Cost subCost; + Selectivity subselec; + + cost_bitmap_tree_node(subpath, &subCost, &subselec); + + selec *= subselec; + + totalCost += subCost; + if (l != list_head(path->bitmapquals)) + totalCost += 100.0 * cpu_operator_cost; + } + path->bitmapselectivity = selec; + path->path.rows = 0; /* per above, not used */ + path->path.startup_cost = totalCost; + path->path.total_cost = totalCost; +} + +/* + * cost_bitmap_or_node + * Estimate the cost of a BitmapOr node + * + * See comments for cost_bitmap_and_node. 
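+ *
+ * Hypothetical illustration of the selectivity arithmetic: two
+ * non-overlapping arms with selectivities 0.01 and 0.02 yield a
+ * combined bitmapselectivity of 0.03, and the sum is clamped to 1.0 so
+ * a long IN-list can never appear to select more than the whole table.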
+ */ +void +cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root) +{ + Cost totalCost; + Selectivity selec; + ListCell *l; + + /* + * We estimate OR selectivity on the assumption that the inputs are + * non-overlapping, since that's often the case in "x IN (list)" type + * situations. Of course, we clamp to 1.0 at the end. + * + * The runtime cost of the BitmapOr itself is estimated at 100x + * cpu_operator_cost for each tbm_union needed. Probably too small, + * definitely too simplistic? We are aware that the tbm_unions are + * optimized out when the inputs are BitmapIndexScans. + */ + totalCost = 0.0; + selec = 0.0; + foreach(l, path->bitmapquals) + { + Path *subpath = (Path *) lfirst(l); + Cost subCost; + Selectivity subselec; + + cost_bitmap_tree_node(subpath, &subCost, &subselec); + + selec += subselec; + + totalCost += subCost; + if (l != list_head(path->bitmapquals) && + !IsA(subpath, IndexPath)) + totalCost += 100.0 * cpu_operator_cost; + } + path->bitmapselectivity = Min(selec, 1.0); + path->path.rows = 0; /* per above, not used */ + path->path.startup_cost = totalCost; + path->path.total_cost = totalCost; +} + +/* + * cost_tidscan + * Determines and returns the cost of scanning a relation using TIDs. + * + * 'baserel' is the relation to be scanned + * 'tidquals' is the list of TID-checkable quals + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_tidscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + bool isCurrentOf = false; + QualCost qpqual_cost; + Cost cpu_per_tuple; + QualCost tid_qual_cost; + int ntuples; + ListCell *l; + double spc_random_page_cost; + + /* Should only be applied to base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Count how many tuples we expect to retrieve */ + ntuples = 0; + foreach(l, tidquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + Expr *qual = rinfo->clause; + + if (IsA(qual, ScalarArrayOpExpr)) + { + /* Each element of the array yields 1 tuple */ + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual; + Node *arraynode = (Node *) lsecond(saop->args); + + ntuples += estimate_array_length(arraynode); + } + else if (IsA(qual, CurrentOfExpr)) + { + /* CURRENT OF yields 1 tuple */ + isCurrentOf = true; + ntuples++; + } + else + { + /* It's just CTID = something, count 1 tuple */ + ntuples++; + } + } + + /* + * We must force TID scan for WHERE CURRENT OF, because only nodeTidscan.c + * understands how to do it correctly. Therefore, honor enable_tidscan + * only when CURRENT OF isn't present. Also note that cost_qual_eval + * counts a CurrentOfExpr as having startup cost disable_cost, which we + * subtract off here; that's to prevent other plan types such as seqscan + * from winning. + */ + if (isCurrentOf) + { + Assert(baserel->baserestrictcost.startup >= disable_cost); + startup_cost -= disable_cost; + } + else if (!enable_tidscan) + startup_cost += disable_cost; + + /* + * The TID qual expressions will be computed once, any other baserestrict + * quals once per retrieved tuple. 
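+ *
+ * Hypothetical example of the counting above: WHERE ctid = '(0,1)'
+ * contributes 1 to ntuples, while ctid = ANY of a three-element array
+ * contributes 3, and the disk cost below then charges one random page
+ * fetch per expected tuple.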
+ */ + cost_qual_eval(&tid_qual_cost, tidquals, root); + + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + NULL); + + /* disk costs --- assume each tuple on a different page */ + run_cost += spc_random_page_cost * ntuples; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + /* XXX currently we assume TID quals are a subset of qpquals */ + startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple - + tid_qual_cost.per_tuple; + run_cost += cpu_per_tuple * ntuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_tidrangescan + * Determines and sets the costs of scanning a relation using a range of + * TIDs for 'path' + * + * 'baserel' is the relation to be scanned + * 'tidrangequals' is the list of TID-checkable range quals + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_tidrangescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, List *tidrangequals, + ParamPathInfo *param_info) +{ + Selectivity selectivity; + double pages; + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + QualCost tid_qual_cost; + double ntuples; + double nseqpages; + double spc_random_page_cost; + double spc_seq_page_cost; + + /* Should only be applied to base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Count how many tuples and pages we expect to scan */ + selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid, + JOIN_INNER, NULL); + pages = ceil(selectivity * baserel->pages); + + if (pages <= 0.0) + pages = 1.0; + + /* + * The first page in a range requires a random seek, but each subsequent + * page is just a normal sequential page read. NOTE: it's desirable for + * TID Range Scans to cost more than the equivalent Sequential Scans, + * because Seq Scans have some performance advantages such as scan + * synchronization and parallelizability, and we'd prefer one of them to + * be picked unless a TID Range Scan really is better. + */ + ntuples = selectivity * baserel->tuples; + nseqpages = pages - 1.0; + + if (!enable_tidscan) + startup_cost += disable_cost; + + /* + * The TID qual expressions will be computed once, any other baserestrict + * quals once per retrieved tuple. + */ + cost_qual_eval(&tid_qual_cost, tidrangequals, root); + + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* disk costs; 1 random page and the remainder as seq pages */ + run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + /* + * XXX currently we assume TID quals are a subset of qpquals at this + * point; they will be removed (if possible) when we create the plan, so + * we subtract their cost from the total qpqual cost. 
(If the TID quals + * can't be removed, this is a mistake and we're going to underestimate + * the CPU cost a bit.) + */ + startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple - + tid_qual_cost.per_tuple; + run_cost += cpu_per_tuple * ntuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_subqueryscan + * Determines and returns the cost of scanning a subquery RTE. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost; + Cost run_cost; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations that are subqueries */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_SUBQUERY); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->path.rows = param_info->ppi_rows; + else + path->path.rows = baserel->rows; + + /* + * Cost of path is cost of evaluating the subplan, plus cost of evaluating + * any restriction clauses and tlist that will be attached to the + * SubqueryScan node, plus cpu_tuple_cost to account for selection and + * projection overhead. + */ + path->path.startup_cost = path->subpath->startup_cost; + path->path.total_cost = path->subpath->total_cost; + + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost = qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost = cpu_per_tuple * baserel->tuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->path.pathtarget->cost.startup; + run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + + path->path.startup_cost += startup_cost; + path->path.total_cost += startup_cost + run_cost; +} + +/* + * cost_functionscan + * Determines and returns the cost of scanning a function RTE. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_functionscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + RangeTblEntry *rte; + QualCost exprcost; + + /* Should only be applied to base relations that are functions */ + Assert(baserel->relid > 0); + rte = planner_rt_fetch(baserel->relid, root); + Assert(rte->rtekind == RTE_FUNCTION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* + * Estimate costs of executing the function expression(s). + * + * Currently, nodeFunctionscan.c always executes the functions to + * completion before returning any rows, and caches the results in a + * tuplestore. So the function eval cost is all startup cost, and per-row + * costs are minimal. + * + * XXX in principle we ought to charge tuplestore spill costs if the + * number of rows is large. However, given how phony our rowcount + * estimates for functions tend to be, there's not a lot of point in that + * refinement right now. 
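+ *
+ * As a hypothetical example: scanning generate_series(1, 1000) charges
+ * the function's evaluation cost entirely to startup_cost, and the rows
+ * returned from the tuplestore afterwards each pay only cpu_tuple_cost
+ * plus any restriction-qual cost.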
+ */ + cost_qual_eval_node(&exprcost, (Node *) rte->functions, root); + + startup_cost += exprcost.startup + exprcost.per_tuple; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_tablefuncscan + * Determines and returns the cost of scanning a table function. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_tablefuncscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + RangeTblEntry *rte; + QualCost exprcost; + + /* Should only be applied to base relations that are functions */ + Assert(baserel->relid > 0); + rte = planner_rt_fetch(baserel->relid, root); + Assert(rte->rtekind == RTE_TABLEFUNC); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* + * Estimate costs of executing the table func expression(s). + * + * XXX in principle we ought to charge tuplestore spill costs if the + * number of rows is large. However, given how phony our rowcount + * estimates for tablefuncs tend to be, there's not a lot of point in that + * refinement right now. + */ + cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root); + + startup_cost += exprcost.startup + exprcost.per_tuple; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_valuesscan + * Determines and returns the cost of scanning a VALUES RTE. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_valuesscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations that are values lists */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_VALUES); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* + * For now, estimate list evaluation cost at one operator eval per list + * (probably pretty bogus, but is it worth being smarter?) 
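+ *
+ * Hypothetical illustration: VALUES (1), (2), (3) produces three rows,
+ * each charged cpu_operator_cost for list evaluation plus
+ * cpu_tuple_cost and any restriction-qual cost, so run_cost scales
+ * linearly with the length of the VALUES list.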
+ */ + cpu_per_tuple = cpu_operator_cost; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_ctescan + * Determines and returns the cost of scanning a CTE RTE. + * + * Note: this is used for both self-reference and regular CTEs; the + * possible cost differences are below the threshold of what we could + * estimate accurately anyway. Note that the costs of evaluating the + * referenced CTE query are added into the final plan as initplan costs, + * and should NOT be counted here. + */ +void +cost_ctescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations that are CTEs */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_CTE); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Charge one CPU tuple cost per row for tuplestore manipulation */ + cpu_per_tuple = cpu_tuple_cost; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_namedtuplestorescan + * Determines and returns the cost of scanning a named tuplestore. + */ +void +cost_namedtuplestorescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations that are Tuplestores */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_NAMEDTUPLESTORE); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Charge one CPU tuple cost per row for tuplestore manipulation */ + cpu_per_tuple = cpu_tuple_cost; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_resultscan + * Determines and returns the cost of scanning an RTE_RESULT relation. 
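+ *
+ * An RTE_RESULT relation stands in for an empty FROM clause (for
+ * example, a FROM-less SELECT after planner simplification) and yields
+ * a single row, so the cost below is essentially one cpu_tuple_cost
+ * plus the cost of any attached quals.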
+ */ +void +cost_resultscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to RTE_RESULT base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RESULT); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* We charge qual cost plus cpu_tuple_cost */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_recursive_union + * Determines and returns the cost of performing a recursive union, + * and also the estimated output size. + * + * We are given Paths for the nonrecursive and recursive terms. + */ +void +cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) +{ + Cost startup_cost; + Cost total_cost; + double total_rows; + + /* We probably have decent estimates for the non-recursive term */ + startup_cost = nrterm->startup_cost; + total_cost = nrterm->total_cost; + total_rows = nrterm->rows; + + /* + * We arbitrarily assume that about 10 recursive iterations will be + * needed, and that we've managed to get a good fix on the cost and output + * size of each one of them. These are mighty shaky assumptions but it's + * hard to see how to do better. + */ + total_cost += 10 * rterm->total_cost; + total_rows += 10 * rterm->rows; + + /* + * Also charge cpu_tuple_cost per row to account for the costs of + * manipulating the tuplestores. (We don't worry about possible + * spill-to-disk costs.) + */ + total_cost += cpu_tuple_cost * total_rows; + + runion->startup_cost = startup_cost; + runion->total_cost = total_cost; + runion->rows = total_rows; + runion->pathtarget->width = Max(nrterm->pathtarget->width, + rterm->pathtarget->width); +} + +/* + * cost_tuplesort + * Determines and returns the cost of sorting a relation using tuplesort, + * not including the cost of reading the input data. + * + * If the total volume of data to sort is less than sort_mem, we will do + * an in-memory sort, which requires no I/O and about t*log2(t) tuple + * comparisons for t tuples. + * + * If the total volume exceeds sort_mem, we switch to a tape-style merge + * algorithm. There will still be about t*log2(t) tuple comparisons in + * total, but we will also need to write and read each tuple once per + * merge pass. We expect about ceil(logM(r)) merge passes where r is the + * number of initial runs formed and M is the merge order used by tuplesort.c. + * Since the average initial run should be about sort_mem, we have + * disk traffic = 2 * relsize * ceil(logM(p / sort_mem)) + * cpu = comparison_cost * t * log2(t) + * + * If the sort is bounded (i.e., only the first k result tuples are needed) + * and k tuples can fit into sort_mem, we use a heap method that keeps only + * k tuples in the heap; this will require about t*log2(k) tuple comparisons. + * + * The disk traffic is assumed to be 3/4ths sequential and 1/4th random + * accesses (XXX can't we refine that guess?) + * + * By default, we charge two operator evals per tuple comparison, which should + * be in the right ballpark in most cases. 
The caller can tweak this by + * specifying nonzero comparison_cost; typically that's used for any extra + * work that has to be done to prepare the inputs to the comparison operators. + * + * 'tuples' is the number of tuples in the relation + * 'width' is the average tuple width in bytes + * 'comparison_cost' is the extra cost per comparison, if any + * 'sort_mem' is the number of kilobytes of work memory allowed for the sort + * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound + */ +static void +cost_tuplesort(Cost *startup_cost, Cost *run_cost, + double tuples, int width, + Cost comparison_cost, int sort_mem, + double limit_tuples) +{ + double input_bytes = relation_byte_size(tuples, width); + double output_bytes; + double output_tuples; + long sort_mem_bytes = sort_mem * 1024L; + + /* + * We want to be sure the cost of a sort is never estimated as zero, even + * if passed-in tuple count is zero. Besides, mustn't do log(0)... + */ + if (tuples < 2.0) + tuples = 2.0; + + /* Include the default cost-per-comparison */ + comparison_cost += 2.0 * cpu_operator_cost; + + /* Do we have a useful LIMIT? */ + if (limit_tuples > 0 && limit_tuples < tuples) + { + output_tuples = limit_tuples; + output_bytes = relation_byte_size(output_tuples, width); + } + else + { + output_tuples = tuples; + output_bytes = input_bytes; + } + + if (output_bytes > sort_mem_bytes) + { + /* + * We'll have to use a disk-based sort of all the tuples + */ + double npages = ceil(input_bytes / BLCKSZ); + double nruns = input_bytes / sort_mem_bytes; + double mergeorder = tuplesort_merge_order(sort_mem_bytes); + double log_runs; + double npageaccesses; + + /* + * CPU costs + * + * Assume about N log2 N comparisons + */ + *startup_cost = comparison_cost * tuples * LOG2(tuples); + + /* Disk costs */ + + /* Compute logM(r) as log(r) / log(M) */ + if (nruns > mergeorder) + log_runs = ceil(log(nruns) / log(mergeorder)); + else + log_runs = 1.0; + npageaccesses = 2.0 * npages * log_runs; + /* Assume 3/4ths of accesses are sequential, 1/4th are not */ + *startup_cost += npageaccesses * + (seq_page_cost * 0.75 + random_page_cost * 0.25); + } + else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes) + { + /* + * We'll use a bounded heap-sort keeping just K tuples in memory, for + * a total number of tuple comparisons of N log2 K; but the constant + * factor is a bit higher than for quicksort. Tweak it so that the + * cost curve is continuous at the crossover point. + */ + *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples); + } + else + { + /* We'll use plain quicksort on all the input tuples */ + *startup_cost = comparison_cost * tuples * LOG2(tuples); + } + + /* + * Also charge a small amount (arbitrarily set equal to operator cost) per + * extracted tuple. We don't charge cpu_tuple_cost because a Sort node + * doesn't do qual-checking or projection, so it has less overhead than + * most plan nodes. Note it's correct to use tuples not output_tuples + * here --- the upper LIMIT will pro-rate the run cost so we'd be double + * counting the LIMIT otherwise. + */ + *run_cost = cpu_operator_cost * tuples; +} + +/* + * cost_incremental_sort + * Determines and returns the cost of sorting a relation incrementally, when + * the input path is presorted by a prefix of the pathkeys. + * + * 'presorted_keys' is the number of leading pathkeys by which the input path + * is sorted. 
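+ *
+ * Hypothetical example: ORDER BY a, b over input already sorted by a
+ * has presorted_keys = 1, so only each run of equal "a" values needs
+ * to be sorted by b rather than sorting the entire input at once.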
+ * + * We estimate the number of groups into which the relation is divided by the + * leading pathkeys, and then calculate the cost of sorting a single group + * with tuplesort using cost_tuplesort(). + */ +void +cost_incremental_sort(Path *path, + PlannerInfo *root, List *pathkeys, int presorted_keys, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples, int width, Cost comparison_cost, int sort_mem, + double limit_tuples) +{ + Cost startup_cost = 0, + run_cost = 0, + input_run_cost = input_total_cost - input_startup_cost; + double group_tuples, + input_groups; + Cost group_startup_cost, + group_run_cost, + group_input_run_cost; + List *presortedExprs = NIL; + ListCell *l; + int i = 0; + bool unknown_varno = false; + + Assert(presorted_keys != 0); + + /* + * We want to be sure the cost of a sort is never estimated as zero, even + * if passed-in tuple count is zero. Besides, mustn't do log(0)... + */ + if (input_tuples < 2.0) + input_tuples = 2.0; + + /* Default estimate of number of groups, capped to one group per row. */ + input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT); + + /* + * Extract presorted keys as list of expressions. + * + * We need to be careful about Vars containing "varno 0" which might have + * been introduced by generate_append_tlist, which would confuse + * estimate_num_groups (in fact it'd fail for such expressions). See + * recurse_set_operations which has to deal with the same issue. + * + * Unlike recurse_set_operations we can't access the original target list + * here, and even if we could it's not very clear how useful would that be + * for a set operation combining multiple tables. So we simply detect if + * there are any expressions with "varno 0" and use the default + * DEFAULT_NUM_DISTINCT in that case. + * + * We might also use either 1.0 (a single group) or input_tuples (each row + * being a separate group), pretty much the worst and best case for + * incremental sort. But those are extreme cases and using something in + * between seems reasonable. Furthermore, generate_append_tlist is used + * for set operations, which are likely to produce mostly unique output + * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive + * while maintaining lower startup cost. + */ + foreach(l, pathkeys) + { + PathKey *key = (PathKey *) lfirst(l); + EquivalenceMember *member = (EquivalenceMember *) + linitial(key->pk_eclass->ec_members); + + /* + * Check if the expression contains Var with "varno 0" so that we + * don't call estimate_num_groups in that case. + */ + if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr))) + { + unknown_varno = true; + break; + } + + /* expression not containing any Vars with "varno 0" */ + presortedExprs = lappend(presortedExprs, member->em_expr); + + i++; + if (i >= presorted_keys) + break; + } + + /* Estimate number of groups with equal presorted keys. */ + if (!unknown_varno) + input_groups = estimate_num_groups(root, presortedExprs, input_tuples, + NULL, NULL); + + group_tuples = input_tuples / input_groups; + group_input_run_cost = input_run_cost / input_groups; + + /* + * Estimate average cost of sorting of one group where presorted keys are + * equal. Incremental sort is sensitive to distribution of tuples to the + * groups, where we're relying on quite rough assumptions. Thus, we're + * pessimistic about incremental sort performance and increase its average + * group size by half. 
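+ *
+ * Hypothetical numbers: 10000 input tuples estimated to fall into 100
+ * groups give group_tuples = 100, but the per-group tuplesort below is
+ * costed as if each group held 150 tuples.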
+ */ + cost_tuplesort(&group_startup_cost, &group_run_cost, + 1.5 * group_tuples, width, comparison_cost, sort_mem, + limit_tuples); + + /* + * Startup cost of incremental sort is the startup cost of its first group + * plus the cost of its input. + */ + startup_cost += group_startup_cost + + input_startup_cost + group_input_run_cost; + + /* + * After we started producing tuples from the first group, the cost of + * producing all the tuples is given by the cost to finish processing this + * group, plus the total cost to process the remaining groups, plus the + * remaining cost of input. + */ + run_cost += group_run_cost + + (group_run_cost + group_startup_cost) * (input_groups - 1) + + group_input_run_cost * (input_groups - 1); + + /* + * Incremental sort adds some overhead by itself. Firstly, it has to + * detect the sort groups. This is roughly equal to one extra copy and + * comparison per tuple. Secondly, it has to reset the tuplesort context + * for every group. + */ + run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples; + run_cost += 2.0 * cpu_tuple_cost * input_groups; + + path->rows = input_tuples; + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_sort + * Determines and returns the cost of sorting a relation, including + * the cost of reading the input data. + * + * NOTE: some callers currently pass NIL for pathkeys because they + * can't conveniently supply the sort keys. Since this routine doesn't + * currently do anything with pathkeys anyway, that doesn't matter... + * but if it ever does, it should react gracefully to lack of key data. + * (Actually, the thing we'd most likely be interested in is just the number + * of sort keys, which all callers *could* supply.) + */ +void +cost_sort(Path *path, PlannerInfo *root, + List *pathkeys, Cost input_cost, double tuples, int width, + Cost comparison_cost, int sort_mem, + double limit_tuples) + +{ + Cost startup_cost; + Cost run_cost; + + cost_tuplesort(&startup_cost, &run_cost, + tuples, width, + comparison_cost, sort_mem, + limit_tuples); + + if (!enable_sort) + startup_cost += disable_cost; + + startup_cost += input_cost; + + path->rows = tuples; + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * append_nonpartial_cost + * Estimate the cost of the non-partial paths in a Parallel Append. + * The non-partial paths are assumed to be the first "numpaths" paths + * from the subpaths list, and to be in order of decreasing cost. + */ +static Cost +append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) +{ + Cost *costarr; + int arrlen; + ListCell *l; + ListCell *cell; + int i; + int path_index; + int min_index; + int max_index; + + if (numpaths == 0) + return 0; + + /* + * Array length is number of workers or number of relevant paths, + * whichever is less. + */ + arrlen = Min(parallel_workers, numpaths); + costarr = (Cost *) palloc(sizeof(Cost) * arrlen); + + /* The first few paths will each be claimed by a different worker. */ + path_index = 0; + foreach(cell, subpaths) + { + Path *subpath = (Path *) lfirst(cell); + + if (path_index == arrlen) + break; + costarr[path_index++] = subpath->total_cost; + } + + /* + * Since subpaths are sorted by decreasing cost, the last one will have + * the minimum cost. + */ + min_index = arrlen - 1; + + /* + * For each of the remaining subpaths, add its cost to the array element + * with minimum cost. 
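+ *
+ * Hypothetical walk-through with 3 workers and non-partial subpath
+ * costs 20, 15, 10, 8 and 5: the first three claim one slot each
+ * (20, 15, 10); 8 is then added to the cheapest slot (10 -> 18) and 5
+ * to the next cheapest (15 -> 20), so the function returns the largest
+ * per-slot total, 20.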
+ */ + for_each_cell(l, subpaths, cell) + { + Path *subpath = (Path *) lfirst(l); + int i; + + /* Consider only the non-partial paths */ + if (path_index++ == numpaths) + break; + + costarr[min_index] += subpath->total_cost; + + /* Update the new min cost array index */ + for (min_index = i = 0; i < arrlen; i++) + { + if (costarr[i] < costarr[min_index]) + min_index = i; + } + } + + /* Return the highest cost from the array */ + for (max_index = i = 0; i < arrlen; i++) + { + if (costarr[i] > costarr[max_index]) + max_index = i; + } + + return costarr[max_index]; +} + +/* + * cost_append + * Determines and returns the cost of an Append node. + */ +void +cost_append(AppendPath *apath) +{ + ListCell *l; + + apath->path.startup_cost = 0; + apath->path.total_cost = 0; + apath->path.rows = 0; + + if (apath->subpaths == NIL) + return; + + if (!apath->path.parallel_aware) + { + List *pathkeys = apath->path.pathkeys; + + if (pathkeys == NIL) + { + Path *subpath = (Path *) linitial(apath->subpaths); + + /* + * For an unordered, non-parallel-aware Append we take the startup + * cost as the startup cost of the first subpath. + */ + apath->path.startup_cost = subpath->startup_cost; + + /* Compute rows and costs as sums of subplan rows and costs. */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + + apath->path.rows += subpath->rows; + apath->path.total_cost += subpath->total_cost; + } + } + else + { + /* + * For an ordered, non-parallel-aware Append we take the startup + * cost as the sum of the subpath startup costs. This ensures + * that we don't underestimate the startup cost when a query's + * LIMIT is such that several of the children have to be run to + * satisfy it. This might be overkill --- another plausible hack + * would be to take the Append's startup cost as the maximum of + * the child startup costs. But we don't want to risk believing + * that an ORDER BY LIMIT query can be satisfied at small cost + * when the first child has small startup cost but later ones + * don't. (If we had the ability to deal with nonlinear cost + * interpolation for partial retrievals, we would not need to be + * so conservative about this.) + * + * This case is also different from the above in that we have to + * account for possibly injecting sorts into subpaths that aren't + * natively ordered. + */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + Path sort_path; /* dummy for result of cost_sort */ + + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* + * We'll need to insert a Sort node, so include costs for + * that. We can use the parent's LIMIT if any, since we + * certainly won't pull more than that many tuples from + * any child. + */ + cost_sort(&sort_path, + NULL, /* doesn't currently need root */ + pathkeys, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + subpath = &sort_path; + } + + apath->path.rows += subpath->rows; + apath->path.startup_cost += subpath->startup_cost; + apath->path.total_cost += subpath->total_cost; + } + } + } + else /* parallel-aware */ + { + int i = 0; + double parallel_divisor = get_parallel_divisor(&apath->path); + + /* Parallel-aware Append never produces ordered output. */ + Assert(apath->path.pathkeys == NIL); + + /* Calculate startup cost. */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + + /* + * Append will start returning tuples when the child node having + * lowest startup cost is done setting up. 
We consider only the + * first few subplans that immediately get a worker assigned. + */ + if (i == 0) + apath->path.startup_cost = subpath->startup_cost; + else if (i < apath->path.parallel_workers) + apath->path.startup_cost = Min(apath->path.startup_cost, + subpath->startup_cost); + + /* + * Apply parallel divisor to subpaths. Scale the number of rows + * for each partial subpath based on the ratio of the parallel + * divisor originally used for the subpath to the one we adopted. + * Also add the cost of partial paths to the total cost, but + * ignore non-partial paths for now. + */ + if (i < apath->first_partial_path) + apath->path.rows += subpath->rows / parallel_divisor; + else + { + double subpath_parallel_divisor; + + subpath_parallel_divisor = get_parallel_divisor(subpath); + apath->path.rows += subpath->rows * (subpath_parallel_divisor / + parallel_divisor); + apath->path.total_cost += subpath->total_cost; + } + + apath->path.rows = clamp_row_est(apath->path.rows); + + i++; + } + + /* Add cost for non-partial subpaths. */ + apath->path.total_cost += + append_nonpartial_cost(apath->subpaths, + apath->first_partial_path, + apath->path.parallel_workers); + } + + /* + * Although Append does not do any selection or projection, it's not free; + * add a small per-tuple overhead. + */ + apath->path.total_cost += + cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * apath->path.rows; +} + +/* + * cost_merge_append + * Determines and returns the cost of a MergeAppend node. + * + * MergeAppend merges several pre-sorted input streams, using a heap that + * at any given instant holds the next tuple from each stream. If there + * are N streams, we need about N*log2(N) tuple comparisons to construct + * the heap at startup, and then for each output tuple, about log2(N) + * comparisons to replace the top entry. + * + * (The effective value of N will drop once some of the input streams are + * exhausted, but it seems unlikely to be worth trying to account for that.) + * + * The heap is never spilled to disk, since we assume N is not very large. + * So this is much simpler than cost_sort. + * + * As in cost_sort, we charge two operator evals per tuple comparison. + * + * 'pathkeys' is a list of sort keys + * 'n_streams' is the number of input streams + * 'input_startup_cost' is the sum of the input streams' startup costs + * 'input_total_cost' is the sum of the input streams' total costs + * 'tuples' is the number of tuples in all the streams + */ +void +cost_merge_append(Path *path, PlannerInfo *root, + List *pathkeys, int n_streams, + Cost input_startup_cost, Cost input_total_cost, + double tuples) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost comparison_cost; + double N; + double logN; + + /* + * Avoid log(0)... + */ + N = (n_streams < 2) ? 2.0 : (double) n_streams; + logN = LOG2(N); + + /* Assumed cost per tuple comparison */ + comparison_cost = 2.0 * cpu_operator_cost; + + /* Heap creation cost */ + startup_cost += comparison_cost * N * logN; + + /* Per-tuple heap maintenance cost */ + run_cost += tuples * comparison_cost * logN; + + /* + * Although MergeAppend does not do any selection or projection, it's not + * free; add a small per-tuple overhead. + */ + run_cost += cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * tuples; + + path->startup_cost = startup_cost + input_startup_cost; + path->total_cost = startup_cost + run_cost + input_total_cost; +} + +/* + * cost_material + * Determines and returns the cost of materializing a relation, including + * the cost of reading the input data. 
+ * + * If the total volume of data to materialize exceeds work_mem, we will need + * to write it to disk, so the cost is much higher in that case. + * + * Note that here we are estimating the costs for the first scan of the + * relation, so the materialization is all overhead --- any savings will + * occur only on rescan, which is estimated in cost_rescan. + */ +void +cost_material(Path *path, + Cost input_startup_cost, Cost input_total_cost, + double tuples, int width) +{ + Cost startup_cost = input_startup_cost; + Cost run_cost = input_total_cost - input_startup_cost; + double nbytes = relation_byte_size(tuples, width); + long work_mem_bytes = work_mem * 1024L; + + path->rows = tuples; + + /* + * Whether spilling or not, charge 2x cpu_operator_cost per tuple to + * reflect bookkeeping overhead. (This rate must be more than what + * cost_rescan charges for materialize, ie, cpu_operator_cost per tuple; + * if it is exactly the same then there will be a cost tie between + * nestloop with A outer, materialized B inner and nestloop with B outer, + * materialized A inner. The extra cost ensures we'll prefer + * materializing the smaller rel.) Note that this is normally a good deal + * less than cpu_tuple_cost; which is OK because a Material plan node + * doesn't do qual-checking or projection, so it's got less overhead than + * most plan nodes. + */ + run_cost += 2 * cpu_operator_cost * tuples; + + /* + * If we will spill to disk, charge at the rate of seq_page_cost per page. + * This cost is assumed to be evenly spread through the plan run phase, + * which isn't exactly accurate but our cost model doesn't allow for + * nonuniform costs within the run phase. + */ + if (nbytes > work_mem_bytes) + { + double npages = ceil(nbytes / BLCKSZ); + + run_cost += seq_page_cost * npages; + } + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* + * cost_memoize_rescan + * Determines the estimated cost of rescanning a Memoize node. + * + * In order to estimate this, we must gain knowledge of how often we expect to + * be called and how many distinct sets of parameters we are likely to be + * called with. If we expect a good cache hit ratio, then we can set our + * costs to account for that hit ratio, plus a little bit of cost for the + * caching itself. Caching will not work out well if we expect to be called + * with too many distinct parameter values. The worst-case here is that we + * never see any parameter value twice, in which case we'd never get a cache + * hit and caching would be a complete waste of effort. + */ +static void +cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, + Cost *rescan_startup_cost, Cost *rescan_total_cost) +{ + EstimationInfo estinfo; + Cost input_startup_cost = mpath->subpath->startup_cost; + Cost input_total_cost = mpath->subpath->total_cost; + double tuples = mpath->subpath->rows; + double calls = mpath->calls; + int width = mpath->subpath->pathtarget->width; + + double hash_mem_bytes; + double est_entry_bytes; + double est_cache_entries; + double ndistinct; + double evict_ratio; + double hit_ratio; + Cost startup_cost; + Cost total_cost; + + /* available cache space */ + hash_mem_bytes = get_hash_memory_limit(); + + /* + * Set the number of bytes each cache entry should consume in the cache. + * To provide us with better estimations on how many cache entries we can + * store at once, we make a call to the executor here to ask it what + * memory overheads there are for a single cache entry. 
+ * + * XXX we also store the cache key, but that's not accounted for here. + */ + est_entry_bytes = relation_byte_size(tuples, width) + + ExecEstimateCacheEntryOverheadBytes(tuples); + + /* estimate on the upper limit of cache entries we can hold at once */ + est_cache_entries = floor(hash_mem_bytes / est_entry_bytes); + + /* estimate on the distinct number of parameter values */ + ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL, + &estinfo); + + /* + * When the estimation fell back on using a default value, it's a bit too + * risky to assume that it's ok to use a Memoize node. The use of a + * default could cause us to use a Memoize node when it's really + * inappropriate to do so. If we see that this has been done, then we'll + * assume that every call will have unique parameters, which will almost + * certainly mean a MemoizePath will never survive add_path(). + */ + if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0) + ndistinct = calls; + + /* + * Since we've already estimated the maximum number of entries we can + * store at once and know the estimated number of distinct values we'll be + * called with, we'll take this opportunity to set the path's est_entries. + * This will ultimately determine the hash table size that the executor + * will use. If we leave this at zero, the executor will just choose the + * size itself. Really this is not the right place to do this, but it's + * convenient since everything is already calculated. + */ + mpath->est_entries = Min(Min(ndistinct, est_cache_entries), + PG_UINT32_MAX); + + /* + * When the number of distinct parameter values is above the amount we can + * store in the cache, then we'll have to evict some entries from the + * cache. This is not free. Here we estimate how often we'll incur the + * cost of that eviction. + */ + evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct; + + /* + * In order to estimate how costly a single scan will be, we need to + * attempt to estimate what the cache hit ratio will be. To do that we + * must look at how many scans are estimated in total for this node and + * how many of those scans we expect to get a cache hit. + */ + hit_ratio = 1.0 / ndistinct * Min(est_cache_entries, ndistinct) - + (ndistinct / calls); + + /* Ensure we don't go negative */ + hit_ratio = Max(hit_ratio, 0.0); + + /* + * Set the total_cost accounting for the expected cache hit ratio. We + * also add on a cpu_operator_cost to account for a cache lookup. This + * will happen regardless of whether it's a cache hit or not. + */ + total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost; + + /* Now adjust the total cost to account for cache evictions */ + + /* Charge a cpu_tuple_cost for evicting the actual cache entry */ + total_cost += cpu_tuple_cost * evict_ratio; + + /* + * Charge a 10th of cpu_operator_cost to evict every tuple in that entry. + * The per-tuple eviction is really just a pfree, so charging a whole + * cpu_operator_cost seems a little excessive. + */ + total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples; + + /* + * Now adjust for storing things in the cache, since that's not free + * either. Everything must go in the cache. We don't proportion this + * over any ratio, just apply it once for the scan. We charge a + * cpu_tuple_cost for the creation of the cache entry and also a + * cpu_operator_cost for each tuple we expect to cache. 
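+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): suppose calls = 1000, ndistinct = 100 and est_cache_entries
+ * = 250.  All distinct parameter sets fit in the cache, so evict_ratio
+ * = 0 and hit_ratio = (1/100) * 100 - (100/1000) = 0.9; 90% of rescans
+ * are costed as cache hits.  With est_cache_entries = 50 instead, only
+ * half of the distinct values fit, giving evict_ratio = 0.5 and
+ * hit_ratio = (1/100) * 50 - 0.1 = 0.4.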
+ */ + total_cost += cpu_tuple_cost + cpu_operator_cost * tuples; + + /* + * Getting the first row must be also be proportioned according to the + * expected cache hit ratio. + */ + startup_cost = input_startup_cost * (1.0 - hit_ratio); + + /* + * Additionally we charge a cpu_tuple_cost to account for cache lookups, + * which we'll do regardless of whether it was a cache hit or not. + */ + startup_cost += cpu_tuple_cost; + + *rescan_startup_cost = startup_cost; + *rescan_total_cost = total_cost; +} + +/* + * cost_agg + * Determines and returns the cost of performing an Agg plan node, + * including the cost of its input. + * + * aggcosts can be NULL when there are no actual aggregate functions (i.e., + * we are using a hashed Agg node just to do grouping). + * + * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs + * are for appropriately-sorted input. + */ +void +cost_agg(Path *path, PlannerInfo *root, + AggStrategy aggstrategy, const AggClauseCosts *aggcosts, + int numGroupCols, double numGroups, + List *quals, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples, double input_width) +{ + double output_tuples; + Cost startup_cost; + Cost total_cost; + AggClauseCosts dummy_aggcosts; + + /* Use all-zero per-aggregate costs if NULL is passed */ + if (aggcosts == NULL) + { + Assert(aggstrategy == AGG_HASHED); + MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts)); + aggcosts = &dummy_aggcosts; + } + + /* + * The transCost.per_tuple component of aggcosts should be charged once + * per input tuple, corresponding to the costs of evaluating the aggregate + * transfns and their input expressions. The finalCost.per_tuple component + * is charged once per output tuple, corresponding to the costs of + * evaluating the finalfns. Startup costs are of course charged but once. + * + * If we are grouping, we charge an additional cpu_operator_cost per + * grouping column per input tuple for grouping comparisons. + * + * We will produce a single output tuple if not grouping, and a tuple per + * group otherwise. We charge cpu_tuple_cost for each output tuple. + * + * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the + * same total CPU cost, but AGG_SORTED has lower startup cost. If the + * input path is already sorted appropriately, AGG_SORTED should be + * preferred (since it has no risk of memory overflow). This will happen + * as long as the computed total costs are indeed exactly equal --- but if + * there's roundoff error we might do the wrong thing. So be sure that + * the computations below form the same intermediate values in the same + * order. 
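+ *
+ * Illustrative example (added commentary, not part of the original
+ * source), assuming the default cpu_operator_cost = 0.0025 and
+ * cpu_tuple_cost = 0.01: hashed aggregation of 10000 input tuples into
+ * 100 groups on two grouping columns charges 0.0025 * 2 * 10000 = 50
+ * for the per-input-tuple grouping work, transCost.per_tuple * 10000
+ * for the transfns, finalCost.per_tuple * 100 for the finalfns, and
+ * 0.01 * 100 = 1 for emitting the output tuples.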
+ */ + if (aggstrategy == AGG_PLAIN) + { + startup_cost = input_total_cost; + startup_cost += aggcosts->transCost.startup; + startup_cost += aggcosts->transCost.per_tuple * input_tuples; + startup_cost += aggcosts->finalCost.startup; + startup_cost += aggcosts->finalCost.per_tuple; + /* we aren't grouping */ + total_cost = startup_cost + cpu_tuple_cost; + output_tuples = 1; + } + else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED) + { + /* Here we are able to deliver output on-the-fly */ + startup_cost = input_startup_cost; + total_cost = input_total_cost; + if (aggstrategy == AGG_MIXED && !enable_hashagg) + { + startup_cost += disable_cost; + total_cost += disable_cost; + } + /* calcs phrased this way to match HASHED case, see note above */ + total_cost += aggcosts->transCost.startup; + total_cost += aggcosts->transCost.per_tuple * input_tuples; + total_cost += (cpu_operator_cost * numGroupCols) * input_tuples; + total_cost += aggcosts->finalCost.startup; + total_cost += aggcosts->finalCost.per_tuple * numGroups; + total_cost += cpu_tuple_cost * numGroups; + output_tuples = numGroups; + } + else + { + /* must be AGG_HASHED */ + startup_cost = input_total_cost; + if (!enable_hashagg) + startup_cost += disable_cost; + startup_cost += aggcosts->transCost.startup; + startup_cost += aggcosts->transCost.per_tuple * input_tuples; + /* cost of computing hash value */ + startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples; + startup_cost += aggcosts->finalCost.startup; + + total_cost = startup_cost; + total_cost += aggcosts->finalCost.per_tuple * numGroups; + /* cost of retrieving from hash table */ + total_cost += cpu_tuple_cost * numGroups; + output_tuples = numGroups; + } + + /* + * Add the disk costs of hash aggregation that spills to disk. + * + * Groups that go into the hash table stay in memory until finalized, so + * spilling and reprocessing tuples doesn't incur additional invocations + * of transCost or finalCost. Furthermore, the computed hash value is + * stored with the spilled tuples, so we don't incur extra invocations of + * the hash function. + * + * Hash Agg begins returning tuples after the first batch is complete. + * Accrue writes (spilled tuples) to startup_cost and to total_cost; + * accrue reads only to total_cost. + */ + if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED) + { + double pages; + double pages_written = 0.0; + double pages_read = 0.0; + double spill_cost; + double hashentrysize; + double nbatches; + Size mem_limit; + uint64 ngroups_limit; + int num_partitions; + int depth; + + /* + * Estimate number of batches based on the computed limits. If less + * than or equal to one, all groups are expected to fit in memory; + * otherwise we expect to spill. + */ + hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos), + input_width, + aggcosts->transitionSpace); + hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit, + &ngroups_limit, &num_partitions); + + nbatches = Max((numGroups * hashentrysize) / mem_limit, + numGroups / ngroups_limit); + + nbatches = Max(ceil(nbatches), 1.0); + num_partitions = Max(num_partitions, 2); + + /* + * The number of partitions can change at different levels of + * recursion; but for the purposes of this calculation assume it stays + * constant. + */ + depth = ceil(log(nbatches) / log(num_partitions)); + + /* + * Estimate number of pages read and written. For each level of + * recursion, a tuple must be written and then later read. 
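+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): if the limits work out to nbatches = 16 and num_partitions
+ * = 4, then depth = ceil(log(16) / log(4)) = 2, i.e. each input tuple
+ * is expected to be spilled and re-read twice; with the input occupying
+ * 1000 pages, pages_written = pages_read = 2000 before the generic 2x
+ * penalty applied below.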
+ */ + pages = relation_byte_size(input_tuples, input_width) / BLCKSZ; + pages_written = pages_read = pages * depth; + + /* + * HashAgg has somewhat worse IO behavior than Sort on typical + * hardware/OS combinations. Account for this with a generic penalty. + */ + pages_read *= 2.0; + pages_written *= 2.0; + + startup_cost += pages_written * random_page_cost; + total_cost += pages_written * random_page_cost; + total_cost += pages_read * seq_page_cost; + + /* account for CPU cost of spilling a tuple and reading it back */ + spill_cost = depth * input_tuples * 2.0 * cpu_tuple_cost; + startup_cost += spill_cost; + total_cost += spill_cost; + } + + /* + * If there are quals (HAVING quals), account for their cost and + * selectivity. + */ + if (quals) + { + QualCost qual_cost; + + cost_qual_eval(&qual_cost, quals, root); + startup_cost += qual_cost.startup; + total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple; + + output_tuples = clamp_row_est(output_tuples * + clauselist_selectivity(root, + quals, + 0, + JOIN_INNER, + NULL)); + } + + path->rows = output_tuples; + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} + +/* + * cost_windowagg + * Determines and returns the cost of performing a WindowAgg plan node, + * including the cost of its input. + * + * Input is assumed already properly sorted. + */ +void +cost_windowagg(Path *path, PlannerInfo *root, + List *windowFuncs, int numPartCols, int numOrderCols, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples) +{ + Cost startup_cost; + Cost total_cost; + ListCell *lc; + + startup_cost = input_startup_cost; + total_cost = input_total_cost; + + /* + * Window functions are assumed to cost their stated execution cost, plus + * the cost of evaluating their input expressions, per tuple. Since they + * may in fact evaluate their inputs at multiple rows during each cycle, + * this could be a drastic underestimate; but without a way to know how + * many rows the window function will fetch, it's hard to do better. In + * any case, it's a good estimate for all the built-in window functions, + * so we'll just do this for now. + */ + foreach(lc, windowFuncs) + { + WindowFunc *wfunc = lfirst_node(WindowFunc, lc); + Cost wfunccost; + QualCost argcosts; + + argcosts.startup = argcosts.per_tuple = 0; + add_function_cost(root, wfunc->winfnoid, (Node *) wfunc, + &argcosts); + startup_cost += argcosts.startup; + wfunccost = argcosts.per_tuple; + + /* also add the input expressions' cost to per-input-row costs */ + cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root); + startup_cost += argcosts.startup; + wfunccost += argcosts.per_tuple; + + /* + * Add the filter's cost to per-input-row costs. XXX We should reduce + * input expression costs according to filter selectivity. + */ + cost_qual_eval_node(&argcosts, (Node *) wfunc->aggfilter, root); + startup_cost += argcosts.startup; + wfunccost += argcosts.per_tuple; + + total_cost += wfunccost * input_tuples; + } + + /* + * We also charge cpu_operator_cost per grouping column per tuple for + * grouping comparisons, plus cpu_tuple_cost per tuple for general + * overhead. + * + * XXX this neglects costs of spooling the data to disk when it overflows + * work_mem. Sooner or later that should get accounted for. 
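+ *
+ * Illustrative example (added commentary, not part of the original
+ * source), assuming the default cpu_operator_cost = 0.0025 and
+ * cpu_tuple_cost = 0.01: with 100000 input tuples, one PARTITION BY
+ * column and two ORDER BY columns, the charges below come to
+ * 0.0025 * 3 * 100000 = 750 plus 0.01 * 100000 = 1000, on top of the
+ * per-window-function costs added above.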
+ */ + total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples; + total_cost += cpu_tuple_cost * input_tuples; + + path->rows = input_tuples; + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} + +/* + * cost_group + * Determines and returns the cost of performing a Group plan node, + * including the cost of its input. + * + * Note: caller must ensure that input costs are for appropriately-sorted + * input. + */ +void +cost_group(Path *path, PlannerInfo *root, + int numGroupCols, double numGroups, + List *quals, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples) +{ + double output_tuples; + Cost startup_cost; + Cost total_cost; + + output_tuples = numGroups; + startup_cost = input_startup_cost; + total_cost = input_total_cost; + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We assume + * all columns get compared at most of the tuples. + */ + total_cost += cpu_operator_cost * input_tuples * numGroupCols; + + /* + * If there are quals (HAVING quals), account for their cost and + * selectivity. + */ + if (quals) + { + QualCost qual_cost; + + cost_qual_eval(&qual_cost, quals, root); + startup_cost += qual_cost.startup; + total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple; + + output_tuples = clamp_row_est(output_tuples * + clauselist_selectivity(root, + quals, + 0, + JOIN_INNER, + NULL)); + } + + path->rows = output_tuples; + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} + +/* + * initial_cost_nestloop + * Preliminary estimate of the cost of a nestloop join path. + * + * This must quickly produce lower-bound estimates of the path's startup and + * total costs. If we are unable to eliminate the proposed path from + * consideration using the lower bounds, final_cost_nestloop will be called + * to obtain the final estimates. + * + * The exact division of labor between this function and final_cost_nestloop + * is private to them, and represents a tradeoff between speed of the initial + * estimate and getting a tight lower bound. We choose to not examine the + * join quals here, since that's by far the most expensive part of the + * calculations. The end result is that CPU-cost considerations must be + * left for the second phase; and for SEMI/ANTI joins, we must also postpone + * incorporation of the inner path's run cost. + * + * 'workspace' is to be filled with startup_cost, total_cost, and perhaps + * other data to be used by final_cost_nestloop + * 'jointype' is the type of join to be performed + * 'outer_path' is the outer input to the join + * 'inner_path' is the inner input to the join + * 'extra' contains miscellaneous information about the join + */ +void +initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, + JoinType jointype, + Path *outer_path, Path *inner_path, + JoinPathExtraData *extra) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + double outer_path_rows = outer_path->rows; + Cost inner_rescan_start_cost; + Cost inner_rescan_total_cost; + Cost inner_run_cost; + Cost inner_rescan_run_cost; + + /* estimate costs to rescan the inner relation */ + cost_rescan(root, inner_path, + &inner_rescan_start_cost, + &inner_rescan_total_cost); + + /* cost of source data */ + + /* + * NOTE: clearly, we must pay both outer and inner paths' startup_cost + * before we can start returning tuples, so the join's startup cost is + * their sum. We'll also pay the inner path's rescan startup cost + * multiple times. 
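+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): with 1000 outer rows, an outer path with startup/total cost
+ * 5/105, an inner path with 2/52 and an estimated inner rescan cost of
+ * 1/41, startup_cost = 5 + 2 = 7, and in the normal (scan-whole-inner)
+ * case below, run_cost = 100 + 999 * 1 + 50 + 999 * 40 = 41109.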
+ */ + startup_cost += outer_path->startup_cost + inner_path->startup_cost; + run_cost += outer_path->total_cost - outer_path->startup_cost; + if (outer_path_rows > 1) + run_cost += (outer_path_rows - 1) * inner_rescan_start_cost; + + inner_run_cost = inner_path->total_cost - inner_path->startup_cost; + inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; + + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || + extra->inner_unique) + { + /* + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. + * + * Getting decent estimates requires inspection of the join quals, + * which we choose to postpone to final_cost_nestloop. + */ + + /* Save private data for final_cost_nestloop */ + workspace->inner_run_cost = inner_run_cost; + workspace->inner_rescan_run_cost = inner_rescan_run_cost; + } + else + { + /* Normal case; we'll scan whole input rel for each outer row */ + run_cost += inner_run_cost; + if (outer_path_rows > 1) + run_cost += (outer_path_rows - 1) * inner_rescan_run_cost; + } + + /* CPU costs left for later */ + + /* Public result fields */ + workspace->startup_cost = startup_cost; + workspace->total_cost = startup_cost + run_cost; + /* Save private data for final_cost_nestloop */ + workspace->run_cost = run_cost; +} + +/* + * final_cost_nestloop + * Final estimate of the cost and result size of a nestloop join path. + * + * 'path' is already filled in except for the rows and cost fields + * 'workspace' is the result from initial_cost_nestloop + * 'extra' contains miscellaneous information about the join + */ +void +final_cost_nestloop(PlannerInfo *root, NestPath *path, + JoinCostWorkspace *workspace, + JoinPathExtraData *extra) +{ + Path *outer_path = path->outerjoinpath; + Path *inner_path = path->innerjoinpath; + double outer_path_rows = outer_path->rows; + double inner_path_rows = inner_path->rows; + Cost startup_cost = workspace->startup_cost; + Cost run_cost = workspace->run_cost; + Cost cpu_per_tuple; + QualCost restrict_qual_cost; + double ntuples; + + /* Protect some assumptions below that rowcounts aren't zero */ + if (outer_path_rows <= 0) + outer_path_rows = 1; + if (inner_path_rows <= 0) + inner_path_rows = 1; + /* Mark the path with the correct row estimate */ + if (path->path.param_info) + path->path.rows = path->path.param_info->ppi_rows; + else + path->path.rows = path->path.parent->rows; + + /* For partial paths, scale row estimate. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = + clamp_row_est(path->path.rows / parallel_divisor); + } + + /* + * We could include disable_cost in the preliminary estimate, but that + * would amount to optimizing for the case where the join method is + * disabled, which doesn't seem like the way to bet. + */ + if (!enable_nestloop) + startup_cost += disable_cost; + + /* cost of inner-relation source data (we already dealt with outer rel) */ + + if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI || + extra->inner_unique) + { + /* + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. 
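+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): with 1000 outer rows, outer_match_frac = 0.6 and
+ * match_count = 4, the code below uses outer_matched_rows = 600,
+ * outer_unmatched_rows = 400 and inner_scan_frac = 2/(4 + 1) = 0.4, so
+ * a matched outer row is expected to scan about 40% of the inner rows
+ * before its first match stops the scan.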
+ */ + Cost inner_run_cost = workspace->inner_run_cost; + Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; + double outer_matched_rows; + double outer_unmatched_rows; + Selectivity inner_scan_frac; + + /* + * For an outer-rel row that has at least one match, we can expect the + * inner scan to stop after a fraction 1/(match_count+1) of the inner + * rows, if the matches are evenly distributed. Since they probably + * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to + * that fraction. (If we used a larger fuzz factor, we'd have to + * clamp inner_scan_frac to at most 1.0; but since match_count is at + * least 1, no such clamp is needed now.) + */ + outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + outer_unmatched_rows = outer_path_rows - outer_matched_rows; + inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); + + /* + * Compute number of tuples processed (not number emitted!). First, + * account for successfully-matched outer rows. + */ + ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac; + + /* + * Now we need to estimate the actual costs of scanning the inner + * relation, which may be quite a bit less than N times inner_run_cost + * due to early scan stops. We consider two cases. If the inner path + * is an indexscan using all the joinquals as indexquals, then an + * unmatched outer row results in an indexscan returning no rows, + * which is probably quite cheap. Otherwise, the executor will have + * to scan the whole inner rel for an unmatched row; not so cheap. + */ + if (has_indexed_join_quals(path)) + { + /* + * Successfully-matched outer rows will only require scanning + * inner_scan_frac of the inner relation. In this case, we don't + * need to charge the full inner_run_cost even when that's more + * than inner_rescan_run_cost, because we can assume that none of + * the inner scans ever scan the whole inner relation. So it's + * okay to assume that all the inner scan executions can be + * fractions of the full cost, even if materialization is reducing + * the rescan cost. At this writing, it's impossible to get here + * for a materialized inner scan, so inner_run_cost and + * inner_rescan_run_cost will be the same anyway; but just in + * case, use inner_run_cost for the first matched tuple and + * inner_rescan_run_cost for additional ones. + */ + run_cost += inner_run_cost * inner_scan_frac; + if (outer_matched_rows > 1) + run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac; + + /* + * Add the cost of inner-scan executions for unmatched outer rows. + * We estimate this as the same cost as returning the first tuple + * of a nonempty scan. We consider that these are all rescans, + * since we used inner_run_cost once already. + */ + run_cost += outer_unmatched_rows * + inner_rescan_run_cost / inner_path_rows; + + /* + * We won't be evaluating any quals at all for unmatched rows, so + * don't add them to ntuples. + */ + } + else + { + /* + * Here, a complicating factor is that rescans may be cheaper than + * first scans. If we never scan all the way to the end of the + * inner rel, it might be (depending on the plan type) that we'd + * never pay the whole inner first-scan run cost. However it is + * difficult to estimate whether that will happen (and it could + * not happen if there are any unmatched outer rows!), so be + * conservative and always charge the whole first-scan cost once. 
+ * We consider this charge to correspond to the first unmatched + * outer row, unless there isn't one in our estimate, in which + * case blame it on the first matched row. + */ + + /* First, count all unmatched join tuples as being processed */ + ntuples += outer_unmatched_rows * inner_path_rows; + + /* Now add the forced full scan, and decrement appropriate count */ + run_cost += inner_run_cost; + if (outer_unmatched_rows >= 1) + outer_unmatched_rows -= 1; + else + outer_matched_rows -= 1; + + /* Add inner run cost for additional outer tuples having matches */ + if (outer_matched_rows > 0) + run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac; + + /* Add inner run cost for additional unmatched outer tuples */ + if (outer_unmatched_rows > 0) + run_cost += outer_unmatched_rows * inner_rescan_run_cost; + } + } + else + { + /* Normal-case source costs were included in preliminary estimate */ + + /* Compute number of tuples processed (not number emitted!) */ + ntuples = outer_path_rows * inner_path_rows; + } + + /* CPU costs */ + cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root); + startup_cost += restrict_qual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple; + run_cost += cpu_per_tuple * ntuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->path.pathtarget->cost.startup; + run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + + path->path.startup_cost = startup_cost; + path->path.total_cost = startup_cost + run_cost; +} + +/* + * initial_cost_mergejoin + * Preliminary estimate of the cost of a mergejoin path. + * + * This must quickly produce lower-bound estimates of the path's startup and + * total costs. If we are unable to eliminate the proposed path from + * consideration using the lower bounds, final_cost_mergejoin will be called + * to obtain the final estimates. + * + * The exact division of labor between this function and final_cost_mergejoin + * is private to them, and represents a tradeoff between speed of the initial + * estimate and getting a tight lower bound. We choose to not examine the + * join quals here, except for obtaining the scan selectivity estimate which + * is really essential (but fortunately, use of caching keeps the cost of + * getting that down to something reasonable). + * We also assume that cost_sort is cheap enough to use here. + * + * 'workspace' is to be filled with startup_cost, total_cost, and perhaps + * other data to be used by final_cost_mergejoin + * 'jointype' is the type of join to be performed + * 'mergeclauses' is the list of joinclauses to be used as merge clauses + * 'outer_path' is the outer input to the join + * 'inner_path' is the inner input to the join + * 'outersortkeys' is the list of sort keys for the outer path + * 'innersortkeys' is the list of sort keys for the inner path + * 'extra' contains miscellaneous information about the join + * + * Note: outersortkeys and innersortkeys should be NIL if no explicit + * sort is needed because the respective source path is already ordered. 
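+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): if mergejoinscansel() estimates that the first join pair is
+ * not found until 10% of the way into the outer input and that no
+ * matches remain beyond 60% of it (outerstartsel = 0.1, outerendsel =
+ * 0.6), then a 10000-row outer input is costed as skipping about 1000
+ * rows at startup and scanning only up to row 6000, rather than being
+ * charged for a full scan.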
+ */ +void +initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, + JoinType jointype, + List *mergeclauses, + Path *outer_path, Path *inner_path, + List *outersortkeys, List *innersortkeys, + JoinPathExtraData *extra) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + double outer_path_rows = outer_path->rows; + double inner_path_rows = inner_path->rows; + Cost inner_run_cost; + double outer_rows, + inner_rows, + outer_skip_rows, + inner_skip_rows; + Selectivity outerstartsel, + outerendsel, + innerstartsel, + innerendsel; + Path sort_path; /* dummy for result of cost_sort */ + + /* Protect some assumptions below that rowcounts aren't zero */ + if (outer_path_rows <= 0) + outer_path_rows = 1; + if (inner_path_rows <= 0) + inner_path_rows = 1; + + /* + * A merge join will stop as soon as it exhausts either input stream + * (unless it's an outer join, in which case the outer side has to be + * scanned all the way anyway). Estimate fraction of the left and right + * inputs that will actually need to be scanned. Likewise, we can + * estimate the number of rows that will be skipped before the first join + * pair is found, which should be factored into startup cost. We use only + * the first (most significant) merge clause for this purpose. Since + * mergejoinscansel() is a fairly expensive computation, we cache the + * results in the merge clause RestrictInfo. + */ + if (mergeclauses && jointype != JOIN_FULL) + { + RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses); + List *opathkeys; + List *ipathkeys; + PathKey *opathkey; + PathKey *ipathkey; + MergeScanSelCache *cache; + + /* Get the input pathkeys to determine the sort-order details */ + opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys; + ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys; + Assert(opathkeys); + Assert(ipathkeys); + opathkey = (PathKey *) linitial(opathkeys); + ipathkey = (PathKey *) linitial(ipathkeys); + /* debugging check */ + if (opathkey->pk_opfamily != ipathkey->pk_opfamily || + opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation || + opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + + /* Get the selectivity with caching */ + cache = cached_scansel(root, firstclause, opathkey); + + if (bms_is_subset(firstclause->left_relids, + outer_path->parent->relids)) + { + /* left side of clause is outer */ + outerstartsel = cache->leftstartsel; + outerendsel = cache->leftendsel; + innerstartsel = cache->rightstartsel; + innerendsel = cache->rightendsel; + } + else + { + /* left side of clause is inner */ + outerstartsel = cache->rightstartsel; + outerendsel = cache->rightendsel; + innerstartsel = cache->leftstartsel; + innerendsel = cache->leftendsel; + } + if (jointype == JOIN_LEFT || + jointype == JOIN_ANTI) + { + outerstartsel = 0.0; + outerendsel = 1.0; + } + else if (jointype == JOIN_RIGHT) + { + innerstartsel = 0.0; + innerendsel = 1.0; + } + } + else + { + /* cope with clauseless or full mergejoin */ + outerstartsel = innerstartsel = 0.0; + outerendsel = innerendsel = 1.0; + } + + /* + * Convert selectivities to row counts. We force outer_rows and + * inner_rows to be at least 1, but the skip_rows estimates can be zero. 
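+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): with outer_path_rows = 7, outerstartsel = 0.1 and
+ * outerendsel = 0.6, the rounded counts are outer_skip_rows =
+ * rint(0.7) = 1 and outer_rows = clamp_row_est(4.2) = 4; the
+ * selectivities are then readjusted to 1/7 and 4/7, which matters at
+ * such small row counts.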
+ */ + outer_skip_rows = rint(outer_path_rows * outerstartsel); + inner_skip_rows = rint(inner_path_rows * innerstartsel); + outer_rows = clamp_row_est(outer_path_rows * outerendsel); + inner_rows = clamp_row_est(inner_path_rows * innerendsel); + + Assert(outer_skip_rows <= outer_rows); + Assert(inner_skip_rows <= inner_rows); + + /* + * Readjust scan selectivities to account for above rounding. This is + * normally an insignificant effect, but when there are only a few rows in + * the inputs, failing to do this makes for a large percentage error. + */ + outerstartsel = outer_skip_rows / outer_path_rows; + innerstartsel = inner_skip_rows / inner_path_rows; + outerendsel = outer_rows / outer_path_rows; + innerendsel = inner_rows / inner_path_rows; + + Assert(outerstartsel <= outerendsel); + Assert(innerstartsel <= innerendsel); + + /* cost of source data */ + + if (outersortkeys) /* do we need to sort outer? */ + { + cost_sort(&sort_path, + root, + outersortkeys, + outer_path->total_cost, + outer_path_rows, + outer_path->pathtarget->width, + 0.0, + work_mem, + -1.0); + startup_cost += sort_path.startup_cost; + startup_cost += (sort_path.total_cost - sort_path.startup_cost) + * outerstartsel; + run_cost += (sort_path.total_cost - sort_path.startup_cost) + * (outerendsel - outerstartsel); + } + else + { + startup_cost += outer_path->startup_cost; + startup_cost += (outer_path->total_cost - outer_path->startup_cost) + * outerstartsel; + run_cost += (outer_path->total_cost - outer_path->startup_cost) + * (outerendsel - outerstartsel); + } + + if (innersortkeys) /* do we need to sort inner? */ + { + cost_sort(&sort_path, + root, + innersortkeys, + inner_path->total_cost, + inner_path_rows, + inner_path->pathtarget->width, + 0.0, + work_mem, + -1.0); + startup_cost += sort_path.startup_cost; + startup_cost += (sort_path.total_cost - sort_path.startup_cost) + * innerstartsel; + inner_run_cost = (sort_path.total_cost - sort_path.startup_cost) + * (innerendsel - innerstartsel); + } + else + { + startup_cost += inner_path->startup_cost; + startup_cost += (inner_path->total_cost - inner_path->startup_cost) + * innerstartsel; + inner_run_cost = (inner_path->total_cost - inner_path->startup_cost) + * (innerendsel - innerstartsel); + } + + /* + * We can't yet determine whether rescanning occurs, or whether + * materialization of the inner input should be done. The minimum + * possible inner input cost, regardless of rescan and materialization + * considerations, is inner_run_cost. We include that in + * workspace->total_cost, but not yet in run_cost. + */ + + /* CPU costs left for later */ + + /* Public result fields */ + workspace->startup_cost = startup_cost; + workspace->total_cost = startup_cost + run_cost + inner_run_cost; + /* Save private data for final_cost_mergejoin */ + workspace->run_cost = run_cost; + workspace->inner_run_cost = inner_run_cost; + workspace->outer_rows = outer_rows; + workspace->inner_rows = inner_rows; + workspace->outer_skip_rows = outer_skip_rows; + workspace->inner_skip_rows = inner_skip_rows; +} + +/* + * final_cost_mergejoin + * Final estimate of the cost and result size of a mergejoin path. + * + * Unlike other costsize functions, this routine makes two actual decisions: + * whether the executor will need to do mark/restore, and whether we should + * materialize the inner path. It would be logically cleaner to build + * separate paths testing these alternatives, but that would require repeating + * most of the cost calculations, which are not all that cheap. 
Since the + * choice will not affect output pathkeys or startup cost, only total cost, + * there is no possibility of wanting to keep more than one path. So it seems + * best to make the decisions here and record them in the path's + * skip_mark_restore and materialize_inner fields. + * + * Mark/restore overhead is usually required, but can be skipped if we know + * that the executor need find only one match per outer tuple, and that the + * mergeclauses are sufficient to identify a match. + * + * We materialize the inner path if we need mark/restore and either the inner + * path can't support mark/restore, or it's cheaper to use an interposed + * Material node to handle mark/restore. + * + * 'path' is already filled in except for the rows and cost fields and + * skip_mark_restore and materialize_inner + * 'workspace' is the result from initial_cost_mergejoin + * 'extra' contains miscellaneous information about the join + */ +void +final_cost_mergejoin(PlannerInfo *root, MergePath *path, + JoinCostWorkspace *workspace, + JoinPathExtraData *extra) +{ + Path *outer_path = path->jpath.outerjoinpath; + Path *inner_path = path->jpath.innerjoinpath; + double inner_path_rows = inner_path->rows; + List *mergeclauses = path->path_mergeclauses; + List *innersortkeys = path->innersortkeys; + Cost startup_cost = workspace->startup_cost; + Cost run_cost = workspace->run_cost; + Cost inner_run_cost = workspace->inner_run_cost; + double outer_rows = workspace->outer_rows; + double inner_rows = workspace->inner_rows; + double outer_skip_rows = workspace->outer_skip_rows; + double inner_skip_rows = workspace->inner_skip_rows; + Cost cpu_per_tuple, + bare_inner_cost, + mat_inner_cost; + QualCost merge_qual_cost; + QualCost qp_qual_cost; + double mergejointuples, + rescannedtuples; + double rescanratio; + + /* Protect some assumptions below that rowcounts aren't zero */ + if (inner_path_rows <= 0) + inner_path_rows = 1; + + /* Mark the path with the correct row estimate */ + if (path->jpath.path.param_info) + path->jpath.path.rows = path->jpath.path.param_info->ppi_rows; + else + path->jpath.path.rows = path->jpath.path.parent->rows; + + /* For partial paths, scale row estimate. */ + if (path->jpath.path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->jpath.path); + + path->jpath.path.rows = + clamp_row_est(path->jpath.path.rows / parallel_divisor); + } + + /* + * We could include disable_cost in the preliminary estimate, but that + * would amount to optimizing for the case where the join method is + * disabled, which doesn't seem like the way to bet. + */ + if (!enable_mergejoin) + startup_cost += disable_cost; + + /* + * Compute cost of the mergequals and qpquals (other restriction clauses) + * separately. + */ + cost_qual_eval(&merge_qual_cost, mergeclauses, root); + cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); + qp_qual_cost.startup -= merge_qual_cost.startup; + qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; + + /* + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop scanning for matches after the first match. When + * all the joinclauses are merge clauses, this means we don't ever need to + * back up the merge, and so we can skip mark/restore overhead. 
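+ *
+ * Illustrative example (added commentary, not part of the original
+ * source; the quals are hypothetical): for a semijoin whose only join
+ * qual is the merge clause t1.a = t2.a, both list lengths below are 1
+ * and mark/restore can be skipped; if an extra non-merge qual such as
+ * t1.b < t2.b is also applied at the join, the lengths differ (2 vs 1)
+ * and mark/restore overhead must be kept.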
+ */ + if ((path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + extra->inner_unique) && + (list_length(path->jpath.joinrestrictinfo) == + list_length(path->path_mergeclauses))) + path->skip_mark_restore = true; + else + path->skip_mark_restore = false; + + /* + * Get approx # tuples passing the mergequals. We use approx_tuple_count + * here because we need an estimate done with JOIN_INNER semantics. + */ + mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses); + + /* + * When there are equal merge keys in the outer relation, the mergejoin + * must rescan any matching tuples in the inner relation. This means + * re-fetching inner tuples; we have to estimate how often that happens. + * + * For regular inner and outer joins, the number of re-fetches can be + * estimated approximately as size of merge join output minus size of + * inner relation. Assume that the distinct key values are 1, 2, ..., and + * denote the number of values of each key in the outer relation as m1, + * m2, ...; in the inner relation, n1, n2, ... Then we have + * + * size of join = m1 * n1 + m2 * n2 + ... + * + * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 * + * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner + * relation + * + * This equation works correctly for outer tuples having no inner match + * (nk = 0), but not for inner tuples having no outer match (mk = 0); we + * are effectively subtracting those from the number of rescanned tuples, + * when we should not. Can we do better without expensive selectivity + * computations? + * + * The whole issue is moot if we are working from a unique-ified outer + * input, or if we know we don't need to mark/restore at all. + */ + if (IsA(outer_path, UniquePath) || path->skip_mark_restore) + rescannedtuples = 0; + else + { + rescannedtuples = mergejointuples - inner_path_rows; + /* Must clamp because of possible underestimate */ + if (rescannedtuples < 0) + rescannedtuples = 0; + } + + /* + * We'll inflate various costs this much to account for rescanning. Note + * that this is to be multiplied by something involving inner_rows, or + * another number related to the portion of the inner rel we'll scan. + */ + rescanratio = 1.0 + (rescannedtuples / inner_rows); + + /* + * Decide whether we want to materialize the inner input to shield it from + * mark/restore and performing re-fetches. Our cost model for regular + * re-fetches is that a re-fetch costs the same as an original fetch, + * which is probably an overestimate; but on the other hand we ignore the + * bookkeeping costs of mark/restore. Not clear if it's worth developing + * a more refined model. So we just need to inflate the inner run cost by + * rescanratio. + */ + bare_inner_cost = inner_run_cost * rescanratio; + + /* + * When we interpose a Material node the re-fetch cost is assumed to be + * just cpu_operator_cost per tuple, independently of the underlying + * plan's cost; and we charge an extra cpu_operator_cost per original + * fetch as well. Note that we're assuming the materialize node will + * never spill to disk, since it only has to remember tuples back to the + * last mark. (If there are a huge number of duplicates, our other cost + * factors will make the path so expensive that it probably won't get + * chosen anyway.) So we don't use cost_rescan here. + * + * Note: keep this estimate in sync with create_mergejoin_plan's labeling + * of the generated Material node. 
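+ *
+ * Illustrative example (added commentary, not part of the original
+ * source), assuming the default cpu_operator_cost = 0.0025: with
+ * inner_run_cost = 100, inner_rows = 10000 and rescannedtuples = 5000
+ * (so rescanratio = 1.5), bare_inner_cost = 100 * 1.5 = 150 while
+ * mat_inner_cost = 100 + 0.0025 * 10000 * 1.5 = 137.5, so materializing
+ * the inner side looks cheaper in this case.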
+ */ + mat_inner_cost = inner_run_cost + + cpu_operator_cost * inner_rows * rescanratio; + + /* + * If we don't need mark/restore at all, we don't need materialization. + */ + if (path->skip_mark_restore) + path->materialize_inner = false; + + /* + * Prefer materializing if it looks cheaper, unless the user has asked to + * suppress materialization. + */ + else if (enable_material && mat_inner_cost < bare_inner_cost) + path->materialize_inner = true; + + /* + * Even if materializing doesn't look cheaper, we *must* do it if the + * inner path is to be used directly (without sorting) and it doesn't + * support mark/restore. + * + * Since the inner side must be ordered, and only Sorts and IndexScans can + * create order to begin with, and they both support mark/restore, you + * might think there's no problem --- but you'd be wrong. Nestloop and + * merge joins can *preserve* the order of their inputs, so they can be + * selected as the input of a mergejoin, and they don't support + * mark/restore at present. + * + * We don't test the value of enable_material here, because + * materialization is required for correctness in this case, and turning + * it off does not entitle us to deliver an invalid plan. + */ + else if (innersortkeys == NIL && + !ExecSupportsMarkRestore(inner_path)) + path->materialize_inner = true; + + /* + * Also, force materializing if the inner path is to be sorted and the + * sort is expected to spill to disk. This is because the final merge + * pass can be done on-the-fly if it doesn't have to support mark/restore. + * We don't try to adjust the cost estimates for this consideration, + * though. + * + * Since materialization is a performance optimization in this case, + * rather than necessary for correctness, we skip it if enable_material is + * off. + */ + else if (enable_material && innersortkeys != NIL && + relation_byte_size(inner_path_rows, + inner_path->pathtarget->width) > + (work_mem * 1024L)) + path->materialize_inner = true; + else + path->materialize_inner = false; + + /* Charge the right incremental cost for the chosen case */ + if (path->materialize_inner) + run_cost += mat_inner_cost; + else + run_cost += bare_inner_cost; + + /* CPU costs */ + + /* + * The number of tuple comparisons needed is approximately number of outer + * rows plus number of inner rows plus number of rescanned tuples (can we + * refine this?). At each one, we need to evaluate the mergejoin quals. + */ + startup_cost += merge_qual_cost.startup; + startup_cost += merge_qual_cost.per_tuple * + (outer_skip_rows + inner_skip_rows * rescanratio); + run_cost += merge_qual_cost.per_tuple * + ((outer_rows - outer_skip_rows) + + (inner_rows - inner_skip_rows) * rescanratio); + + /* + * For each tuple that gets through the mergejoin proper, we charge + * cpu_tuple_cost plus the cost of evaluating additional restriction + * clauses that are to be applied at the join. (This is pessimistic since + * not all of the quals may get evaluated at each tuple.) + * + * Note: we could adjust for SEMI/ANTI joins skipping some qual + * evaluations here, but it's probably not worth the trouble. 
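+ *
+ * Illustrative example (added commentary, not part of the original
+ * source), assuming the default cpu_tuple_cost = 0.01: if 50000 tuples
+ * pass the merge quals and the remaining join quals cost 0.005 per
+ * tuple to evaluate, the charge below is (0.01 + 0.005) * 50000 = 750.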
+ */ + startup_cost += qp_qual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; + run_cost += cpu_per_tuple * mergejointuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->jpath.path.pathtarget->cost.startup; + run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows; + + path->jpath.path.startup_cost = startup_cost; + path->jpath.path.total_cost = startup_cost + run_cost; +} + +/* + * run mergejoinscansel() with caching + */ +static MergeScanSelCache * +cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) +{ + MergeScanSelCache *cache; + ListCell *lc; + Selectivity leftstartsel, + leftendsel, + rightstartsel, + rightendsel; + MemoryContext oldcontext; + + /* Do we have this result already? */ + foreach(lc, rinfo->scansel_cache) + { + cache = (MergeScanSelCache *) lfirst(lc); + if (cache->opfamily == pathkey->pk_opfamily && + cache->collation == pathkey->pk_eclass->ec_collation && + cache->strategy == pathkey->pk_strategy && + cache->nulls_first == pathkey->pk_nulls_first) + return cache; + } + + /* Nope, do the computation */ + mergejoinscansel(root, + (Node *) rinfo->clause, + pathkey->pk_opfamily, + pathkey->pk_strategy, + pathkey->pk_nulls_first, + &leftstartsel, + &leftendsel, + &rightstartsel, + &rightendsel); + + /* Cache the result in suitably long-lived workspace */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache)); + cache->opfamily = pathkey->pk_opfamily; + cache->collation = pathkey->pk_eclass->ec_collation; + cache->strategy = pathkey->pk_strategy; + cache->nulls_first = pathkey->pk_nulls_first; + cache->leftstartsel = leftstartsel; + cache->leftendsel = leftendsel; + cache->rightstartsel = rightstartsel; + cache->rightendsel = rightendsel; + + rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache); + + MemoryContextSwitchTo(oldcontext); + + return cache; +} + +/* + * initial_cost_hashjoin + * Preliminary estimate of the cost of a hashjoin path. + * + * This must quickly produce lower-bound estimates of the path's startup and + * total costs. If we are unable to eliminate the proposed path from + * consideration using the lower bounds, final_cost_hashjoin will be called + * to obtain the final estimates. + * + * The exact division of labor between this function and final_cost_hashjoin + * is private to them, and represents a tradeoff between speed of the initial + * estimate and getting a tight lower bound. We choose to not examine the + * join quals here (other than by counting the number of hash clauses), + * so we can't do much with CPU costs. We do assume that + * ExecChooseHashTableSize is cheap enough to use here. 
+ * + * 'workspace' is to be filled with startup_cost, total_cost, and perhaps + * other data to be used by final_cost_hashjoin + * 'jointype' is the type of join to be performed + * 'hashclauses' is the list of joinclauses to be used as hash clauses + * 'outer_path' is the outer input to the join + * 'inner_path' is the inner input to the join + * 'extra' contains miscellaneous information about the join + * 'parallel_hash' indicates that inner_path is partial and that a shared + * hash table will be built in parallel + */ +void +initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, + JoinType jointype, + List *hashclauses, + Path *outer_path, Path *inner_path, + JoinPathExtraData *extra, + bool parallel_hash) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + double outer_path_rows = outer_path->rows; + double inner_path_rows = inner_path->rows; + double inner_path_rows_total = inner_path_rows; + int num_hashclauses = list_length(hashclauses); + int numbuckets; + int numbatches; + int num_skew_mcvs; + size_t space_allowed; /* unused */ + + /* cost of source data */ + startup_cost += outer_path->startup_cost; + run_cost += outer_path->total_cost - outer_path->startup_cost; + startup_cost += inner_path->total_cost; + + /* + * Cost of computing hash function: must do it once per input tuple. We + * charge one cpu_operator_cost for each column's hash function. Also, + * tack on one cpu_tuple_cost per inner row, to model the costs of + * inserting the row into the hashtable. + * + * XXX when a hashclause is more complex than a single operator, we really + * should charge the extra eval costs of the left or right side, as + * appropriate, here. This seems more work than it's worth at the moment. + */ + startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost) + * inner_path_rows; + run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows; + + /* + * If this is a parallel hash build, then the value we have for + * inner_rows_total currently refers only to the rows returned by each + * participant. For shared hash table size estimation, we need the total + * number, so we need to undo the division. + */ + if (parallel_hash) + inner_path_rows_total *= get_parallel_divisor(inner_path); + + /* + * Get hash table size that executor would use for inner relation. + * + * XXX for the moment, always assume that skew optimization will be + * performed. As long as SKEW_HASH_MEM_PERCENT is small, it's not worth + * trying to determine that for sure. + * + * XXX at some point it might be interesting to try to account for skew + * optimization in the cost estimate, but for now, we don't. + */ + ExecChooseHashTableSize(inner_path_rows_total, + inner_path->pathtarget->width, + true, /* useskew */ + parallel_hash, /* try_combined_hash_mem */ + outer_path->parallel_workers, + &space_allowed, + &numbuckets, + &numbatches, + &num_skew_mcvs); + + /* + * If inner relation is too big then we will need to "batch" the join, + * which implies writing and reading most of the tuples to disk an extra + * time. Charge seq_page_cost per page, since the I/O should be nice and + * sequential. Writing the inner rel counts as startup cost, all the rest + * as run cost. 
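+ *
+ * Illustrative example (added commentary, not part of the original
+ * source), assuming the default seq_page_cost = 1.0: if batching is
+ * needed and the inner and outer relations occupy 1000 and 5000 pages
+ * respectively, the charges below are startup_cost += 1000 (writing
+ * the inner rel) and run_cost += 1000 + 2 * 5000 = 11000 (re-reading
+ * the inner rel plus writing and re-reading the outer rel).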
+ */ + if (numbatches > 1) + { + double outerpages = page_size(outer_path_rows, + outer_path->pathtarget->width); + double innerpages = page_size(inner_path_rows, + inner_path->pathtarget->width); + + startup_cost += seq_page_cost * innerpages; + run_cost += seq_page_cost * (innerpages + 2 * outerpages); + } + + /* CPU costs left for later */ + + /* Public result fields */ + workspace->startup_cost = startup_cost; + workspace->total_cost = startup_cost + run_cost; + /* Save private data for final_cost_hashjoin */ + workspace->run_cost = run_cost; + workspace->numbuckets = numbuckets; + workspace->numbatches = numbatches; + workspace->inner_rows_total = inner_path_rows_total; +} + +/* + * final_cost_hashjoin + * Final estimate of the cost and result size of a hashjoin path. + * + * Note: the numbatches estimate is also saved into 'path' for use later + * + * 'path' is already filled in except for the rows and cost fields and + * num_batches + * 'workspace' is the result from initial_cost_hashjoin + * 'extra' contains miscellaneous information about the join + */ +void +final_cost_hashjoin(PlannerInfo *root, HashPath *path, + JoinCostWorkspace *workspace, + JoinPathExtraData *extra) +{ + Path *outer_path = path->jpath.outerjoinpath; + Path *inner_path = path->jpath.innerjoinpath; + double outer_path_rows = outer_path->rows; + double inner_path_rows = inner_path->rows; + double inner_path_rows_total = workspace->inner_rows_total; + List *hashclauses = path->path_hashclauses; + Cost startup_cost = workspace->startup_cost; + Cost run_cost = workspace->run_cost; + int numbuckets = workspace->numbuckets; + int numbatches = workspace->numbatches; + Cost cpu_per_tuple; + QualCost hash_qual_cost; + QualCost qp_qual_cost; + double hashjointuples; + double virtualbuckets; + Selectivity innerbucketsize; + Selectivity innermcvfreq; + ListCell *hcl; + + /* Mark the path with the correct row estimate */ + if (path->jpath.path.param_info) + path->jpath.path.rows = path->jpath.path.param_info->ppi_rows; + else + path->jpath.path.rows = path->jpath.path.parent->rows; + + /* For partial paths, scale row estimate. */ + if (path->jpath.path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->jpath.path); + + path->jpath.path.rows = + clamp_row_est(path->jpath.path.rows / parallel_divisor); + } + + /* + * We could include disable_cost in the preliminary estimate, but that + * would amount to optimizing for the case where the join method is + * disabled, which doesn't seem like the way to bet. + */ + if (!enable_hashjoin) + startup_cost += disable_cost; + + /* mark the path with estimated # of batches */ + path->num_batches = numbatches; + + /* store the total number of tuples (sum of partial row estimates) */ + path->inner_rows_total = inner_path_rows_total; + + /* and compute the number of "virtual" buckets in the whole join */ + virtualbuckets = (double) numbuckets * (double) numbatches; + + /* + * Determine bucketsize fraction and MCV frequency for the inner relation. + * We use the smallest bucketsize or MCV frequency estimated for any + * individual hashclause; this is undoubtedly conservative. + * + * BUT: if inner relation has been unique-ified, we can assume it's good + * for hashing. This is important both because it's the right answer, and + * because we avoid contaminating the cache with a value that's wrong for + * non-unique-ified paths. 
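+ *
+ * Illustrative example (added commentary, not part of the original
+ * source): with numbuckets = 1024 and numbatches = 4 there are 4096
+ * virtual buckets, so a unique-ified inner rel gets innerbucketsize =
+ * 1/4096.  Otherwise, if two hashclauses yield bucketsize estimates of
+ * 0.01 and 0.002, the smaller value 0.002 is kept.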
+ */ + if (IsA(inner_path, UniquePath)) + { + innerbucketsize = 1.0 / virtualbuckets; + innermcvfreq = 0.0; + } + else + { + innerbucketsize = 1.0; + innermcvfreq = 1.0; + foreach(hcl, hashclauses) + { + RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl); + Selectivity thisbucketsize; + Selectivity thismcvfreq; + + /* + * First we have to figure out which side of the hashjoin clause + * is the inner side. + * + * Since we tend to visit the same clauses over and over when + * planning a large query, we cache the bucket stats estimates in + * the RestrictInfo node to avoid repeated lookups of statistics. + */ + if (bms_is_subset(restrictinfo->right_relids, + inner_path->parent->relids)) + { + /* righthand side is inner */ + thisbucketsize = restrictinfo->right_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + estimate_hash_bucket_stats(root, + get_rightop(restrictinfo->clause), + virtualbuckets, + &restrictinfo->right_mcvfreq, + &restrictinfo->right_bucketsize); + thisbucketsize = restrictinfo->right_bucketsize; + } + thismcvfreq = restrictinfo->right_mcvfreq; + } + else + { + Assert(bms_is_subset(restrictinfo->left_relids, + inner_path->parent->relids)); + /* lefthand side is inner */ + thisbucketsize = restrictinfo->left_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + estimate_hash_bucket_stats(root, + get_leftop(restrictinfo->clause), + virtualbuckets, + &restrictinfo->left_mcvfreq, + &restrictinfo->left_bucketsize); + thisbucketsize = restrictinfo->left_bucketsize; + } + thismcvfreq = restrictinfo->left_mcvfreq; + } + + if (innerbucketsize > thisbucketsize) + innerbucketsize = thisbucketsize; + if (innermcvfreq > thismcvfreq) + innermcvfreq = thismcvfreq; + } + } + + /* + * If the bucket holding the inner MCV would exceed hash_mem, we don't + * want to hash unless there is really no other alternative, so apply + * disable_cost. (The executor normally copes with excessive memory usage + * by splitting batches, but obviously it cannot separate equal values + * that way, so it will be unable to drive the batch size below hash_mem + * when this is true.) + */ + if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq), + inner_path->pathtarget->width) > get_hash_memory_limit()) + startup_cost += disable_cost; + + /* + * Compute cost of the hashquals and qpquals (other restriction clauses) + * separately. + */ + cost_qual_eval(&hash_qual_cost, hashclauses, root); + cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); + qp_qual_cost.startup -= hash_qual_cost.startup; + qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple; + + /* CPU costs */ + + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + extra->inner_unique) + { + double outer_matched_rows; + Selectivity inner_scan_frac; + + /* + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. + * + * For an outer-rel row that has at least one match, we can expect the + * bucket scan to stop after a fraction 1/(match_count+1) of the + * bucket's rows, if the matches are evenly distributed. Since they + * probably aren't quite evenly distributed, we apply a fuzz factor of + * 2.0 to that fraction. (If we used a larger fuzz factor, we'd have + * to clamp inner_scan_frac to at most 1.0; but since match_count is + * at least 1, no such clamp is needed now.) 
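+ *
+ * (Illustrative example with made-up numbers: with match_count = 3,
+ * inner_scan_frac = 2.0 / 4.0 = 0.5, i.e. each matched outer row is
+ * assumed to scan about half of its bucket; since match_count is at
+ * least 1, the fraction cannot exceed 1.0.)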
+ */ + outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); + + startup_cost += hash_qual_cost.startup; + run_cost += hash_qual_cost.per_tuple * outer_matched_rows * + clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5; + + /* + * For unmatched outer-rel rows, the picture is quite a lot different. + * In the first place, there is no reason to assume that these rows + * preferentially hit heavily-populated buckets; instead assume they + * are uncorrelated with the inner distribution and so they see an + * average bucket size of inner_path_rows / virtualbuckets. In the + * second place, it seems likely that they will have few if any exact + * hash-code matches and so very few of the tuples in the bucket will + * actually require eval of the hash quals. We don't have any good + * way to estimate how many will, but for the moment assume that the + * effective cost per bucket entry is one-tenth what it is for + * matchable tuples. + */ + run_cost += hash_qual_cost.per_tuple * + (outer_path_rows - outer_matched_rows) * + clamp_row_est(inner_path_rows / virtualbuckets) * 0.05; + + /* Get # of tuples that will pass the basic join */ + if (path->jpath.jointype == JOIN_ANTI) + hashjointuples = outer_path_rows - outer_matched_rows; + else + hashjointuples = outer_matched_rows; + } + else + { + /* + * The number of tuple comparisons needed is the number of outer + * tuples times the typical number of tuples in a hash bucket, which + * is the inner relation size times its bucketsize fraction. At each + * one, we need to evaluate the hashjoin quals. But actually, + * charging the full qual eval cost at each tuple is pessimistic, + * since we don't evaluate the quals unless the hash values match + * exactly. For lack of a better idea, halve the cost estimate to + * allow for that. + */ + startup_cost += hash_qual_cost.startup; + run_cost += hash_qual_cost.per_tuple * outer_path_rows * + clamp_row_est(inner_path_rows * innerbucketsize) * 0.5; + + /* + * Get approx # tuples passing the hashquals. We use + * approx_tuple_count here because we need an estimate done with + * JOIN_INNER semantics. + */ + hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses); + } + + /* + * For each tuple that gets through the hashjoin proper, we charge + * cpu_tuple_cost plus the cost of evaluating additional restriction + * clauses that are to be applied at the join. (This is pessimistic since + * not all of the quals may get evaluated at each tuple.) + */ + startup_cost += qp_qual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; + run_cost += cpu_per_tuple * hashjointuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->jpath.path.pathtarget->cost.startup; + run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows; + + path->jpath.path.startup_cost = startup_cost; + path->jpath.path.total_cost = startup_cost + run_cost; +} + + +/* + * cost_subplan + * Figure the costs for a SubPlan (or initplan). + * + * Note: we could dig the subplan's Plan out of the root list, but in practice + * all callers have it handy already, so we make them pass it. 
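+ *
+ * (Illustrative example with made-up numbers: for an EXISTS subplan whose
+ * plan is costed at startup 0, total 100, and 1000 rows, the non-hashed
+ * branch below charges roughly (100 - 0) / 1000 = 0.1 per call, since
+ * only one tuple needs to be fetched.)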
+ */ +void +cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan) +{ + QualCost sp_cost; + + /* Figure any cost for evaluating the testexpr */ + cost_qual_eval(&sp_cost, + make_ands_implicit((Expr *) subplan->testexpr), + root); + + if (subplan->useHashTable) + { + /* + * If we are using a hash table for the subquery outputs, then the + * cost of evaluating the query is a one-time cost. We charge one + * cpu_operator_cost per tuple for the work of loading the hashtable, + * too. + */ + sp_cost.startup += plan->total_cost + + cpu_operator_cost * plan->plan_rows; + + /* + * The per-tuple costs include the cost of evaluating the lefthand + * expressions, plus the cost of probing the hashtable. We already + * accounted for the lefthand expressions as part of the testexpr, and + * will also have counted one cpu_operator_cost for each comparison + * operator. That is probably too low for the probing cost, but it's + * hard to make a better estimate, so live with it for now. + */ + } + else + { + /* + * Otherwise we will be rescanning the subplan output on each + * evaluation. We need to estimate how much of the output we will + * actually need to scan. NOTE: this logic should agree with the + * tuple_fraction estimates used by make_subplan() in + * plan/subselect.c. + */ + Cost plan_run_cost = plan->total_cost - plan->startup_cost; + + if (subplan->subLinkType == EXISTS_SUBLINK) + { + /* we only need to fetch 1 tuple; clamp to avoid zero divide */ + sp_cost.per_tuple += plan_run_cost / clamp_row_est(plan->plan_rows); + } + else if (subplan->subLinkType == ALL_SUBLINK || + subplan->subLinkType == ANY_SUBLINK) + { + /* assume we need 50% of the tuples */ + sp_cost.per_tuple += 0.50 * plan_run_cost; + /* also charge a cpu_operator_cost per row examined */ + sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost; + } + else + { + /* assume we need all tuples */ + sp_cost.per_tuple += plan_run_cost; + } + + /* + * Also account for subplan's startup cost. If the subplan is + * uncorrelated or undirect correlated, AND its topmost node is one + * that materializes its output, assume that we'll only need to pay + * its startup cost once; otherwise assume we pay the startup cost + * every time. + */ + if (subplan->parParam == NIL && + ExecMaterializesOutput(nodeTag(plan))) + sp_cost.startup += plan->startup_cost; + else + sp_cost.per_tuple += plan->startup_cost; + } + + subplan->startup_cost = sp_cost.startup; + subplan->per_call_cost = sp_cost.per_tuple; +} + + +/* + * cost_rescan + * Given a finished Path, estimate the costs of rescanning it after + * having done so the first time. For some Path types a rescan is + * cheaper than an original scan (if no parameters change), and this + * function embodies knowledge about that. The default is to return + * the same costs stored in the Path. (Note that the cost estimates + * actually stored in Paths are always for first scans.) + * + * This function is not currently intended to model effects such as rescans + * being cheaper due to disk block caching; what we are concerned with is + * plan types wherein the executor caches results explicitly, or doesn't + * redo startup calculations, etc. + */ +static void +cost_rescan(PlannerInfo *root, Path *path, + Cost *rescan_startup_cost, /* output parameters */ + Cost *rescan_total_cost) +{ + switch (path->pathtype) + { + case T_FunctionScan: + + /* + * Currently, nodeFunctionscan.c always executes the function to + * completion before returning any rows, and caches the results in + * a tuplestore. 
So the function eval cost is all startup cost + * and isn't paid over again on rescans. However, all run costs + * will be paid over again. + */ + *rescan_startup_cost = 0; + *rescan_total_cost = path->total_cost - path->startup_cost; + break; + case T_HashJoin: + + /* + * If it's a single-batch join, we don't need to rebuild the hash + * table during a rescan. + */ + if (((HashPath *) path)->num_batches == 1) + { + /* Startup cost is exactly the cost of hash table building */ + *rescan_startup_cost = 0; + *rescan_total_cost = path->total_cost - path->startup_cost; + } + else + { + /* Otherwise, no special treatment */ + *rescan_startup_cost = path->startup_cost; + *rescan_total_cost = path->total_cost; + } + break; + case T_CteScan: + case T_WorkTableScan: + { + /* + * These plan types materialize their final result in a + * tuplestore or tuplesort object. So the rescan cost is only + * cpu_tuple_cost per tuple, unless the result is large enough + * to spill to disk. + */ + Cost run_cost = cpu_tuple_cost * path->rows; + double nbytes = relation_byte_size(path->rows, + path->pathtarget->width); + long work_mem_bytes = work_mem * 1024L; + + if (nbytes > work_mem_bytes) + { + /* It will spill, so account for re-read cost */ + double npages = ceil(nbytes / BLCKSZ); + + run_cost += seq_page_cost * npages; + } + *rescan_startup_cost = 0; + *rescan_total_cost = run_cost; + } + break; + case T_Material: + case T_Sort: + { + /* + * These plan types not only materialize their results, but do + * not implement qual filtering or projection. So they are + * even cheaper to rescan than the ones above. We charge only + * cpu_operator_cost per tuple. (Note: keep that in sync with + * the run_cost charge in cost_sort, and also see comments in + * cost_material before you change it.) + */ + Cost run_cost = cpu_operator_cost * path->rows; + double nbytes = relation_byte_size(path->rows, + path->pathtarget->width); + long work_mem_bytes = work_mem * 1024L; + + if (nbytes > work_mem_bytes) + { + /* It will spill, so account for re-read cost */ + double npages = ceil(nbytes / BLCKSZ); + + run_cost += seq_page_cost * npages; + } + *rescan_startup_cost = 0; + *rescan_total_cost = run_cost; + } + break; + case T_Memoize: + /* All the hard work is done by cost_memoize_rescan */ + cost_memoize_rescan(root, (MemoizePath *) path, + rescan_startup_cost, rescan_total_cost); + break; + default: + *rescan_startup_cost = path->startup_cost; + *rescan_total_cost = path->total_cost; + break; + } +} + + +/* + * cost_qual_eval + * Estimate the CPU costs of evaluating a WHERE clause. + * The input can be either an implicitly-ANDed list of boolean + * expressions, or a list of RestrictInfo nodes. (The latter is + * preferred since it allows caching of the results.) + * The result includes both a one-time (startup) component, + * and a per-evaluation component. + */ +void +cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root) +{ + cost_qual_eval_context context; + ListCell *l; + + context.root = root; + context.total.startup = 0; + context.total.per_tuple = 0; + + /* We don't charge any cost for the implicit ANDing at top level ... */ + + foreach(l, quals) + { + Node *qual = (Node *) lfirst(l); + + cost_qual_eval_walker(qual, &context); + } + + *cost = context.total; +} + +/* + * cost_qual_eval_node + * As above, for a single RestrictInfo or expression. 
+ */ +void +cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root) +{ + cost_qual_eval_context context; + + context.root = root; + context.total.startup = 0; + context.total.per_tuple = 0; + + cost_qual_eval_walker(qual, &context); + + *cost = context.total; +} + +static bool +cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) +{ + if (node == NULL) + return false; + + /* + * RestrictInfo nodes contain an eval_cost field reserved for this + * routine's use, so that it's not necessary to evaluate the qual clause's + * cost more than once. If the clause's cost hasn't been computed yet, + * the field's startup value will contain -1. + */ + if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + + if (rinfo->eval_cost.startup < 0) + { + cost_qual_eval_context locContext; + + locContext.root = context->root; + locContext.total.startup = 0; + locContext.total.per_tuple = 0; + + /* + * For an OR clause, recurse into the marked-up tree so that we + * set the eval_cost for contained RestrictInfos too. + */ + if (rinfo->orclause) + cost_qual_eval_walker((Node *) rinfo->orclause, &locContext); + else + cost_qual_eval_walker((Node *) rinfo->clause, &locContext); + + /* + * If the RestrictInfo is marked pseudoconstant, it will be tested + * only once, so treat its cost as all startup cost. + */ + if (rinfo->pseudoconstant) + { + /* count one execution during startup */ + locContext.total.startup += locContext.total.per_tuple; + locContext.total.per_tuple = 0; + } + rinfo->eval_cost = locContext.total; + } + context->total.startup += rinfo->eval_cost.startup; + context->total.per_tuple += rinfo->eval_cost.per_tuple; + /* do NOT recurse into children */ + return false; + } + + /* + * For each operator or function node in the given tree, we charge the + * estimated execution cost given by pg_proc.procost (remember to multiply + * this by cpu_operator_cost). + * + * Vars and Consts are charged zero, and so are boolean operators (AND, + * OR, NOT). Simplistic, but a lot better than no model at all. + * + * Should we try to account for the possibility of short-circuit + * evaluation of AND/OR? Probably *not*, because that would make the + * results depend on the clause ordering, and we are not in any position + * to expect that the current ordering of the clauses is the one that's + * going to end up being used. The above per-RestrictInfo caching would + * not mix well with trying to re-order clauses anyway. + * + * Another issue that is entirely ignored here is that if a set-returning + * function is below top level in the tree, the functions/operators above + * it will need to be evaluated multiple times. In practical use, such + * cases arise so seldom as to not be worth the added complexity needed; + * moreover, since our rowcount estimates for functions tend to be pretty + * phony, the results would also be pretty phony. 
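+ *
+ * (Illustrative example with made-up numbers: a qual such as f(x) > 0,
+ * where f has procost 100 and the comparison operator's underlying
+ * function has the default procost of 1, is charged about
+ * (100 + 1) * cpu_operator_cost per tuple; the Var and Const themselves
+ * contribute nothing.)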
+ */ + if (IsA(node, FuncExpr)) + { + add_function_cost(context->root, ((FuncExpr *) node)->funcid, node, + &context->total); + } + else if (IsA(node, OpExpr) || + IsA(node, DistinctExpr) || + IsA(node, NullIfExpr)) + { + /* rely on struct equivalence to treat these all alike */ + set_opfuncid((OpExpr *) node); + add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node, + &context->total); + } + else if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + Node *arraynode = (Node *) lsecond(saop->args); + QualCost sacosts; + QualCost hcosts; + int estarraylen = estimate_array_length(arraynode); + + set_sa_opfuncid(saop); + sacosts.startup = sacosts.per_tuple = 0; + add_function_cost(context->root, saop->opfuncid, NULL, + &sacosts); + + if (OidIsValid(saop->hashfuncid)) + { + /* Handle costs for hashed ScalarArrayOpExpr */ + hcosts.startup = hcosts.per_tuple = 0; + + add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts); + context->total.startup += sacosts.startup + hcosts.startup; + + /* Estimate the cost of building the hashtable. */ + context->total.startup += estarraylen * hcosts.per_tuple; + + /* + * XXX should we charge a little bit for sacosts.per_tuple when + * building the table, or is it ok to assume there will be zero + * hash collision? + */ + + /* + * Charge for hashtable lookups. Charge a single hash and a + * single comparison. + */ + context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple; + } + else + { + /* + * Estimate that the operator will be applied to about half of the + * array elements before the answer is determined. + */ + context->total.startup += sacosts.startup; + context->total.per_tuple += sacosts.per_tuple * + estimate_array_length(arraynode) * 0.5; + } + } + else if (IsA(node, Aggref) || + IsA(node, WindowFunc)) + { + /* + * Aggref and WindowFunc nodes are (and should be) treated like Vars, + * ie, zero execution cost in the current model, because they behave + * essentially like Vars at execution. We disregard the costs of + * their input expressions for the same reason. The actual execution + * costs of the aggregate/window functions and their arguments have to + * be factored into plan-node-specific costing of the Agg or WindowAgg + * plan node. 
+ */ + return false; /* don't recurse into children */ + } + else if (IsA(node, GroupingFunc)) + { + /* Treat this as having cost 1 */ + context->total.per_tuple += cpu_operator_cost; + return false; /* don't recurse into children */ + } + else if (IsA(node, CoerceViaIO)) + { + CoerceViaIO *iocoerce = (CoerceViaIO *) node; + Oid iofunc; + Oid typioparam; + bool typisvarlena; + + /* check the result type's input function */ + getTypeInputInfo(iocoerce->resulttype, + &iofunc, &typioparam); + add_function_cost(context->root, iofunc, NULL, + &context->total); + /* check the input type's output function */ + getTypeOutputInfo(exprType((Node *) iocoerce->arg), + &iofunc, &typisvarlena); + add_function_cost(context->root, iofunc, NULL, + &context->total); + } + else if (IsA(node, ArrayCoerceExpr)) + { + ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node; + QualCost perelemcost; + + cost_qual_eval_node(&perelemcost, (Node *) acoerce->elemexpr, + context->root); + context->total.startup += perelemcost.startup; + if (perelemcost.per_tuple > 0) + context->total.per_tuple += perelemcost.per_tuple * + estimate_array_length((Node *) acoerce->arg); + } + else if (IsA(node, RowCompareExpr)) + { + /* Conservatively assume we will check all the columns */ + RowCompareExpr *rcexpr = (RowCompareExpr *) node; + ListCell *lc; + + foreach(lc, rcexpr->opnos) + { + Oid opid = lfirst_oid(lc); + + add_function_cost(context->root, get_opcode(opid), NULL, + &context->total); + } + } + else if (IsA(node, MinMaxExpr) || + IsA(node, SQLValueFunction) || + IsA(node, XmlExpr) || + IsA(node, CoerceToDomain) || + IsA(node, NextValueExpr)) + { + /* Treat all these as having cost 1 */ + context->total.per_tuple += cpu_operator_cost; + } + else if (IsA(node, CurrentOfExpr)) + { + /* Report high cost to prevent selection of anything but TID scan */ + context->total.startup += disable_cost; + } + else if (IsA(node, SubLink)) + { + /* This routine should not be applied to un-planned expressions */ + elog(ERROR, "cannot handle unplanned sub-select"); + } + else if (IsA(node, SubPlan)) + { + /* + * A subplan node in an expression typically indicates that the + * subplan will be executed on each evaluation, so charge accordingly. + * (Sub-selects that can be executed as InitPlans have already been + * removed from the expression.) + */ + SubPlan *subplan = (SubPlan *) node; + + context->total.startup += subplan->startup_cost; + context->total.per_tuple += subplan->per_call_cost; + + /* + * We don't want to recurse into the testexpr, because it was already + * counted in the SubPlan node's costs. So we're done. + */ + return false; + } + else if (IsA(node, AlternativeSubPlan)) + { + /* + * Arbitrarily use the first alternative plan for costing. (We should + * certainly only include one alternative, and we don't yet have + * enough information to know which one the executor is most likely to + * use.) + */ + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; + + return cost_qual_eval_walker((Node *) linitial(asplan->subplans), + context); + } + else if (IsA(node, PlaceHolderVar)) + { + /* + * A PlaceHolderVar should be given cost zero when considering general + * expression evaluation costs. The expense of doing the contained + * expression is charged as part of the tlist eval costs of the scan + * or join where the PHV is first computed (see set_rel_width and + * add_placeholders_to_joinrel). If we charged it again here, we'd be + * double-counting the cost for each level of plan that the PHV + * bubbles up through. 
Hence, return without recursing into the + * phexpr. + */ + return false; + } + + /* recurse into children */ + return expression_tree_walker(node, cost_qual_eval_walker, + (void *) context); +} + +/* + * get_restriction_qual_cost + * Compute evaluation costs of a baserel's restriction quals, plus any + * movable join quals that have been pushed down to the scan. + * Results are returned into *qpqual_cost. + * + * This is a convenience subroutine that works for seqscans and other cases + * where all the given quals will be evaluated the hard way. It's not useful + * for cost_index(), for example, where the index machinery takes care of + * some of the quals. We assume baserestrictcost was previously set by + * set_baserel_size_estimates(). + */ +static void +get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info, + QualCost *qpqual_cost) +{ + if (param_info) + { + /* Include costs of pushed-down clauses */ + cost_qual_eval(qpqual_cost, param_info->ppi_clauses, root); + + qpqual_cost->startup += baserel->baserestrictcost.startup; + qpqual_cost->per_tuple += baserel->baserestrictcost.per_tuple; + } + else + *qpqual_cost = baserel->baserestrictcost; +} + + +/* + * compute_semi_anti_join_factors + * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join + * can be expected to scan. + * + * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning + * inner rows as soon as it finds a match to the current outer row. + * The same happens if we have detected the inner rel is unique. + * We should therefore adjust some of the cost components for this effect. + * This function computes some estimates needed for these adjustments. + * These estimates will be the same regardless of the particular paths used + * for the outer and inner relation, so we compute these once and then pass + * them to all the join cost estimation functions. + * + * Input parameters: + * joinrel: join relation under consideration + * outerrel: outer relation under consideration + * innerrel: inner relation under consideration + * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique + * sjinfo: SpecialJoinInfo relevant to this join + * restrictlist: join quals + * Output parameters: + * *semifactors is filled in (see pathnodes.h for field definitions) + */ +void +compute_semi_anti_join_factors(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist, + SemiAntiJoinFactors *semifactors) +{ + Selectivity jselec; + Selectivity nselec; + Selectivity avgmatch; + SpecialJoinInfo norm_sjinfo; + List *joinquals; + ListCell *l; + + /* + * In an ANTI join, we must ignore clauses that are "pushed down", since + * those won't affect the match logic. In a SEMI join, we do not + * distinguish joinquals from "pushed down" quals, so just use the whole + * restrictinfo list. For other outer join types, we should consider only + * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check. + */ + if (IS_OUTER_JOIN(jointype)) + { + joinquals = NIL; + foreach(l, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) + joinquals = lappend(joinquals, rinfo); + } + } + else + joinquals = restrictlist; + + /* + * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses. + */ + jselec = clauselist_selectivity(root, + joinquals, + 0, + (jointype == JOIN_ANTI) ? 
JOIN_ANTI : JOIN_SEMI, + sjinfo); + + /* + * Also get the normal inner-join selectivity of the join clauses. + */ + norm_sjinfo.type = T_SpecialJoinInfo; + norm_sjinfo.min_lefthand = outerrel->relids; + norm_sjinfo.min_righthand = innerrel->relids; + norm_sjinfo.syn_lefthand = outerrel->relids; + norm_sjinfo.syn_righthand = innerrel->relids; + norm_sjinfo.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + norm_sjinfo.lhs_strict = false; + norm_sjinfo.delay_upper_joins = false; + norm_sjinfo.semi_can_btree = false; + norm_sjinfo.semi_can_hash = false; + norm_sjinfo.semi_operators = NIL; + norm_sjinfo.semi_rhs_exprs = NIL; + + nselec = clauselist_selectivity(root, + joinquals, + 0, + JOIN_INNER, + &norm_sjinfo); + + /* Avoid leaking a lot of ListCells */ + if (IS_OUTER_JOIN(jointype)) + list_free(joinquals); + + /* + * jselec can be interpreted as the fraction of outer-rel rows that have + * any matches (this is true for both SEMI and ANTI cases). And nselec is + * the fraction of the Cartesian product that matches. So, the average + * number of matches for each outer-rel row that has at least one match is + * nselec * inner_rows / jselec. + * + * Note: it is correct to use the inner rel's "rows" count here, even + * though we might later be considering a parameterized inner path with + * fewer rows. This is because we have included all the join clauses in + * the selectivity estimate. + */ + if (jselec > 0) /* protect against zero divide */ + { + avgmatch = nselec * innerrel->rows / jselec; + /* Clamp to sane range */ + avgmatch = Max(1.0, avgmatch); + } + else + avgmatch = 1.0; + + semifactors->outer_match_frac = jselec; + semifactors->match_count = avgmatch; +} + +/* + * has_indexed_join_quals + * Check whether all the joinquals of a nestloop join are used as + * inner index quals. + * + * If the inner path of a SEMI/ANTI join is an indexscan (including bitmap + * indexscan) that uses all the joinquals as indexquals, we can assume that an + * unmatched outer tuple is cheap to process, whereas otherwise it's probably + * expensive. + */ +static bool +has_indexed_join_quals(NestPath *joinpath) +{ + Relids joinrelids = joinpath->path.parent->relids; + Path *innerpath = joinpath->innerjoinpath; + List *indexclauses; + bool found_one; + ListCell *lc; + + /* If join still has quals to evaluate, it's not fast */ + if (joinpath->joinrestrictinfo != NIL) + return false; + /* Nor if the inner path isn't parameterized at all */ + if (innerpath->param_info == NULL) + return false; + + /* Find the indexclauses list for the inner scan */ + switch (innerpath->pathtype) + { + case T_IndexScan: + case T_IndexOnlyScan: + indexclauses = ((IndexPath *) innerpath)->indexclauses; + break; + case T_BitmapHeapScan: + { + /* Accept only a simple bitmap scan, not AND/OR cases */ + Path *bmqual = ((BitmapHeapPath *) innerpath)->bitmapqual; + + if (IsA(bmqual, IndexPath)) + indexclauses = ((IndexPath *) bmqual)->indexclauses; + else + return false; + break; + } + default: + + /* + * If it's not a simple indexscan, it probably doesn't run quickly + * for zero rows out, even if it's a parameterized path using all + * the joinquals. + */ + return false; + } + + /* + * Examine the inner path's param clauses. Any that are from the outer + * path must be found in the indexclauses list, either exactly or in an + * equivalent form generated by equivclass.c. Also, we must find at least + * one such clause, else it's a clauseless join which isn't fast. 
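+ *
+ * (Illustrative example, with hypothetical tables a and b: for a semijoin
+ * from a to b implemented as a nestloop whose inner path is an index scan
+ * on b(x) parameterized by a, the clause b.x = a.x appears both in the
+ * inner path's ppi_clauses and in its indexclauses, so we return true and
+ * unmatched outer rows are assumed cheap to process.)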
+ */ + found_one = false; + foreach(lc, innerpath->param_info->ppi_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (join_clause_is_movable_into(rinfo, + innerpath->parent->relids, + joinrelids)) + { + if (!is_redundant_with_indexclauses(rinfo, indexclauses)) + return false; + found_one = true; + } + } + return found_one; +} + + +/* + * approx_tuple_count + * Quick-and-dirty estimation of the number of join rows passing + * a set of qual conditions. + * + * The quals can be either an implicitly-ANDed list of boolean expressions, + * or a list of RestrictInfo nodes (typically the latter). + * + * We intentionally compute the selectivity under JOIN_INNER rules, even + * if it's some type of outer join. This is appropriate because we are + * trying to figure out how many tuples pass the initial merge or hash + * join step. + * + * This is quick-and-dirty because we bypass clauselist_selectivity, and + * simply multiply the independent clause selectivities together. Now + * clauselist_selectivity often can't do any better than that anyhow, but + * for some situations (such as range constraints) it is smarter. However, + * we can't effectively cache the results of clauselist_selectivity, whereas + * the individual clause selectivities can be and are cached. + * + * Since we are only using the results to estimate how many potential + * output tuples are generated and passed through qpqual checking, it + * seems OK to live with the approximation. + */ +static double +approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +{ + double tuples; + double outer_tuples = path->outerjoinpath->rows; + double inner_tuples = path->innerjoinpath->rows; + SpecialJoinInfo sjinfo; + Selectivity selec = 1.0; + ListCell *l; + + /* + * Make up a SpecialJoinInfo for JOIN_INNER semantics. + */ + sjinfo.type = T_SpecialJoinInfo; + sjinfo.min_lefthand = path->outerjoinpath->parent->relids; + sjinfo.min_righthand = path->innerjoinpath->parent->relids; + sjinfo.syn_lefthand = path->outerjoinpath->parent->relids; + sjinfo.syn_righthand = path->innerjoinpath->parent->relids; + sjinfo.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo.lhs_strict = false; + sjinfo.delay_upper_joins = false; + sjinfo.semi_can_btree = false; + sjinfo.semi_can_hash = false; + sjinfo.semi_operators = NIL; + sjinfo.semi_rhs_exprs = NIL; + + /* Get the approximate selectivity */ + foreach(l, quals) + { + Node *qual = (Node *) lfirst(l); + + /* Note that clause_selectivity will be able to cache its result */ + selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo); + } + + /* Apply it to the input relation sizes */ + tuples = selec * outer_tuples * inner_tuples; + + return clamp_row_est(tuples); +} + + +/* + * set_baserel_size_estimates + * Set the size estimates for the given base relation. + * + * The rel's targetlist and restrictinfo list must have been constructed + * already, and rel->tuples must be set. + * + * We set the following fields of the rel node: + * rows: the estimated number of output tuples (after applying + * restriction clauses). + * width: the estimated average output tuple width in bytes. + * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. 
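+ *
+ * (Illustrative example with made-up numbers: a relation with
+ * rel->tuples = 1000000 whose restriction clauses have a combined
+ * selectivity of 0.015 gets rows = clamp_row_est(0.015 * 1000000) =
+ * 15000.)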
+ */ +void +set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + double nrows; + + /* Should only be applied to base relations */ + Assert(rel->relid > 0); + + nrows = rel->tuples * + clauselist_selectivity(root, + rel->baserestrictinfo, + 0, + JOIN_INNER, + NULL); + + rel->rows = clamp_row_est(nrows); + + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); + + set_rel_width(root, rel); +} + +/* + * get_parameterized_baserel_size + * Make a size estimate for a parameterized scan of a base relation. + * + * 'param_clauses' lists the additional join clauses to be used. + * + * set_baserel_size_estimates must have been applied already. + */ +double +get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, + List *param_clauses) +{ + List *allclauses; + double nrows; + + /* + * Estimate the number of rows returned by the parameterized scan, knowing + * that it will apply all the extra join clauses as well as the rel's own + * restriction clauses. Note that we force the clauses to be treated as + * non-join clauses during selectivity estimation. + */ + allclauses = list_concat_copy(param_clauses, rel->baserestrictinfo); + nrows = rel->tuples * + clauselist_selectivity(root, + allclauses, + rel->relid, /* do not use 0! */ + JOIN_INNER, + NULL); + nrows = clamp_row_est(nrows); + /* For safety, make sure result is not more than the base estimate */ + if (nrows > rel->rows) + nrows = rel->rows; + return nrows; +} + +/* + * set_joinrel_size_estimates + * Set the size estimates for the given join relation. + * + * The rel's targetlist must have been constructed already, and a + * restriction clause list that matches the given component rels must + * be provided. + * + * Since there is more than one way to make a joinrel for more than two + * base relations, the results we get here could depend on which component + * rel pair is provided. In theory we should get the same answers no matter + * which pair is provided; in practice, since the selectivity estimation + * routines don't handle all cases equally well, we might not. But there's + * not much to be done about it. (Would it make sense to repeat the + * calculations for each pair of input rels that's encountered, and somehow + * average the results? Probably way more trouble than it's worth, and + * anyway we must keep the rowcount estimate the same for all paths for the + * joinrel.) + * + * We set only the rows field here. The reltarget field was already set by + * build_joinrel_tlist, and baserestrictcost is not used for join rels. + */ +void +set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo, + List *restrictlist) +{ + rel->rows = calc_joinrel_size_estimate(root, + rel, + outer_rel, + inner_rel, + outer_rel->rows, + inner_rel->rows, + sjinfo, + restrictlist); +} + +/* + * get_parameterized_joinrel_size + * Make a size estimate for a parameterized scan of a join relation. + * + * 'rel' is the joinrel under consideration. + * 'outer_path', 'inner_path' are (probably also parameterized) Paths that + * produce the relations being joined. + * 'sjinfo' is any SpecialJoinInfo relevant to this join. + * 'restrict_clauses' lists the join clauses that need to be applied at the + * join node (including any movable clauses that were moved down to this join, + * and not including any movable clauses that were pushed down into the + * child paths). + * + * set_joinrel_size_estimates must have been applied already. 
+ */ +double +get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, + Path *outer_path, + Path *inner_path, + SpecialJoinInfo *sjinfo, + List *restrict_clauses) +{ + double nrows; + + /* + * Estimate the number of rows returned by the parameterized join as the + * sizes of the input paths times the selectivity of the clauses that have + * ended up at this join node. + * + * As with set_joinrel_size_estimates, the rowcount estimate could depend + * on the pair of input paths provided, though ideally we'd get the same + * estimate for any pair with the same parameterization. + */ + nrows = calc_joinrel_size_estimate(root, + rel, + outer_path->parent, + inner_path->parent, + outer_path->rows, + inner_path->rows, + sjinfo, + restrict_clauses); + /* For safety, make sure result is not more than the base estimate */ + if (nrows > rel->rows) + nrows = rel->rows; + return nrows; +} + +/* + * calc_joinrel_size_estimate + * Workhorse for set_joinrel_size_estimates and + * get_parameterized_joinrel_size. + * + * outer_rel/inner_rel are the relations being joined, but they should be + * assumed to have sizes outer_rows/inner_rows; those numbers might be less + * than what rel->rows says, when we are considering parameterized paths. + */ +static double +calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + double outer_rows, + double inner_rows, + SpecialJoinInfo *sjinfo, + List *restrictlist_in) +{ + /* This apparently-useless variable dodges a compiler bug in VS2013: */ + List *restrictlist = restrictlist_in; + JoinType jointype = sjinfo->jointype; + Selectivity fkselec; + Selectivity jselec; + Selectivity pselec; + double nrows; + + /* + * Compute joinclause selectivity. Note that we are only considering + * clauses that become restriction clauses at this join level; we are not + * double-counting them because they were not considered in estimating the + * sizes of the component rels. + * + * First, see whether any of the joinclauses can be matched to known FK + * constraints. If so, drop those clauses from the restrictlist, and + * instead estimate their selectivity using FK semantics. (We do this + * without regard to whether said clauses are local or "pushed down". + * Probably, an FK-matching clause could never be seen as pushed down at + * an outer join, since it would be strict and hence would be grounds for + * join strength reduction.) fkselec gets the net selectivity for + * FK-matching clauses, or 1.0 if there are none. + */ + fkselec = get_foreign_key_join_selectivity(root, + outer_rel->relids, + inner_rel->relids, + sjinfo, + &restrictlist); + + /* + * For an outer join, we have to distinguish the selectivity of the join's + * own clauses (JOIN/ON conditions) from any clauses that were "pushed + * down". For inner joins we just count them all as joinclauses. 
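+ *
+ * (Illustrative example, with hypothetical tables a and b: in
+ * "a LEFT JOIN b ON a.x = b.y WHERE b.z IS NULL", the ON clause
+ * a.x = b.y is a joinqual at this join, while b.z IS NULL is a
+ * pushed-down qual; its selectivity is applied only after the outer-join
+ * row-count clamp below.)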
+ */ + if (IS_OUTER_JOIN(jointype)) + { + List *joinquals = NIL; + List *pushedquals = NIL; + ListCell *l; + + /* Grovel through the clauses to separate into two lists */ + foreach(l, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) + pushedquals = lappend(pushedquals, rinfo); + else + joinquals = lappend(joinquals, rinfo); + } + + /* Get the separate selectivities */ + jselec = clauselist_selectivity(root, + joinquals, + 0, + jointype, + sjinfo); + pselec = clauselist_selectivity(root, + pushedquals, + 0, + jointype, + sjinfo); + + /* Avoid leaking a lot of ListCells */ + list_free(joinquals); + list_free(pushedquals); + } + else + { + jselec = clauselist_selectivity(root, + restrictlist, + 0, + jointype, + sjinfo); + pselec = 0.0; /* not used, keep compiler quiet */ + } + + /* + * Basically, we multiply size of Cartesian product by selectivity. + * + * If we are doing an outer join, take that into account: the joinqual + * selectivity has to be clamped using the knowledge that the output must + * be at least as large as the non-nullable input. However, any + * pushed-down quals are applied after the outer join, so their + * selectivity applies fully. + * + * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction + * of LHS rows that have matches, and we apply that straightforwardly. + */ + switch (jointype) + { + case JOIN_INNER: + nrows = outer_rows * inner_rows * fkselec * jselec; + /* pselec not used */ + break; + case JOIN_LEFT: + nrows = outer_rows * inner_rows * fkselec * jselec; + if (nrows < outer_rows) + nrows = outer_rows; + nrows *= pselec; + break; + case JOIN_FULL: + nrows = outer_rows * inner_rows * fkselec * jselec; + if (nrows < outer_rows) + nrows = outer_rows; + if (nrows < inner_rows) + nrows = inner_rows; + nrows *= pselec; + break; + case JOIN_SEMI: + nrows = outer_rows * fkselec * jselec; + /* pselec not used */ + break; + case JOIN_ANTI: + nrows = outer_rows * (1.0 - fkselec * jselec); + nrows *= pselec; + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", (int) jointype); + nrows = 0; /* keep compiler quiet */ + break; + } + + return clamp_row_est(nrows); +} + +/* + * get_foreign_key_join_selectivity + * Estimate join selectivity for foreign-key-related clauses. + * + * Remove any clauses that can be matched to FK constraints from *restrictlist, + * and return a substitute estimate of their selectivity. 1.0 is returned + * when there are no such clauses. + * + * The reason for treating such clauses specially is that we can get better + * estimates this way than by relying on clauselist_selectivity(), especially + * for multi-column FKs where that function's assumption that the clauses are + * independent falls down badly. But even with single-column FKs, we may be + * able to get a better answer when the pg_statistic stats are missing or out + * of date. 
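+ *
+ * (Illustrative example, with hypothetical tables a and b: for a
+ * two-column FK on (a.x, a.y) referencing (b.x, b.y),
+ * clauselist_selectivity would multiply the selectivities of a.x = b.x
+ * and a.y = b.y as though independent, typically underestimating the join
+ * size badly; the FK says each referencing row matches exactly one
+ * referenced row, i.e. a selectivity of 1 over b's raw tuple count.)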
+ */ +static Selectivity +get_foreign_key_join_selectivity(PlannerInfo *root, + Relids outer_relids, + Relids inner_relids, + SpecialJoinInfo *sjinfo, + List **restrictlist) +{ + Selectivity fkselec = 1.0; + JoinType jointype = sjinfo->jointype; + List *worklist = *restrictlist; + ListCell *lc; + + /* Consider each FK constraint that is known to match the query */ + foreach(lc, root->fkey_list) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + bool ref_is_outer; + List *removedlist; + ListCell *cell; + + /* + * This FK is not relevant unless it connects a baserel on one side of + * this join to a baserel on the other side. + */ + if (bms_is_member(fkinfo->con_relid, outer_relids) && + bms_is_member(fkinfo->ref_relid, inner_relids)) + ref_is_outer = false; + else if (bms_is_member(fkinfo->ref_relid, outer_relids) && + bms_is_member(fkinfo->con_relid, inner_relids)) + ref_is_outer = true; + else + continue; + + /* + * If we're dealing with a semi/anti join, and the FK's referenced + * relation is on the outside, then knowledge of the FK doesn't help + * us figure out what we need to know (which is the fraction of outer + * rows that have matches). On the other hand, if the referenced rel + * is on the inside, then all outer rows must have matches in the + * referenced table (ignoring nulls). But any restriction or join + * clauses that filter that table will reduce the fraction of matches. + * We can account for restriction clauses, but it's too hard to guess + * how many table rows would get through a join that's inside the RHS. + * Hence, if either case applies, punt and ignore the FK. + */ + if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) && + (ref_is_outer || bms_membership(inner_relids) != BMS_SINGLETON)) + continue; + + /* + * Modify the restrictlist by removing clauses that match the FK (and + * putting them into removedlist instead). It seems unsafe to modify + * the originally-passed List structure, so we make a shallow copy the + * first time through. + */ + if (worklist == *restrictlist) + worklist = list_copy(worklist); + + removedlist = NIL; + foreach(cell, worklist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + bool remove_it = false; + int i; + + /* Drop this clause if it matches any column of the FK */ + for (i = 0; i < fkinfo->nkeys; i++) + { + if (rinfo->parent_ec) + { + /* + * EC-derived clauses can only match by EC. It is okay to + * consider any clause derived from the same EC as + * matching the FK: even if equivclass.c chose to generate + * a clause equating some other pair of Vars, it could + * have generated one equating the FK's Vars. So for + * purposes of estimation, we can act as though it did so. + * + * Note: checking parent_ec is a bit of a cheat because + * there are EC-derived clauses that don't have parent_ec + * set; but such clauses must compare expressions that + * aren't just Vars, so they cannot match the FK anyway. + */ + if (fkinfo->eclass[i] == rinfo->parent_ec) + { + remove_it = true; + break; + } + } + else + { + /* + * Otherwise, see if rinfo was previously matched to FK as + * a "loose" clause. + */ + if (list_member_ptr(fkinfo->rinfos[i], rinfo)) + { + remove_it = true; + break; + } + } + } + if (remove_it) + { + worklist = foreach_delete_current(worklist, cell); + removedlist = lappend(removedlist, rinfo); + } + } + + /* + * If we failed to remove all the matching clauses we expected to + * find, chicken out and ignore this FK; applying its selectivity + * might result in double-counting. 
Put any clauses we did manage to + * remove back into the worklist. + * + * Since the matching clauses are known not outerjoin-delayed, they + * would normally have appeared in the initial joinclause list. If we + * didn't find them, there are two possibilities: + * + * 1. If the FK match is based on an EC that is ec_has_const, it won't + * have generated any join clauses at all. We discount such ECs while + * checking to see if we have "all" the clauses. (Below, we'll adjust + * the selectivity estimate for this case.) + * + * 2. The clauses were matched to some other FK in a previous + * iteration of this loop, and thus removed from worklist. (A likely + * case is that two FKs are matched to the same EC; there will be only + * one EC-derived clause in the initial list, so the first FK will + * consume it.) Applying both FKs' selectivity independently risks + * underestimating the join size; in particular, this would undo one + * of the main things that ECs were invented for, namely to avoid + * double-counting the selectivity of redundant equality conditions. + * Later we might think of a reasonable way to combine the estimates, + * but for now, just punt, since this is a fairly uncommon situation. + */ + if (removedlist == NIL || + list_length(removedlist) != + (fkinfo->nmatched_ec - fkinfo->nconst_ec + fkinfo->nmatched_ri)) + { + worklist = list_concat(worklist, removedlist); + continue; + } + + /* + * Finally we get to the payoff: estimate selectivity using the + * knowledge that each referencing row will match exactly one row in + * the referenced table. + * + * XXX that's not true in the presence of nulls in the referencing + * column(s), so in principle we should derate the estimate for those. + * However (1) if there are any strict restriction clauses for the + * referencing column(s) elsewhere in the query, derating here would + * be double-counting the null fraction, and (2) it's not very clear + * how to combine null fractions for multiple referencing columns. So + * we do nothing for now about correcting for nulls. + * + * XXX another point here is that if either side of an FK constraint + * is an inheritance parent, we estimate as though the constraint + * covers all its children as well. This is not an unreasonable + * assumption for a referencing table, ie the user probably applied + * identical constraints to all child tables (though perhaps we ought + * to check that). But it's not possible to have done that for a + * referenced table. Fortunately, precisely because that doesn't + * work, it is uncommon in practice to have an FK referencing a parent + * table. So, at least for now, disregard inheritance here. + */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + /* + * For JOIN_SEMI and JOIN_ANTI, we only get here when the FK's + * referenced table is exactly the inside of the join. The join + * selectivity is defined as the fraction of LHS rows that have + * matches. The FK implies that every LHS row has a match *in the + * referenced table*; but any restriction clauses on it will + * reduce the number of matches. Hence we take the join + * selectivity as equal to the selectivity of the table's + * restriction clauses, which is rows / tuples; but we must guard + * against tuples == 0. + */ + RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); + double ref_tuples = Max(ref_rel->tuples, 1.0); + + fkselec *= ref_rel->rows / ref_tuples; + } + else + { + /* + * Otherwise, selectivity is exactly 1/referenced-table-size; but + * guard against tuples == 0. 
Note we should use the raw table + * tuple count, not any estimate of its filtered or joined size. + */ + RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); + double ref_tuples = Max(ref_rel->tuples, 1.0); + + fkselec *= 1.0 / ref_tuples; + } + + /* + * If any of the FK columns participated in ec_has_const ECs, then + * equivclass.c will have generated "var = const" restrictions for + * each side of the join, thus reducing the sizes of both input + * relations. Taking the fkselec at face value would amount to + * double-counting the selectivity of the constant restriction for the + * referencing Var. Hence, look for the restriction clause(s) that + * were applied to the referencing Var(s), and divide out their + * selectivity to correct for this. + */ + if (fkinfo->nconst_ec > 0) + { + for (int i = 0; i < fkinfo->nkeys; i++) + { + EquivalenceClass *ec = fkinfo->eclass[i]; + + if (ec && ec->ec_has_const) + { + EquivalenceMember *em = fkinfo->fk_eclass_member[i]; + RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec, + em); + + if (rinfo) + { + Selectivity s0; + + s0 = clause_selectivity(root, + (Node *) rinfo, + 0, + jointype, + sjinfo); + if (s0 > 0) + fkselec /= s0; + } + } + } + } + } + + *restrictlist = worklist; + CLAMP_PROBABILITY(fkselec); + return fkselec; +} + +/* + * set_subquery_size_estimates + * Set the size estimates for a base relation that is a subquery. + * + * The rel's targetlist and restrictinfo list must have been constructed + * already, and the Paths for the subquery must have been completed. + * We look at the subquery's PlannerInfo to extract data. + * + * We set the same fields as set_baserel_size_estimates. + */ +void +set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + PlannerInfo *subroot = rel->subroot; + RelOptInfo *sub_final_rel; + ListCell *lc; + + /* Should only be applied to base relations that are subqueries */ + Assert(rel->relid > 0); + Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_SUBQUERY); + + /* + * Copy raw number of output rows from subquery. All of its paths should + * have the same output rowcount, so just look at cheapest-total. + */ + sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + rel->tuples = sub_final_rel->cheapest_total_path->rows; + + /* + * Compute per-output-column width estimates by examining the subquery's + * targetlist. For any output that is a plain Var, get the width estimate + * that was made while planning the subquery. Otherwise, we leave it to + * set_rel_width to fill in a datatype-based default estimate. + */ + foreach(lc, subroot->parse->targetList) + { + TargetEntry *te = lfirst_node(TargetEntry, lc); + Node *texpr = (Node *) te->expr; + int32 item_width = 0; + + /* junk columns aren't visible to upper query */ + if (te->resjunk) + continue; + + /* + * The subquery could be an expansion of a view that's had columns + * added to it since the current query was parsed, so that there are + * non-junk tlist columns in it that don't correspond to any column + * visible at our query level. Ignore such columns. + */ + if (te->resno < rel->min_attr || te->resno > rel->max_attr) + continue; + + /* + * XXX This currently doesn't work for subqueries containing set + * operations, because the Vars in their tlists are bogus references + * to the first leaf subquery, which wouldn't give the right answer + * even if we could still get to its PlannerInfo. 
+ *
+ * Also, the subquery could be an appendrel for which all branches are
+ * known empty due to constraint exclusion, in which case
+ * set_append_rel_pathlist will have left the attr_widths set to zero.
+ *
+ * In either case, we just leave the width estimate zero until
+ * set_rel_width fixes it.
+ */
+ if (IsA(texpr, Var) &&
+ subroot->parse->setOperations == NULL)
+ {
+ Var *var = (Var *) texpr;
+ RelOptInfo *subrel = find_base_rel(subroot, var->varno);
+
+ item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
+ }
+ rel->attr_widths[te->resno - rel->min_attr] = item_width;
+ }
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_function_size_estimates
+ * Set the size estimates for a base relation that is a function call.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ RangeTblEntry *rte;
+ ListCell *lc;
+
+ /* Should only be applied to base relations that are functions */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_FUNCTION);
+
+ /*
+ * Estimate number of rows the functions will return. The rowcount of the
+ * node is that of the largest function result.
+ */
+ rel->tuples = 0;
+ foreach(lc, rte->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+ double ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
+
+ if (ntup > rel->tuples)
+ rel->tuples = ntup;
+ }
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_tablefunc_size_estimates
+ * Set the size estimates for a base relation that is a tablefunc call.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ /* Should only be applied to base relations that are functions */
+ Assert(rel->relid > 0);
+ Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_TABLEFUNC);
+
+ rel->tuples = 100;
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_values_size_estimates
+ * Set the size estimates for a base relation that is a values list.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ RangeTblEntry *rte;
+
+ /* Should only be applied to base relations that are values lists */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_VALUES);
+
+ /*
+ * Estimate number of rows the values list will return. We know this
+ * precisely based on the list length (well, barring set-returning
+ * functions in list items, but that's a refinement not catered for
+ * anywhere else either).
+ */
+ rel->tuples = list_length(rte->values_lists);
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_cte_size_estimates
+ * Set the size estimates for a base relation that is a CTE reference.
+ * + * The rel's targetlist and restrictinfo list must have been constructed + * already, and we need an estimate of the number of rows returned by the CTE + * (if a regular CTE) or the non-recursive term (if a self-reference). + * + * We set the same fields as set_baserel_size_estimates. + */ +void +set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, double cte_rows) +{ + RangeTblEntry *rte; + + /* Should only be applied to base relations that are CTE references */ + Assert(rel->relid > 0); + rte = planner_rt_fetch(rel->relid, root); + Assert(rte->rtekind == RTE_CTE); + + if (rte->self_reference) + { + /* + * In a self-reference, arbitrarily assume the average worktable size + * is about 10 times the nonrecursive term's size. + */ + rel->tuples = 10 * cte_rows; + } + else + { + /* Otherwise just believe the CTE's rowcount estimate */ + rel->tuples = cte_rows; + } + + /* Now estimate number of output rows, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * set_namedtuplestore_size_estimates + * Set the size estimates for a base relation that is a tuplestore reference. + * + * The rel's targetlist and restrictinfo list must have been constructed + * already. + * + * We set the same fields as set_baserel_size_estimates. + */ +void +set_namedtuplestore_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + RangeTblEntry *rte; + + /* Should only be applied to base relations that are tuplestore references */ + Assert(rel->relid > 0); + rte = planner_rt_fetch(rel->relid, root); + Assert(rte->rtekind == RTE_NAMEDTUPLESTORE); + + /* + * Use the estimate provided by the code which is generating the named + * tuplestore. In some cases, the actual number might be available; in + * others the same plan will be re-used, so a "typical" value might be + * estimated and used. + */ + rel->tuples = rte->enrtuples; + if (rel->tuples < 0) + rel->tuples = 1000; + + /* Now estimate number of output rows, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * set_result_size_estimates + * Set the size estimates for an RTE_RESULT base relation + * + * The rel's targetlist and restrictinfo list must have been constructed + * already. + * + * We set the same fields as set_baserel_size_estimates. + */ +void +set_result_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + /* Should only be applied to RTE_RESULT base relations */ + Assert(rel->relid > 0); + Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_RESULT); + + /* RTE_RESULT always generates a single row, natively */ + rel->tuples = 1; + + /* Now estimate number of output rows, etc */ + set_baserel_size_estimates(root, rel); +} + +/* + * set_foreign_size_estimates + * Set the size estimates for a base relation that is a foreign table. + * + * There is not a whole lot that we can do here; the foreign-data wrapper + * is responsible for producing useful estimates. We can do a decent job + * of estimating baserestrictcost, so we set that, and we also set up width + * using what will be purely datatype-driven estimates from the targetlist. + * There is no way to do anything sane with the rows value, so we just put + * a default estimate and hope that the wrapper can improve on it. The + * wrapper's GetForeignRelSize function will be called momentarily. + * + * The rel's targetlist and restrictinfo list must have been constructed + * already. 
+ */ +void +set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + /* Should only be applied to base relations */ + Assert(rel->relid > 0); + + rel->rows = 1000; /* entirely bogus default estimate */ + + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); + + set_rel_width(root, rel); +} + + +/* + * set_rel_width + * Set the estimated output width of a base relation. + * + * The estimated output width is the sum of the per-attribute width estimates + * for the actually-referenced columns, plus any PHVs or other expressions + * that have to be calculated at this relation. This is the amount of data + * we'd need to pass upwards in case of a sort, hash, etc. + * + * This function also sets reltarget->cost, so it's a bit misnamed now. + * + * NB: this works best on plain relations because it prefers to look at + * real Vars. For subqueries, set_subquery_size_estimates will already have + * copied up whatever per-column estimates were made within the subquery, + * and for other types of rels there isn't much we can do anyway. We fall + * back on (fairly stupid) datatype-based width estimates if we can't get + * any better number. + * + * The per-attribute width estimates are cached for possible re-use while + * building join relations or post-scan/join pathtargets. + */ +static void +set_rel_width(PlannerInfo *root, RelOptInfo *rel) +{ + Oid reloid = planner_rt_fetch(rel->relid, root)->relid; + int32 tuple_width = 0; + bool have_wholerow_var = false; + ListCell *lc; + + /* Vars are assumed to have cost zero, but other exprs do not */ + rel->reltarget->cost.startup = 0; + rel->reltarget->cost.per_tuple = 0; + + foreach(lc, rel->reltarget->exprs) + { + Node *node = (Node *) lfirst(lc); + + /* + * Ordinarily, a Var in a rel's targetlist must belong to that rel; + * but there are corner cases involving LATERAL references where that + * isn't so. If the Var has the wrong varno, fall through to the + * generic case (it doesn't seem worth the trouble to be any smarter). + */ + if (IsA(node, Var) && + ((Var *) node)->varno == rel->relid) + { + Var *var = (Var *) node; + int ndx; + int32 item_width; + + Assert(var->varattno >= rel->min_attr); + Assert(var->varattno <= rel->max_attr); + + ndx = var->varattno - rel->min_attr; + + /* + * If it's a whole-row Var, we'll deal with it below after we have + * already cached as many attr widths as possible. + */ + if (var->varattno == 0) + { + have_wholerow_var = true; + continue; + } + + /* + * The width may have been cached already (especially if it's a + * subquery), so don't duplicate effort. + */ + if (rel->attr_widths[ndx] > 0) + { + tuple_width += rel->attr_widths[ndx]; + continue; + } + + /* Try to get column width from statistics */ + if (reloid != InvalidOid && var->varattno > 0) + { + item_width = get_attavgwidth(reloid, var->varattno); + if (item_width > 0) + { + rel->attr_widths[ndx] = item_width; + tuple_width += item_width; + continue; + } + } + + /* + * Not a plain relation, or can't find statistics for it. Estimate + * using just the type info. + */ + item_width = get_typavgwidth(var->vartype, var->vartypmod); + Assert(item_width > 0); + rel->attr_widths[ndx] = item_width; + tuple_width += item_width; + } + else if (IsA(node, PlaceHolderVar)) + { + /* + * We will need to evaluate the PHV's contained expression while + * scanning this rel, so be sure to include it in reltarget->cost. 
+ */ + PlaceHolderVar *phv = (PlaceHolderVar *) node; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, false); + QualCost cost; + + tuple_width += phinfo->ph_width; + cost_qual_eval_node(&cost, (Node *) phv->phexpr, root); + rel->reltarget->cost.startup += cost.startup; + rel->reltarget->cost.per_tuple += cost.per_tuple; + } + else + { + /* + * We could be looking at an expression pulled up from a subquery, + * or a ROW() representing a whole-row child Var, etc. Do what we + * can using the expression type information. + */ + int32 item_width; + QualCost cost; + + item_width = get_typavgwidth(exprType(node), exprTypmod(node)); + Assert(item_width > 0); + tuple_width += item_width; + /* Not entirely clear if we need to account for cost, but do so */ + cost_qual_eval_node(&cost, node, root); + rel->reltarget->cost.startup += cost.startup; + rel->reltarget->cost.per_tuple += cost.per_tuple; + } + } + + /* + * If we have a whole-row reference, estimate its width as the sum of + * per-column widths plus heap tuple header overhead. + */ + if (have_wholerow_var) + { + int32 wholerow_width = MAXALIGN(SizeofHeapTupleHeader); + + if (reloid != InvalidOid) + { + /* Real relation, so estimate true tuple width */ + wholerow_width += get_relation_data_width(reloid, + rel->attr_widths - rel->min_attr); + } + else + { + /* Do what we can with info for a phony rel */ + AttrNumber i; + + for (i = 1; i <= rel->max_attr; i++) + wholerow_width += rel->attr_widths[i - rel->min_attr]; + } + + rel->attr_widths[0 - rel->min_attr] = wholerow_width; + + /* + * Include the whole-row Var as part of the output tuple. Yes, that + * really is what happens at runtime. + */ + tuple_width += wholerow_width; + } + + Assert(tuple_width >= 0); + rel->reltarget->width = tuple_width; +} + +/* + * set_pathtarget_cost_width + * Set the estimated eval cost and output width of a PathTarget tlist. + * + * As a notational convenience, returns the same PathTarget pointer passed in. + * + * Most, though not quite all, uses of this function occur after we've run + * set_rel_width() for base relations; so we can usually obtain cached width + * estimates for Vars. If we can't, fall back on datatype-based width + * estimates. Present early-planning uses of PathTargets don't need accurate + * widths badly enough to justify going to the catalogs for better data. + */ +PathTarget * +set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target) +{ + int32 tuple_width = 0; + ListCell *lc; + + /* Vars are assumed to have cost zero, but other exprs do not */ + target->cost.startup = 0; + target->cost.per_tuple = 0; + + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + int32 item_width; + + /* We should not see any upper-level Vars here */ + Assert(var->varlevelsup == 0); + + /* Try to get data from RelOptInfo cache */ + if (var->varno < root->simple_rel_array_size) + { + RelOptInfo *rel = root->simple_rel_array[var->varno]; + + if (rel != NULL && + var->varattno >= rel->min_attr && + var->varattno <= rel->max_attr) + { + int ndx = var->varattno - rel->min_attr; + + if (rel->attr_widths[ndx] > 0) + { + tuple_width += rel->attr_widths[ndx]; + continue; + } + } + } + + /* + * No cached data available, so estimate using just the type info. + */ + item_width = get_typavgwidth(var->vartype, var->vartypmod); + Assert(item_width > 0); + tuple_width += item_width; + } + else + { + /* + * Handle general expressions using type info. 
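To make the width accounting concrete, here is a minimal standalone sketch (hypothetical column widths, not PostgreSQL types or APIs): cached per-column averages are summed for the referenced columns, a datatype-based estimate fills in where no cache entry exists, and a whole-row reference additionally pays the tuple-header overhead.

#include <stdio.h>

#define HEADER_OVERHEAD 24		/* stand-in for MAXALIGN(SizeofHeapTupleHeader) */

int
main(void)
{
	/* Illustrative data: <= 0 in cached[] means "no statistics cached". */
	int		cached[] = {4, 0, 16};
	int		type_fallback[] = {4, 32, 16};
	int		tuple_width = 0;
	int		wholerow_width = HEADER_OVERHEAD;

	for (int i = 0; i < 3; i++)
	{
		int		w = (cached[i] > 0) ? cached[i] : type_fallback[i];

		tuple_width += w;			/* referenced columns */
		wholerow_width += w;		/* whole-row Var also pays the header */
	}
	printf("row width %d, whole-row width %d\n", tuple_width, wholerow_width);
	return 0;
}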
+ */ + int32 item_width; + QualCost cost; + + item_width = get_typavgwidth(exprType(node), exprTypmod(node)); + Assert(item_width > 0); + tuple_width += item_width; + + /* Account for cost, too */ + cost_qual_eval_node(&cost, node, root); + target->cost.startup += cost.startup; + target->cost.per_tuple += cost.per_tuple; + } + } + + Assert(tuple_width >= 0); + target->width = tuple_width; + + return target; +} + +/* + * relation_byte_size + * Estimate the storage space in bytes for a given number of tuples + * of a given width (size in bytes). + */ +static double +relation_byte_size(double tuples, int width) +{ + return tuples * (MAXALIGN(width) + MAXALIGN(SizeofHeapTupleHeader)); +} + +/* + * page_size + * Returns an estimate of the number of pages covered by a given + * number of tuples of a given width (size in bytes). + */ +static double +page_size(double tuples, int width) +{ + return ceil(relation_byte_size(tuples, width) / BLCKSZ); +} + +/* + * Estimate the fraction of the work that each worker will do given the + * number of workers budgeted for the path. + */ +static double +get_parallel_divisor(Path *path) +{ + double parallel_divisor = path->parallel_workers; + + /* + * Early experience with parallel query suggests that when there is only + * one worker, the leader often makes a very substantial contribution to + * executing the parallel portion of the plan, but as more workers are + * added, it does less and less, because it's busy reading tuples from the + * workers and doing whatever non-parallel post-processing is needed. By + * the time we reach 4 workers, the leader no longer makes a meaningful + * contribution. Thus, for now, estimate that the leader spends 30% of + * its time servicing each worker, and the remainder executing the + * parallel plan. + */ + if (parallel_leader_participation) + { + double leader_contribution; + + leader_contribution = 1.0 - (0.3 * path->parallel_workers); + if (leader_contribution > 0) + parallel_divisor += leader_contribution; + } + + return parallel_divisor; +} + +/* + * compute_bitmap_pages + * + * compute number of pages fetched from heap in bitmap heap scan. + */ +double +compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, + int loop_count, Cost *cost, double *tuple) +{ + Cost indexTotalCost; + Selectivity indexSelectivity; + double T; + double pages_fetched; + double tuples_fetched; + double heap_pages; + long maxentries; + + /* + * Fetch total cost of obtaining the bitmap, as well as its total + * selectivity. + */ + cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity); + + /* + * Estimate number of main-table pages fetched. + */ + tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + + T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + + /* + * For a single scan, the number of heap pages that need to be fetched is + * the same as the Mackert and Lohman formula for the case T <= b (ie, no + * re-reads needed). + */ + pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + + /* + * Calculate the number of pages fetched from the heap. Then based on + * current work_mem estimate get the estimated maxentries in the bitmap. + * (Note that we always do this calculation based on the number of pages + * that would be fetched in a single iteration, even if loop_count > 1. + * That's correct, because only that number of entries will be stored in + * the bitmap at one time.) 
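As a worked example of the divisor above (a standalone sketch, assuming leader participation): with 2 planned workers the leader contributes 1.0 - 0.3 * 2 = 0.4, so the divisor is 2.4; from 4 workers on the leader contributes nothing and the divisor is simply the worker count.

#include <stdio.h>

/* Illustrative sketch only: the leader-contribution heuristic above. */
static double
parallel_divisor(int workers, int leader_participation)
{
	double	divisor = workers;

	if (leader_participation)
	{
		double	leader = 1.0 - 0.3 * workers;

		if (leader > 0)
			divisor += leader;
	}
	return divisor;
}

int
main(void)
{
	for (int w = 1; w <= 5; w++)
		printf("%d workers -> divisor %.1f\n", w, parallel_divisor(w, 1));
	/* prints 1.7, 2.4, 3.1, 4.0, 5.0 */
	return 0;
}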
+ */ + heap_pages = Min(pages_fetched, baserel->pages); + maxentries = tbm_calculate_entries(work_mem * 1024L); + + if (loop_count > 1) + { + /* + * For repeated bitmap scans, scale up the number of tuples fetched in + * the Mackert and Lohman formula by the number of scans, so that we + * estimate the number of pages fetched by all the scans. Then + * pro-rate for one scan. + */ + pages_fetched = index_pages_fetched(tuples_fetched * loop_count, + baserel->pages, + get_indexpath_pages(bitmapqual), + root); + pages_fetched /= loop_count; + } + + if (pages_fetched >= T) + pages_fetched = T; + else + pages_fetched = ceil(pages_fetched); + + if (maxentries < heap_pages) + { + double exact_pages; + double lossy_pages; + + /* + * Crude approximation of the number of lossy pages. Because of the + * way tbm_lossify() is coded, the number of lossy pages increases + * very sharply as soon as we run short of memory; this formula has + * that property and seems to perform adequately in testing, but it's + * possible we could do better somehow. + */ + lossy_pages = Max(0, heap_pages - maxentries / 2); + exact_pages = heap_pages - lossy_pages; + + /* + * If there are lossy pages then recompute the number of tuples + * processed by the bitmap heap node. We assume here that the chance + * of a given tuple coming from an exact page is the same as the + * chance that a given page is exact. This might not be true, but + * it's not clear how we can do any better. + */ + if (lossy_pages > 0) + tuples_fetched = + clamp_row_est(indexSelectivity * + (exact_pages / heap_pages) * baserel->tuples + + (lossy_pages / heap_pages) * baserel->tuples); + } + + if (cost) + *cost = indexTotalCost; + if (tuple) + *tuple = tuples_fetched; + + return pages_fetched; +} diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c new file mode 100644 index 0000000..01d14df --- /dev/null +++ b/src/backend/optimizer/path/equivclass.c @@ -0,0 +1,3255 @@ +/*------------------------------------------------------------------------- + * + * equivclass.c + * Routines for managing EquivalenceClasses + * + * See src/backend/optimizer/README for discussion of EquivalenceClasses. 
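A standalone sketch of the single-scan page estimate above, with made-up numbers (10000 relation pages, 50000 matching tuples, room for 4000 bitmap entries): the Mackert-Lohman form 2*T*N / (2*T + N) is clamped to the relation size, and once the heap-page count exceeds the work_mem entry budget, lossy pages are approximated as max(0, heap_pages - maxentries/2).

#include <math.h>
#include <stdio.h>

/* Illustrative sketch only: single-scan bitmap heap page estimate. */
static double
bitmap_heap_pages(double rel_pages, double tuples_fetched, double maxentries,
				  double *lossy_pages)
{
	double	T = (rel_pages > 1) ? rel_pages : 1.0;
	double	pages = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
	double	heap_pages;

	pages = (pages >= T) ? T : ceil(pages);
	heap_pages = (pages < rel_pages) ? pages : rel_pages;

	/* crude lossy-page approximation once the bitmap must be lossified */
	*lossy_pages = (maxentries < heap_pages) ?
		fmax(0.0, heap_pages - maxentries / 2.0) : 0.0;

	return pages;
}

int
main(void)
{
	double	lossy;
	double	pages = bitmap_heap_pages(10000.0, 50000.0, 4000.0, &lossy);

	printf("pages=%.0f lossy=%.0f\n", pages, lossy);	/* pages=10000 lossy=8000 */
	return 0;
}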
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/equivclass.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "access/stratnum.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" +#include "utils/lsyscache.h" + + +static EquivalenceMember *add_eq_member(EquivalenceClass *ec, + Expr *expr, Relids relids, Relids nullable_relids, + bool is_child, Oid datatype); +static bool is_exprlist_member(Expr *node, List *exprs); +static void generate_base_implied_equalities_const(PlannerInfo *root, + EquivalenceClass *ec); +static void generate_base_implied_equalities_no_const(PlannerInfo *root, + EquivalenceClass *ec); +static void generate_base_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec); +static List *generate_join_implied_equalities_normal(PlannerInfo *root, + EquivalenceClass *ec, + Relids join_relids, + Relids outer_relids, + Relids inner_relids); +static List *generate_join_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec, + Relids nominal_join_relids, + Relids outer_relids, + Relids nominal_inner_relids, + RelOptInfo *inner_rel); +static Oid select_equality_operator(EquivalenceClass *ec, + Oid lefttype, Oid righttype); +static RestrictInfo *create_join_clause(PlannerInfo *root, + EquivalenceClass *ec, Oid opno, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec); +static bool reconsider_outer_join_clause(PlannerInfo *root, + RestrictInfo *rinfo, + bool outer_on_left); +static bool reconsider_full_join_clause(PlannerInfo *root, + RestrictInfo *rinfo); +static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root, + Relids relids); +static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, + Relids relids2); + + +/* + * process_equivalence + * The given clause has a mergejoinable operator and can be applied without + * any delay by an outer join, so its two sides can be considered equal + * anywhere they are both computable; moreover that equality can be + * extended transitively. Record this knowledge in the EquivalenceClass + * data structure, if applicable. Returns true if successful, false if not + * (in which case caller should treat the clause as ordinary, not an + * equivalence). + * + * In some cases, although we cannot convert a clause into EquivalenceClass + * knowledge, we can still modify it to a more useful form than the original. + * Then, *p_restrictinfo will be replaced by a new RestrictInfo, which is what + * the caller should use for further processing. + * + * If below_outer_join is true, then the clause was found below the nullable + * side of an outer join, so its sides might validly be both NULL rather than + * strictly equal. We can still deduce equalities in such cases, but we take + * care to mark an EquivalenceClass if it came from any such clauses. Also, + * we have to check that both sides are either pseudo-constants or strict + * functions of Vars, else they might not both go to NULL above the outer + * join. 
(This is the main reason why we need a failure return. It's more + * convenient to check this case here than at the call sites...) + * + * We also reject proposed equivalence clauses if they contain leaky functions + * and have security_level above zero. The EC evaluation rules require us to + * apply certain tests at certain joining levels, and we can't tolerate + * delaying any test on security_level grounds. By rejecting candidate clauses + * that might require security delays, we ensure it's safe to apply an EC + * clause as soon as it's supposed to be applied. + * + * On success return, we have also initialized the clause's left_ec/right_ec + * fields to point to the EquivalenceClass representing it. This saves lookup + * effort later. + * + * Note: constructing merged EquivalenceClasses is a standard UNION-FIND + * problem, for which there exist better data structures than simple lists. + * If this code ever proves to be a bottleneck then it could be sped up --- + * but for now, simple is beautiful. + * + * Note: this is only called during planner startup, not during GEQO + * exploration, so we need not worry about whether we're in the right + * memory context. + */ +bool +process_equivalence(PlannerInfo *root, + RestrictInfo **p_restrictinfo, + bool below_outer_join) +{ + RestrictInfo *restrictinfo = *p_restrictinfo; + Expr *clause = restrictinfo->clause; + Oid opno, + collation, + item1_type, + item2_type; + Expr *item1; + Expr *item2; + Relids item1_relids, + item2_relids, + item1_nullable_relids, + item2_nullable_relids; + List *opfamilies; + EquivalenceClass *ec1, + *ec2; + EquivalenceMember *em1, + *em2; + ListCell *lc1; + int ec2_idx; + + /* Should not already be marked as having generated an eclass */ + Assert(restrictinfo->left_ec == NULL); + Assert(restrictinfo->right_ec == NULL); + + /* Reject if it is potentially postponable by security considerations */ + if (restrictinfo->security_level > 0 && !restrictinfo->leakproof) + return false; + + /* Extract info from given clause */ + Assert(is_opclause(clause)); + opno = ((OpExpr *) clause)->opno; + collation = ((OpExpr *) clause)->inputcollid; + item1 = (Expr *) get_leftop(clause); + item2 = (Expr *) get_rightop(clause); + item1_relids = restrictinfo->left_relids; + item2_relids = restrictinfo->right_relids; + + /* + * Ensure both input expressions expose the desired collation (their types + * should be OK already); see comments for canonicalize_ec_expression. + */ + item1 = canonicalize_ec_expression(item1, + exprType((Node *) item1), + collation); + item2 = canonicalize_ec_expression(item2, + exprType((Node *) item2), + collation); + + /* + * Clauses of the form X=X cannot be translated into EquivalenceClasses. + * We'd either end up with a single-entry EC, losing the knowledge that + * the clause was present at all, or else make an EC with duplicate + * entries, causing other issues. + */ + if (equal(item1, item2)) + { + /* + * If the operator is strict, then the clause can be treated as just + * "X IS NOT NULL". (Since we know we are considering a top-level + * qual, we can ignore the difference between FALSE and NULL results.) + * It's worth making the conversion because we'll typically get a much + * better selectivity estimate than we would for X=X. + * + * If the operator is not strict, we can't be sure what it will do + * with NULLs, so don't attempt to optimize it. 
+ */ + set_opfuncid((OpExpr *) clause); + if (func_strict(((OpExpr *) clause)->opfuncid)) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = item1; + ntest->nulltesttype = IS_NOT_NULL; + ntest->argisrow = false; /* correct even if composite arg */ + ntest->location = -1; + + *p_restrictinfo = + make_restrictinfo(root, + (Expr *) ntest, + restrictinfo->is_pushed_down, + restrictinfo->outerjoin_delayed, + restrictinfo->pseudoconstant, + restrictinfo->security_level, + NULL, + restrictinfo->outer_relids, + restrictinfo->nullable_relids); + } + return false; + } + + /* + * If below outer join, check for strictness, else reject. + */ + if (below_outer_join) + { + if (!bms_is_empty(item1_relids) && + contain_nonstrict_functions((Node *) item1)) + return false; /* LHS is non-strict but not constant */ + if (!bms_is_empty(item2_relids) && + contain_nonstrict_functions((Node *) item2)) + return false; /* RHS is non-strict but not constant */ + } + + /* Calculate nullable-relid sets for each side of the clause */ + item1_nullable_relids = bms_intersect(item1_relids, + restrictinfo->nullable_relids); + item2_nullable_relids = bms_intersect(item2_relids, + restrictinfo->nullable_relids); + + /* + * We use the declared input types of the operator, not exprType() of the + * inputs, as the nominal datatypes for opfamily lookup. This presumes + * that btree operators are always registered with amoplefttype and + * amoprighttype equal to their declared input types. We will need this + * info anyway to build EquivalenceMember nodes, and by extracting it now + * we can use type comparisons to short-circuit some equal() tests. + */ + op_input_types(opno, &item1_type, &item2_type); + + opfamilies = restrictinfo->mergeopfamilies; + + /* + * Sweep through the existing EquivalenceClasses looking for matches to + * item1 and item2. These are the possible outcomes: + * + * 1. We find both in the same EC. The equivalence is already known, so + * there's nothing to do. + * + * 2. We find both in different ECs. Merge the two ECs together. + * + * 3. We find just one. Add the other to its EC. + * + * 4. We find neither. Make a new, two-entry EC. + * + * Note: since all ECs are built through this process or the similar + * search in get_eclass_for_sort_expr(), it's impossible that we'd match + * an item in more than one existing nonvolatile EC. So it's okay to stop + * at the first match. + */ + ec1 = ec2 = NULL; + em1 = em2 = NULL; + ec2_idx = -1; + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + + /* + * The collation has to match; check this first since it's cheaper + * than the opfamily comparison. + */ + if (collation != cur_ec->ec_collation) + continue; + + /* + * A "match" requires matching sets of btree opfamilies. Use of + * equal() for this test has implications discussed in the comments + * for get_mergejoin_opfamilies(). + */ + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + Assert(!cur_em->em_is_child); /* no children yet */ + + /* + * If below an outer join, don't match constants: they're not as + * constant as they look. 
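As a small illustration of that special case (assumed expressions, not planner data structures): a clause like t.x = t.x with a strict equality operator is rewritten to t.x IS NOT NULL, while a non-strict operator leaves the clause untouched.

#include <stdio.h>
#include <string.h>

/* Illustrative sketch only: the X = X handling described above. */
static const char *
simplify_self_equality(const char *lhs, const char *rhs, int op_is_strict)
{
	if (strcmp(lhs, rhs) != 0)
		return "candidate for an EquivalenceClass";
	/* X = X: a strict operator can only fail for NULL input */
	return op_is_strict ? "rewrite to: X IS NOT NULL"
						: "leave clause unchanged";
}

int
main(void)
{
	printf("%s\n", simplify_self_equality("t.x", "t.x", 1));
	printf("%s\n", simplify_self_equality("t.x", "t.y", 1));
	return 0;
}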
+ */ + if ((below_outer_join || cur_ec->ec_below_outer_join) && + cur_em->em_is_const) + continue; + + if (!ec1 && + item1_type == cur_em->em_datatype && + equal(item1, cur_em->em_expr)) + { + ec1 = cur_ec; + em1 = cur_em; + if (ec2) + break; + } + + if (!ec2 && + item2_type == cur_em->em_datatype && + equal(item2, cur_em->em_expr)) + { + ec2 = cur_ec; + ec2_idx = foreach_current_index(lc1); + em2 = cur_em; + if (ec1) + break; + } + } + + if (ec1 && ec2) + break; + } + + /* Sweep finished, what did we find? */ + + if (ec1 && ec2) + { + /* If case 1, nothing to do, except add to sources */ + if (ec1 == ec2) + { + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + restrictinfo->security_level); + ec1->ec_max_security = Max(ec1->ec_max_security, + restrictinfo->security_level); + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec1; + restrictinfo->right_ec = ec1; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; + return true; + } + + /* + * Case 2: need to merge ec1 and ec2. This should never happen after + * the ECs have reached canonical state; otherwise, pathkeys could be + * rendered non-canonical by the merge, and relation eclass indexes + * would get broken by removal of an eq_classes list entry. + */ + if (root->ec_merging_done) + elog(ERROR, "too late to merge equivalence classes"); + + /* + * We add ec2's items to ec1, then set ec2's ec_merged link to point + * to ec1 and remove ec2 from the eq_classes list. We cannot simply + * delete ec2 because that could leave dangling pointers in existing + * PathKeys. We leave it behind with a link so that the merged EC can + * be found. 
+ */ + ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members); + ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); + ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives); + ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); + ec1->ec_has_const |= ec2->ec_has_const; + /* can't need to set has_volatile */ + ec1->ec_below_outer_join |= ec2->ec_below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + ec2->ec_min_security); + ec1->ec_max_security = Max(ec1->ec_max_security, + ec2->ec_max_security); + ec2->ec_merged = ec1; + root->eq_classes = list_delete_nth_cell(root->eq_classes, ec2_idx); + /* just to avoid debugging confusion w/ dangling pointers: */ + ec2->ec_members = NIL; + ec2->ec_sources = NIL; + ec2->ec_derives = NIL; + ec2->ec_relids = NULL; + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + restrictinfo->security_level); + ec1->ec_max_security = Max(ec1->ec_max_security, + restrictinfo->security_level); + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec1; + restrictinfo->right_ec = ec1; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; + } + else if (ec1) + { + /* Case 3: add item2 to ec1 */ + em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids, + false, item2_type); + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + restrictinfo->security_level); + ec1->ec_max_security = Max(ec1->ec_max_security, + restrictinfo->security_level); + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec1; + restrictinfo->right_ec = ec1; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; + } + else if (ec2) + { + /* Case 3: add item1 to ec2 */ + em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids, + false, item1_type); + ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); + ec2->ec_below_outer_join |= below_outer_join; + ec2->ec_min_security = Min(ec2->ec_min_security, + restrictinfo->security_level); + ec2->ec_max_security = Max(ec2->ec_max_security, + restrictinfo->security_level); + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec2; + restrictinfo->right_ec = ec2; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; + } + else + { + /* Case 4: make a new, two-entry EC */ + EquivalenceClass *ec = makeNode(EquivalenceClass); + + ec->ec_opfamilies = opfamilies; + ec->ec_collation = collation; + ec->ec_members = NIL; + ec->ec_sources = list_make1(restrictinfo); + ec->ec_derives = NIL; + ec->ec_relids = NULL; + ec->ec_has_const = false; + ec->ec_has_volatile = false; + ec->ec_below_outer_join = below_outer_join; + ec->ec_broken = false; + ec->ec_sortref = 0; + ec->ec_min_security = restrictinfo->security_level; + ec->ec_max_security = restrictinfo->security_level; + ec->ec_merged = NULL; + em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids, + false, item1_type); + em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids, + false, item2_type); + + root->eq_classes = lappend(root->eq_classes, ec); + + /* mark the RI as associated with this eclass */ + restrictinfo->left_ec = ec; + restrictinfo->right_ec = 
ec; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; + } + + return true; +} + +/* + * canonicalize_ec_expression + * + * This function ensures that the expression exposes the expected type and + * collation, so that it will be equal() to other equivalence-class expressions + * that it ought to be equal() to. + * + * The rule for datatypes is that the exposed type should match what it would + * be for an input to an operator of the EC's opfamilies; which is usually + * the declared input type of the operator, but in the case of polymorphic + * operators no relabeling is wanted (compare the behavior of parse_coerce.c). + * Expressions coming in from quals will generally have the right type + * already, but expressions coming from indexkeys may not (because they are + * represented without any explicit relabel in pg_index), and the same problem + * occurs for sort expressions (because the parser is likewise cavalier about + * putting relabels on them). Such cases will be binary-compatible with the + * real operators, so adding a RelabelType is sufficient. + * + * Also, the expression's exposed collation must match the EC's collation. + * This is important because in comparisons like "foo < bar COLLATE baz", + * only one of the expressions has the correct exposed collation as we receive + * it from the parser. Forcing both of them to have it ensures that all + * variant spellings of such a construct behave the same. Again, we can + * stick on a RelabelType to force the right exposed collation. (It might + * work to not label the collation at all in EC members, but this is risky + * since some parts of the system expect exprCollation() to deliver the + * right answer for a sort key.) + */ +Expr * +canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation) +{ + Oid expr_type = exprType((Node *) expr); + + /* + * For a polymorphic-input-type opclass, just keep the same exposed type. + * RECORD opclasses work like polymorphic-type ones for this purpose. + */ + if (IsPolymorphicType(req_type) || req_type == RECORDOID) + req_type = expr_type; + + /* + * No work if the expression exposes the right type/collation already. + */ + if (expr_type != req_type || + exprCollation((Node *) expr) != req_collation) + { + /* + * If we have to change the type of the expression, set typmod to -1, + * since the new type may not have the same typmod interpretation. + * When we only have to change collation, preserve the exposed typmod. + */ + int32 req_typmod; + + if (expr_type != req_type) + req_typmod = -1; + else + req_typmod = exprTypmod((Node *) expr); + + /* + * Use applyRelabelType so that we preserve const-flatness. This is + * important since eval_const_expressions has already been applied. + */ + expr = (Expr *) applyRelabelType((Node *) expr, + req_type, req_typmod, req_collation, + COERCE_IMPLICIT_CAST, -1, false); + } + + return expr; +} + +/* + * add_eq_member - build a new EquivalenceMember and add it to an EC + */ +static EquivalenceMember * +add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, + Relids nullable_relids, bool is_child, Oid datatype) +{ + EquivalenceMember *em = makeNode(EquivalenceMember); + + em->em_expr = expr; + em->em_relids = relids; + em->em_nullable_relids = nullable_relids; + em->em_is_const = false; + em->em_is_child = is_child; + em->em_datatype = datatype; + + if (bms_is_empty(relids)) + { + /* + * No Vars, assume it's a pseudoconstant. 
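The four outcomes above amount to incremental union-find over expressions. A minimal standalone sketch (plain strings in place of expression trees, no opfamily or collation checks) of how successive mergejoinable clauses build and merge classes:

#include <stdio.h>
#include <string.h>

#define MAXEXPRS 16

/* Illustrative sketch only: class_of[i] is the class id of exprs[i]. */
static const char *exprs[MAXEXPRS];
static int	class_of[MAXEXPRS];
static int	nexprs = 0;

static int
lookup(const char *e)
{
	for (int i = 0; i < nexprs; i++)
		if (strcmp(exprs[i], e) == 0)
			return i;
	exprs[nexprs] = e;
	class_of[nexprs] = nexprs;		/* not seen before: new single-member class */
	return nexprs++;
}

/* Process "a = b": make the two sides' classes one class (cases 1-4 above,
 * in simplified form). */
static void
process_equality(const char *a, const char *b)
{
	int		ca = class_of[lookup(a)];
	int		cb = class_of[lookup(b)];

	for (int i = 0; i < nexprs; i++)
		if (class_of[i] == cb)
			class_of[i] = ca;		/* merge cb into ca */
}

int
main(void)
{
	process_equality("a.x", "b.y");
	process_equality("b.y", "c.z");	/* transitivity: a.x, b.y, c.z end up together */
	printf("a.x and c.z in same class: %d\n",
		   class_of[lookup("a.x")] == class_of[lookup("c.z")]);
	return 0;
}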
This is correct for entries + * generated from process_equivalence(), because a WHERE clause can't + * contain aggregates or SRFs, and non-volatility was checked before + * process_equivalence() ever got called. But + * get_eclass_for_sort_expr() has to work harder. We put the tests + * there not here to save cycles in the equivalence case. + */ + Assert(!is_child); + em->em_is_const = true; + ec->ec_has_const = true; + /* it can't affect ec_relids */ + } + else if (!is_child) /* child members don't add to ec_relids */ + { + ec->ec_relids = bms_add_members(ec->ec_relids, relids); + } + ec->ec_members = lappend(ec->ec_members, em); + + return em; +} + + +/* + * get_eclass_for_sort_expr + * Given an expression and opfamily/collation info, find an existing + * equivalence class it is a member of; if none, optionally build a new + * single-member EquivalenceClass for it. + * + * expr is the expression, and nullable_relids is the set of base relids + * that are potentially nullable below it. We actually only care about + * the set of such relids that are used in the expression; but for caller + * convenience, we perform that intersection step here. The caller need + * only be sure that nullable_relids doesn't omit any nullable rels that + * might appear in the expr. + * + * sortref is the SortGroupRef of the originating SortGroupClause, if any, + * or zero if not. (It should never be zero if the expression is volatile!) + * + * If rel is not NULL, it identifies a specific relation we're considering + * a path for, and indicates that child EC members for that relation can be + * considered. Otherwise child members are ignored. (Note: since child EC + * members aren't guaranteed unique, a non-NULL value means that there could + * be more than one EC that matches the expression; if so it's order-dependent + * which one you get. This is annoying but it only happens in corner cases, + * so for now we live with just reporting the first match. See also + * generate_implied_equalities_for_column and match_pathkeys_to_index.) + * + * If create_it is true, we'll build a new EquivalenceClass when there is no + * match. If create_it is false, we just return NULL when no match. + * + * This can be used safely both before and after EquivalenceClass merging; + * since it never causes merging it does not invalidate any existing ECs + * or PathKeys. However, ECs added after path generation has begun are + * of limited usefulness, so usually it's best to create them beforehand. + * + * Note: opfamilies must be chosen consistently with the way + * process_equivalence() would do; that is, generated from a mergejoinable + * equality operator. Else we might fail to detect valid equivalences, + * generating poor (but not incorrect) plans. + */ +EquivalenceClass * +get_eclass_for_sort_expr(PlannerInfo *root, + Expr *expr, + Relids nullable_relids, + List *opfamilies, + Oid opcintype, + Oid collation, + Index sortref, + Relids rel, + bool create_it) +{ + Relids expr_relids; + EquivalenceClass *newec; + EquivalenceMember *newem; + ListCell *lc1; + MemoryContext oldcontext; + + /* + * Ensure the expression exposes the correct type and collation. 
+ */ + expr = canonicalize_ec_expression(expr, opcintype, collation); + + /* + * Scan through the existing EquivalenceClasses for a match + */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Never match to a volatile EC, except when we are looking at another + * reference to the same volatile SortGroupClause. + */ + if (cur_ec->ec_has_volatile && + (sortref == 0 || sortref != cur_ec->ec_sortref)) + continue; + + if (collation != cur_ec->ec_collation) + continue; + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* + * Ignore child members unless they match the request. + */ + if (cur_em->em_is_child && + !bms_equal(cur_em->em_relids, rel)) + continue; + + /* + * If below an outer join, don't match constants: they're not as + * constant as they look. + */ + if (cur_ec->ec_below_outer_join && + cur_em->em_is_const) + continue; + + if (opcintype == cur_em->em_datatype && + equal(expr, cur_em->em_expr)) + return cur_ec; /* Match! */ + } + } + + /* No match; does caller want a NULL result? */ + if (!create_it) + return NULL; + + /* + * OK, build a new single-member EC + * + * Here, we must be sure that we construct the EC in the right context. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + newec = makeNode(EquivalenceClass); + newec->ec_opfamilies = list_copy(opfamilies); + newec->ec_collation = collation; + newec->ec_members = NIL; + newec->ec_sources = NIL; + newec->ec_derives = NIL; + newec->ec_relids = NULL; + newec->ec_has_const = false; + newec->ec_has_volatile = contain_volatile_functions((Node *) expr); + newec->ec_below_outer_join = false; + newec->ec_broken = false; + newec->ec_sortref = sortref; + newec->ec_min_security = UINT_MAX; + newec->ec_max_security = 0; + newec->ec_merged = NULL; + + if (newec->ec_has_volatile && sortref == 0) /* should not happen */ + elog(ERROR, "volatile EquivalenceClass has no sortref"); + + /* + * Get the precise set of nullable relids appearing in the expression. + */ + expr_relids = pull_varnos(root, (Node *) expr); + nullable_relids = bms_intersect(nullable_relids, expr_relids); + + newem = add_eq_member(newec, copyObject(expr), expr_relids, + nullable_relids, false, opcintype); + + /* + * add_eq_member doesn't check for volatile functions, set-returning + * functions, aggregates, or window functions, but such could appear in + * sort expressions; so we have to check whether its const-marking was + * correct. + */ + if (newec->ec_has_const) + { + if (newec->ec_has_volatile || + expression_returns_set((Node *) expr) || + contain_agg_clause((Node *) expr) || + contain_window_function((Node *) expr)) + { + newec->ec_has_const = false; + newem->em_is_const = false; + } + } + + root->eq_classes = lappend(root->eq_classes, newec); + + /* + * If EC merging is already complete, we have to mop up by adding the new + * EC to the eclass_indexes of the relation(s) mentioned in it. 
+ */ + if (root->ec_merging_done) + { + int ec_index = list_length(root->eq_classes) - 1; + int i = -1; + + while ((i = bms_next_member(newec->ec_relids, i)) > 0) + { + RelOptInfo *rel = root->simple_rel_array[i]; + + Assert(rel->reloptkind == RELOPT_BASEREL || + rel->reloptkind == RELOPT_DEADREL); + + rel->eclass_indexes = bms_add_member(rel->eclass_indexes, + ec_index); + } + } + + MemoryContextSwitchTo(oldcontext); + + return newec; +} + +/* + * find_ec_member_matching_expr + * Locate an EquivalenceClass member matching the given expr, if any; + * return NULL if no match. + * + * "Matching" is defined as "equal after stripping RelabelTypes". + * This is used for identifying sort expressions, and we need to allow + * binary-compatible relabeling for some cases involving binary-compatible + * sort operators. + * + * Child EC members are ignored unless they belong to given 'relids'. + */ +EquivalenceMember * +find_ec_member_matching_expr(EquivalenceClass *ec, + Expr *expr, + Relids relids) +{ + ListCell *lc; + + /* We ignore binary-compatible relabeling on both ends */ + while (expr && IsA(expr, RelabelType)) + expr = ((RelabelType *) expr)->arg; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); + Expr *emexpr; + + /* + * We shouldn't be trying to sort by an equivalence class that + * contains a constant, so no need to consider such cases any further. + */ + if (em->em_is_const) + continue; + + /* + * Ignore child members unless they belong to the requested rel. + */ + if (em->em_is_child && + !bms_is_subset(em->em_relids, relids)) + continue; + + /* + * Match if same expression (after stripping relabel). + */ + emexpr = em->em_expr; + while (emexpr && IsA(emexpr, RelabelType)) + emexpr = ((RelabelType *) emexpr)->arg; + + if (equal(emexpr, expr)) + return em; + } + + return NULL; +} + +/* + * find_computable_ec_member + * Locate an EquivalenceClass member that can be computed from the + * expressions appearing in "exprs"; return NULL if no match. + * + * "exprs" can be either a list of bare expression trees, or a list of + * TargetEntry nodes. Either way, it should contain Vars and possibly + * Aggrefs and WindowFuncs, which are matched to the corresponding elements + * of the EquivalenceClass's expressions. + * + * Unlike find_ec_member_matching_expr, there's no special provision here + * for binary-compatible relabeling. This is intentional: if we have to + * compute an expression in this way, setrefs.c is going to insist on exact + * matches of Vars to the source tlist. + * + * Child EC members are ignored unless they belong to given 'relids'. + * Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'. + * + * Note: some callers pass root == NULL for notational reasons. This is OK + * when require_parallel_safe is false. + */ +EquivalenceMember * +find_computable_ec_member(PlannerInfo *root, + EquivalenceClass *ec, + List *exprs, + Relids relids, + bool require_parallel_safe) +{ + ListCell *lc; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); + List *exprvars; + ListCell *lc2; + + /* + * We shouldn't be trying to sort by an equivalence class that + * contains a constant, so no need to consider such cases any further. + */ + if (em->em_is_const) + continue; + + /* + * Ignore child members unless they belong to the requested rel. 
+ */ + if (em->em_is_child && + !bms_is_subset(em->em_relids, relids)) + continue; + + /* + * Match if all Vars and quasi-Vars are available in "exprs". + */ + exprvars = pull_var_clause((Node *) em->em_expr, + PVC_INCLUDE_AGGREGATES | + PVC_INCLUDE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + foreach(lc2, exprvars) + { + if (!is_exprlist_member(lfirst(lc2), exprs)) + break; + } + list_free(exprvars); + if (lc2) + continue; /* we hit a non-available Var */ + + /* + * If requested, reject expressions that are not parallel-safe. We + * check this last because it's a rather expensive test. + */ + if (require_parallel_safe && + !is_parallel_safe(root, (Node *) em->em_expr)) + continue; + + return em; /* found usable expression */ + } + + return NULL; +} + +/* + * is_exprlist_member + * Subroutine for find_computable_ec_member: is "node" in "exprs"? + * + * Per the requirements of that function, "exprs" might or might not have + * TargetEntry superstructure. + */ +static bool +is_exprlist_member(Expr *node, List *exprs) +{ + ListCell *lc; + + foreach(lc, exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + if (expr && IsA(expr, TargetEntry)) + expr = ((TargetEntry *) expr)->expr; + + if (equal(node, expr)) + return true; + } + return false; +} + +/* + * Find an equivalence class member expression, all of whose Vars, come from + * the indicated relation. + */ +Expr * +find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) +{ + ListCell *lc_em; + + foreach(lc_em, ec->ec_members) + { + EquivalenceMember *em = lfirst(lc_em); + + if (bms_is_subset(em->em_relids, rel->relids) && + !bms_is_empty(em->em_relids)) + { + /* + * If there is more than one equivalence member whose Vars are + * taken entirely from this relation, we'll be content to choose + * any one of those. + */ + return em->em_expr; + } + } + + /* We didn't find any suitable equivalence class expression */ + return NULL; +} + +/* + * relation_can_be_sorted_early + * Can this relation be sorted on this EC before the final output step? + * + * To succeed, we must find an EC member that prepare_sort_from_pathkeys knows + * how to sort on, given the rel's reltarget as input. There are also a few + * additional constraints based on the fact that the desired sort will be done + * "early", within the scan/join part of the plan. Also, non-parallel-safe + * expressions are ignored if 'require_parallel_safe'. + * + * At some point we might want to return the identified EquivalenceMember, + * but for now, callers only want to know if there is one. + */ +bool +relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, bool require_parallel_safe) +{ + PathTarget *target = rel->reltarget; + EquivalenceMember *em; + ListCell *lc; + + /* + * Reject volatile ECs immediately; such sorts must always be postponed. + */ + if (ec->ec_has_volatile) + return false; + + /* + * Try to find an EM directly matching some reltarget member. + */ + foreach(lc, target->exprs) + { + Expr *targetexpr = (Expr *) lfirst(lc); + + em = find_ec_member_matching_expr(ec, targetexpr, rel->relids); + if (!em) + continue; + + /* + * Reject expressions involving set-returning functions, as those + * can't be computed early either. (Note: this test and the following + * one are effectively checking properties of targetexpr, so there's + * no point in asking whether some other EC member would be better.) + */ + if (expression_returns_set((Node *) em->em_expr)) + continue; + + /* + * If requested, reject expressions that are not parallel-safe. 
We + * check this last because it's a rather expensive test. + */ + if (require_parallel_safe && + !is_parallel_safe(root, (Node *) em->em_expr)) + continue; + + return true; + } + + /* + * Try to find a expression computable from the reltarget. + */ + em = find_computable_ec_member(root, ec, target->exprs, rel->relids, + require_parallel_safe); + if (!em) + return false; + + /* + * Reject expressions involving set-returning functions, as those can't be + * computed early either. (There's no point in looking for another EC + * member in this case; since SRFs can't appear in WHERE, they cannot + * belong to multi-member ECs.) + */ + if (expression_returns_set((Node *) em->em_expr)) + return false; + + return true; +} + +/* + * generate_base_implied_equalities + * Generate any restriction clauses that we can deduce from equivalence + * classes. + * + * When an EC contains pseudoconstants, our strategy is to generate + * "member = const1" clauses where const1 is the first constant member, for + * every other member (including other constants). If we are able to do this + * then we don't need any "var = var" comparisons because we've successfully + * constrained all the vars at their points of creation. If we fail to + * generate any of these clauses due to lack of cross-type operators, we fall + * back to the "ec_broken" strategy described below. (XXX if there are + * multiple constants of different types, it's possible that we might succeed + * in forming all the required clauses if we started from a different const + * member; but this seems a sufficiently hokey corner case to not be worth + * spending lots of cycles on.) + * + * For ECs that contain no pseudoconstants, we generate derived clauses + * "member1 = member2" for each pair of members belonging to the same base + * relation (actually, if there are more than two for the same base relation, + * we only need enough clauses to link each to each other). This provides + * the base case for the recursion: each row emitted by a base relation scan + * will constrain all computable members of the EC to be equal. As each + * join path is formed, we'll add additional derived clauses on-the-fly + * to maintain this invariant (see generate_join_implied_equalities). + * + * If the opfamilies used by the EC do not provide complete sets of cross-type + * equality operators, it is possible that we will fail to generate a clause + * that must be generated to maintain the invariant. (An example: given + * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot + * generate "a.x = a.z" as a restriction clause for A.) In this case we mark + * the EC "ec_broken" and fall back to regurgitating its original source + * RestrictInfos at appropriate times. We do not try to retract any derived + * clauses already generated from the broken EC, so the resulting plan could + * be poor due to bad selectivity estimates caused by redundant clauses. But + * the correct solution to that is to fix the opfamilies ... + * + * Equality clauses derived by this function are passed off to + * process_implied_equality (in plan/initsplan.c) to be inserted into the + * restrictinfo datastructures. Note that this must be called after initial + * scanning of the quals and before Path construction begins. + * + * We make no attempt to avoid generating duplicate RestrictInfos here: we + * don't search ec_sources or ec_derives for matches. It doesn't really + * seem worth the trouble to do so. 
+ */ +void +generate_base_implied_equalities(PlannerInfo *root) +{ + int ec_index; + ListCell *lc; + + /* + * At this point, we're done absorbing knowledge of equivalences in the + * query, so no further EC merging should happen, and ECs remaining in the + * eq_classes list can be considered canonical. (But note that it's still + * possible for new single-member ECs to be added through + * get_eclass_for_sort_expr().) + */ + root->ec_merging_done = true; + + ec_index = 0; + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + bool can_generate_joinclause = false; + int i; + + Assert(ec->ec_merged == NULL); /* else shouldn't be in list */ + Assert(!ec->ec_broken); /* not yet anyway... */ + + /* + * Generate implied equalities that are restriction clauses. + * Single-member ECs won't generate any deductions, either here or at + * the join level. + */ + if (list_length(ec->ec_members) > 1) + { + if (ec->ec_has_const) + generate_base_implied_equalities_const(root, ec); + else + generate_base_implied_equalities_no_const(root, ec); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + generate_base_implied_equalities_broken(root, ec); + + /* Detect whether this EC might generate join clauses */ + can_generate_joinclause = + (bms_membership(ec->ec_relids) == BMS_MULTIPLE); + } + + /* + * Mark the base rels cited in each eclass (which should all exist by + * now) with the eq_classes indexes of all eclasses mentioning them. + * This will let us avoid searching in subsequent lookups. While + * we're at it, we can mark base rels that have pending eclass joins; + * this is a cheap version of has_relevant_eclass_joinclause(). + */ + i = -1; + while ((i = bms_next_member(ec->ec_relids, i)) > 0) + { + RelOptInfo *rel = root->simple_rel_array[i]; + + Assert(rel->reloptkind == RELOPT_BASEREL); + + rel->eclass_indexes = bms_add_member(rel->eclass_indexes, + ec_index); + + if (can_generate_joinclause) + rel->has_eclass_joins = true; + } + + ec_index++; + } +} + +/* + * generate_base_implied_equalities when EC contains pseudoconstant(s) + */ +static void +generate_base_implied_equalities_const(PlannerInfo *root, + EquivalenceClass *ec) +{ + EquivalenceMember *const_em = NULL; + ListCell *lc; + + /* + * In the trivial case where we just had one "var = const" clause, push + * the original clause back into the main planner machinery. There is + * nothing to be gained by doing it differently, and we save the effort to + * re-build and re-analyze an equality clause that will be exactly + * equivalent to the old one. + */ + if (list_length(ec->ec_members) == 2 && + list_length(ec->ec_sources) == 1) + { + RestrictInfo *restrictinfo = (RestrictInfo *) linitial(ec->ec_sources); + + if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE) + { + distribute_restrictinfo_to_rels(root, restrictinfo); + return; + } + } + + /* + * Find the constant member to use. We prefer an actual constant to + * pseudo-constants (such as Params), because the constraint exclusion + * machinery might be able to exclude relations on the basis of generated + * "var = const" equalities, but "var = param" won't work for that. 
+ */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + + if (cur_em->em_is_const) + { + const_em = cur_em; + if (IsA(cur_em->em_expr, Const)) + break; + } + } + Assert(const_em != NULL); + + /* Generate a derived equality against each other member */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + Oid eq_op; + RestrictInfo *rinfo; + + Assert(!cur_em->em_is_child); /* no children yet */ + if (cur_em == const_em) + continue; + eq_op = select_equality_operator(ec, + cur_em->em_datatype, + const_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + break; + } + rinfo = process_implied_equality(root, eq_op, ec->ec_collation, + cur_em->em_expr, const_em->em_expr, + bms_copy(ec->ec_relids), + bms_union(cur_em->em_nullable_relids, + const_em->em_nullable_relids), + ec->ec_min_security, + ec->ec_below_outer_join, + cur_em->em_is_const); + + /* + * If the clause didn't degenerate to a constant, fill in the correct + * markings for a mergejoinable clause, and save it in ec_derives. (We + * will not re-use such clauses directly, but selectivity estimation + * may consult the list later. Note that this use of ec_derives does + * not overlap with its use for join clauses, since we never generate + * join clauses from an ec_has_const eclass.) + */ + if (rinfo && rinfo->mergeopfamilies) + { + /* it's not redundant, so don't set parent_ec */ + rinfo->left_ec = rinfo->right_ec = ec; + rinfo->left_em = cur_em; + rinfo->right_em = const_em; + ec->ec_derives = lappend(ec->ec_derives, rinfo); + } + } +} + +/* + * generate_base_implied_equalities when EC contains no pseudoconstants + */ +static void +generate_base_implied_equalities_no_const(PlannerInfo *root, + EquivalenceClass *ec) +{ + EquivalenceMember **prev_ems; + ListCell *lc; + + /* + * We scan the EC members once and track the last-seen member for each + * base relation. When we see another member of the same base relation, + * we generate "prev_em = cur_em". This results in the minimum number of + * derived clauses, but it's possible that it will fail when a different + * ordering would succeed. XXX FIXME: use a UNION-FIND algorithm similar + * to the way we build merged ECs. (Use a list-of-lists for each rel.) + */ + prev_ems = (EquivalenceMember **) + palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *)); + + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + int relid; + + Assert(!cur_em->em_is_child); /* no children yet */ + if (!bms_get_singleton_member(cur_em->em_relids, &relid)) + continue; + Assert(relid < root->simple_rel_array_size); + + if (prev_ems[relid] != NULL) + { + EquivalenceMember *prev_em = prev_ems[relid]; + Oid eq_op; + RestrictInfo *rinfo; + + eq_op = select_equality_operator(ec, + prev_em->em_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + break; + } + rinfo = process_implied_equality(root, eq_op, ec->ec_collation, + prev_em->em_expr, cur_em->em_expr, + bms_copy(ec->ec_relids), + bms_union(prev_em->em_nullable_relids, + cur_em->em_nullable_relids), + ec->ec_min_security, + ec->ec_below_outer_join, + false); + + /* + * If the clause didn't degenerate to a constant, fill in the + * correct markings for a mergejoinable clause. 
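For example (an assumed query, not taken from this patch), WHERE a.x = b.y AND b.y = 5 produces the EC {a.x, b.y, 5}; because it has a constant member, the generated restriction clauses are a.x = 5 and b.y = 5, and no var = var clause is needed. A standalone sketch of that generation strategy:

#include <stdio.h>

int
main(void)
{
	/* Illustrative sketch only: one EC whose members include a constant. */
	const char *members[] = {"a.x", "b.y", "5"};
	int			is_const[] = {0, 0, 1};
	int			nmembers = 3;
	int			const_idx = -1;

	/* pick the first constant member as the reference value */
	for (int i = 0; i < nmembers; i++)
	{
		if (is_const[i])
		{
			const_idx = i;
			break;
		}
	}

	/* emit "member = const" for every other member */
	for (int i = 0; i < nmembers; i++)
	{
		if (i != const_idx)
			printf("%s = %s\n", members[i], members[const_idx]);
	}
	return 0;
}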
We don't put it + * in ec_derives however; we don't currently need to re-find such + * clauses, and we don't want to clutter that list with non-join + * clauses. + */ + if (rinfo && rinfo->mergeopfamilies) + { + /* it's not redundant, so don't set parent_ec */ + rinfo->left_ec = rinfo->right_ec = ec; + rinfo->left_em = prev_em; + rinfo->right_em = cur_em; + } + } + prev_ems[relid] = cur_em; + } + + pfree(prev_ems); + + /* + * We also have to make sure that all the Vars used in the member clauses + * will be available at any join node we might try to reference them at. + * For the moment we force all the Vars to be available at all join nodes + * for this eclass. Perhaps this could be improved by doing some + * pre-analysis of which members we prefer to join, but it's no worse than + * what happened in the pre-8.3 code. + */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + List *vars = pull_var_clause((Node *) cur_em->em_expr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_vars_to_targetlist(root, vars, ec->ec_relids, false); + list_free(vars); + } +} + +/* + * generate_base_implied_equalities cleanup after failure + * + * What we must do here is push any zero- or one-relation source RestrictInfos + * of the EC back into the main restrictinfo datastructures. Multi-relation + * clauses will be regurgitated later by generate_join_implied_equalities(). + * (We do it this way to maintain continuity with the case that ec_broken + * becomes set only after we've gone up a join level or two.) However, for + * an EC that contains constants, we can adopt a simpler strategy and just + * throw back all the source RestrictInfos immediately; that works because + * we know that such an EC can't become broken later. (This rule justifies + * ignoring ec_has_const ECs in generate_join_implied_equalities, even when + * they are broken.) + */ +static void +generate_base_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec) +{ + ListCell *lc; + + foreach(lc, ec->ec_sources) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + if (ec->ec_has_const || + bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE) + distribute_restrictinfo_to_rels(root, restrictinfo); + } +} + + +/* + * generate_join_implied_equalities + * Generate any join clauses that we can deduce from equivalence classes. + * + * At a join node, we must enforce restriction clauses sufficient to ensure + * that all equivalence-class members computable at that node are equal. + * Since the set of clauses to enforce can vary depending on which subset + * relations are the inputs, we have to compute this afresh for each join + * relation pair. Hence a fresh List of RestrictInfo nodes is built and + * passed back on each call. + * + * In addition to its use at join nodes, this can be applied to generate + * eclass-based join clauses for use in a parameterized scan of a base rel. + * The reason for the asymmetry of specifying the inner rel as a RelOptInfo + * and the outer rel by Relids is that this usage occurs before we have + * built any join RelOptInfos. + * + * An annoying special case for parameterized scans is that the inner rel can + * be an appendrel child (an "other rel"). In this case we must generate + * appropriate clauses using child EC members. add_child_rel_equivalences + * must already have been done for the child rel. 
+ * + * The results are sufficient for use in merge, hash, and plain nestloop join + * methods. We do not worry here about selecting clauses that are optimal + * for use in a parameterized indexscan. indxpath.c makes its own selections + * of clauses to use, and if the ones we pick here are redundant with those, + * the extras will be eliminated at createplan time, using the parent_ec + * markers that we provide (see is_redundant_derived_clause()). + * + * Because the same join clauses are likely to be needed multiple times as + * we consider different join paths, we avoid generating multiple copies: + * whenever we select a particular pair of EquivalenceMembers to join, + * we check to see if the pair matches any original clause (in ec_sources) + * or previously-built clause (in ec_derives). This saves memory and allows + * re-use of information cached in RestrictInfos. + * + * join_relids should always equal bms_union(outer_relids, inner_rel->relids). + * We could simplify this function's API by computing it internally, but in + * most current uses, the caller has the value at hand anyway. + */ +List * +generate_join_implied_equalities(PlannerInfo *root, + Relids join_relids, + Relids outer_relids, + RelOptInfo *inner_rel) +{ + List *result = NIL; + Relids inner_relids = inner_rel->relids; + Relids nominal_inner_relids; + Relids nominal_join_relids; + Bitmapset *matching_ecs; + int i; + + /* If inner rel is a child, extra setup work is needed */ + if (IS_OTHER_REL(inner_rel)) + { + Assert(!bms_is_empty(inner_rel->top_parent_relids)); + + /* Fetch relid set for the topmost parent rel */ + nominal_inner_relids = inner_rel->top_parent_relids; + /* ECs will be marked with the parent's relid, not the child's */ + nominal_join_relids = bms_union(outer_relids, nominal_inner_relids); + } + else + { + nominal_inner_relids = inner_relids; + nominal_join_relids = join_relids; + } + + /* + * Get all eclasses that mention both inner and outer sides of the join + */ + matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids, + outer_relids); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + List *sublist = NIL; + + /* ECs containing consts do not need any further enforcement */ + if (ec->ec_has_const) + continue; + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* Sanity check that this eclass overlaps the join */ + Assert(bms_overlap(ec->ec_relids, nominal_join_relids)); + + if (!ec->ec_broken) + sublist = generate_join_implied_equalities_normal(root, + ec, + join_relids, + outer_relids, + inner_relids); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + sublist = generate_join_implied_equalities_broken(root, + ec, + nominal_join_relids, + outer_relids, + nominal_inner_relids, + inner_rel); + + result = list_concat(result, sublist); + } + + return result; +} + +/* + * generate_join_implied_equalities_for_ecs + * As above, but consider only the listed ECs. 
+ */ +List * +generate_join_implied_equalities_for_ecs(PlannerInfo *root, + List *eclasses, + Relids join_relids, + Relids outer_relids, + RelOptInfo *inner_rel) +{ + List *result = NIL; + Relids inner_relids = inner_rel->relids; + Relids nominal_inner_relids; + Relids nominal_join_relids; + ListCell *lc; + + /* If inner rel is a child, extra setup work is needed */ + if (IS_OTHER_REL(inner_rel)) + { + Assert(!bms_is_empty(inner_rel->top_parent_relids)); + + /* Fetch relid set for the topmost parent rel */ + nominal_inner_relids = inner_rel->top_parent_relids; + /* ECs will be marked with the parent's relid, not the child's */ + nominal_join_relids = bms_union(outer_relids, nominal_inner_relids); + } + else + { + nominal_inner_relids = inner_relids; + nominal_join_relids = join_relids; + } + + foreach(lc, eclasses) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + List *sublist = NIL; + + /* ECs containing consts do not need any further enforcement */ + if (ec->ec_has_const) + continue; + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* We can quickly ignore any that don't overlap the join, too */ + if (!bms_overlap(ec->ec_relids, nominal_join_relids)) + continue; + + if (!ec->ec_broken) + sublist = generate_join_implied_equalities_normal(root, + ec, + join_relids, + outer_relids, + inner_relids); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + sublist = generate_join_implied_equalities_broken(root, + ec, + nominal_join_relids, + outer_relids, + nominal_inner_relids, + inner_rel); + + result = list_concat(result, sublist); + } + + return result; +} + +/* + * generate_join_implied_equalities for a still-valid EC + */ +static List * +generate_join_implied_equalities_normal(PlannerInfo *root, + EquivalenceClass *ec, + Relids join_relids, + Relids outer_relids, + Relids inner_relids) +{ + List *result = NIL; + List *new_members = NIL; + List *outer_members = NIL; + List *inner_members = NIL; + ListCell *lc1; + + /* + * First, scan the EC to identify member values that are computable at the + * outer rel, at the inner rel, or at this relation but not in either + * input rel. The outer-rel members should already be enforced equal, + * likewise for the inner-rel members. We'll need to create clauses to + * enforce that any newly computable members are all equal to each other + * as well as to at least one input member, plus enforce at least one + * outer-rel member equal to at least one inner-rel member. + */ + foreach(lc1, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + /* + * We don't need to check explicitly for child EC members. This test + * against join_relids will cause them to be ignored except when + * considering a child inner rel, which is what we want. + */ + if (!bms_is_subset(cur_em->em_relids, join_relids)) + continue; /* not computable yet, or wrong child */ + + if (bms_is_subset(cur_em->em_relids, outer_relids)) + outer_members = lappend(outer_members, cur_em); + else if (bms_is_subset(cur_em->em_relids, inner_relids)) + inner_members = lappend(inner_members, cur_em); + else + new_members = lappend(new_members, cur_em); + } + + /* + * First, select the joinclause if needed. We can equate any one outer + * member to any one inner member, but we have to find a datatype + * combination for which an opfamily member operator exists. 
If we have + * choices, we prefer simple Var members (possibly with RelabelType) since + * these are (a) cheapest to compute at runtime and (b) most likely to + * have useful statistics. Also, prefer operators that are also + * hashjoinable. + */ + if (outer_members && inner_members) + { + EquivalenceMember *best_outer_em = NULL; + EquivalenceMember *best_inner_em = NULL; + Oid best_eq_op = InvalidOid; + int best_score = -1; + RestrictInfo *rinfo; + + foreach(lc1, outer_members) + { + EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1); + ListCell *lc2; + + foreach(lc2, inner_members) + { + EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + int score; + + eq_op = select_equality_operator(ec, + outer_em->em_datatype, + inner_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + score = 0; + if (IsA(outer_em->em_expr, Var) || + (IsA(outer_em->em_expr, RelabelType) && + IsA(((RelabelType *) outer_em->em_expr)->arg, Var))) + score++; + if (IsA(inner_em->em_expr, Var) || + (IsA(inner_em->em_expr, RelabelType) && + IsA(((RelabelType *) inner_em->em_expr)->arg, Var))) + score++; + if (op_hashjoinable(eq_op, + exprType((Node *) outer_em->em_expr))) + score++; + if (score > best_score) + { + best_outer_em = outer_em; + best_inner_em = inner_em; + best_eq_op = eq_op; + best_score = score; + if (best_score == 3) + break; /* no need to look further */ + } + } + if (best_score == 3) + break; /* no need to look further */ + } + if (best_score < 0) + { + /* failed... */ + ec->ec_broken = true; + return NIL; + } + + /* + * Create clause, setting parent_ec to mark it as redundant with other + * joinclauses + */ + rinfo = create_join_clause(root, ec, best_eq_op, + best_outer_em, best_inner_em, + ec); + + result = lappend(result, rinfo); + } + + /* + * Now deal with building restrictions for any expressions that involve + * Vars from both sides of the join. We have to equate all of these to + * each other as well as to at least one old member (if any). + * + * XXX as in generate_base_implied_equalities_no_const, we could be a lot + * smarter here to avoid unnecessary failures in cross-type situations. + * For now, use the same left-to-right method used there. + */ + if (new_members) + { + List *old_members = list_concat(outer_members, inner_members); + EquivalenceMember *prev_em = NULL; + RestrictInfo *rinfo; + + /* For now, arbitrarily take the first old_member as the one to use */ + if (old_members) + new_members = lappend(new_members, linitial(old_members)); + + foreach(lc1, new_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + if (prev_em != NULL) + { + Oid eq_op; + + eq_op = select_equality_operator(ec, + prev_em->em_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + return NIL; + } + /* do NOT set parent_ec, this qual is not redundant! */ + rinfo = create_join_clause(root, ec, eq_op, + prev_em, cur_em, + NULL); + + result = lappend(result, rinfo); + } + prev_em = cur_em; + } + } + + return result; +} + +/* + * generate_join_implied_equalities cleanup after failure + * + * Return any original RestrictInfos that are enforceable at this join. + * + * In the case of a child inner relation, we have to translate the + * original RestrictInfos from parent to child Vars. 
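+ * + * A clause qualifies as enforceable here if its required relids are covered + * by this join's relids but by neither input alone, i.e. it genuinely + * crosses the join being formed.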
+ */ +static List * +generate_join_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec, + Relids nominal_join_relids, + Relids outer_relids, + Relids nominal_inner_relids, + RelOptInfo *inner_rel) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, ec->ec_sources) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + Relids clause_relids = restrictinfo->required_relids; + + if (bms_is_subset(clause_relids, nominal_join_relids) && + !bms_is_subset(clause_relids, outer_relids) && + !bms_is_subset(clause_relids, nominal_inner_relids)) + result = lappend(result, restrictinfo); + } + + /* + * If we have to translate, just brute-force apply adjust_appendrel_attrs + * to all the RestrictInfos at once. This will result in returning + * RestrictInfos that are not listed in ec_derives, but there shouldn't be + * any duplication, and it's a sufficiently narrow corner case that we + * shouldn't sweat too much over it anyway. + * + * Since inner_rel might be an indirect descendant of the baserel + * mentioned in the ec_sources clauses, we have to be prepared to apply + * multiple levels of Var translation. + */ + if (IS_OTHER_REL(inner_rel) && result != NIL) + result = (List *) adjust_appendrel_attrs_multilevel(root, + (Node *) result, + inner_rel->relids, + inner_rel->top_parent_relids); + + return result; +} + + +/* + * select_equality_operator + * Select a suitable equality operator for comparing two EC members + * + * Returns InvalidOid if no operator can be found for this datatype combination + */ +static Oid +select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) +{ + ListCell *lc; + + foreach(lc, ec->ec_opfamilies) + { + Oid opfamily = lfirst_oid(lc); + Oid opno; + + opno = get_opfamily_member(opfamily, lefttype, righttype, + BTEqualStrategyNumber); + if (!OidIsValid(opno)) + continue; + /* If no barrier quals in query, don't worry about leaky operators */ + if (ec->ec_max_security == 0) + return opno; + /* Otherwise, insist that selected operators be leakproof */ + if (get_func_leakproof(get_opcode(opno))) + return opno; + } + return InvalidOid; +} + + +/* + * create_join_clause + * Find or make a RestrictInfo comparing the two given EC members + * with the given operator. + * + * parent_ec is either equal to ec (if the clause is a potentially-redundant + * join clause) or NULL (if not). We have to treat this as part of the + * match requirements --- it's possible that a clause comparing the same two + * EMs is a join clause in one join path and a restriction clause in another. + */ +static RestrictInfo * +create_join_clause(PlannerInfo *root, + EquivalenceClass *ec, Oid opno, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec) +{ + RestrictInfo *rinfo; + ListCell *lc; + MemoryContext oldcontext; + + /* + * Search to see if we already built a RestrictInfo for this pair of + * EquivalenceMembers. We can use either original source clauses or + * previously-derived clauses. The check on opno is probably redundant, + * but be safe ... 
+ */ + foreach(lc, ec->ec_sources) + { + rinfo = (RestrictInfo *) lfirst(lc); + if (rinfo->left_em == leftem && + rinfo->right_em == rightem && + rinfo->parent_ec == parent_ec && + opno == ((OpExpr *) rinfo->clause)->opno) + return rinfo; + } + + foreach(lc, ec->ec_derives) + { + rinfo = (RestrictInfo *) lfirst(lc); + if (rinfo->left_em == leftem && + rinfo->right_em == rightem && + rinfo->parent_ec == parent_ec && + opno == ((OpExpr *) rinfo->clause)->opno) + return rinfo; + } + + /* + * Not there, so build it, in planner context so we can re-use it. (Not + * important in normal planning, but definitely so in GEQO.) + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + rinfo = build_implied_join_equality(root, + opno, + ec->ec_collation, + leftem->em_expr, + rightem->em_expr, + bms_union(leftem->em_relids, + rightem->em_relids), + bms_union(leftem->em_nullable_relids, + rightem->em_nullable_relids), + ec->ec_min_security); + + /* Mark the clause as redundant, or not */ + rinfo->parent_ec = parent_ec; + + /* + * We know the correct values for left_ec/right_ec, ie this particular EC, + * so we can just set them directly instead of forcing another lookup. + */ + rinfo->left_ec = ec; + rinfo->right_ec = ec; + + /* Mark it as usable with these EMs */ + rinfo->left_em = leftem; + rinfo->right_em = rightem; + /* and save it for possible re-use */ + ec->ec_derives = lappend(ec->ec_derives, rinfo); + + MemoryContextSwitchTo(oldcontext); + + return rinfo; +} + + +/* + * reconsider_outer_join_clauses + * Re-examine any outer-join clauses that were set aside by + * distribute_qual_to_rels(), and see if we can derive any + * EquivalenceClasses from them. Then, if they were not made + * redundant, push them out into the regular join-clause lists. + * + * When we have mergejoinable clauses A = B that are outer-join clauses, + * we can't blindly combine them with other clauses A = C to deduce B = C, + * since in fact the "equality" A = B won't necessarily hold above the + * outer join (one of the variables might be NULL instead). Nonetheless + * there are cases where we can add qual clauses using transitivity. + * + * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR + * for which there is also an equivalence clause OUTERVAR = CONSTANT. + * It is safe and useful to push a clause INNERVAR = CONSTANT into the + * evaluation of the inner (nullable) relation, because any inner rows not + * meeting this condition will not contribute to the outer-join result anyway. + * (Any outer rows they could join to will be eliminated by the pushed-down + * equivalence clause.) + * + * Note that the above rule does not work for full outer joins; nor is it + * very interesting to consider cases where the generated equivalence clause + * would involve relations outside the outer join, since such clauses couldn't + * be pushed into the inner side's scan anyway. So the restriction to + * outervar = pseudoconstant is not really giving up anything. + * + * For full-join cases, we can only do something useful if it's a FULL JOIN + * USING and a merged column has an equivalence MERGEDVAR = CONSTANT. + * By the time it gets here, the merged column will look like + * COALESCE(LEFTVAR, RIGHTVAR) + * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match + * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT + * and RIGHTVAR = CONSTANT into the input relations, since any rows not + * meeting these conditions cannot contribute to the join result. 
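+ * + * As a concrete illustration: in "SELECT ... FROM a FULL JOIN b USING (x) + * WHERE x = 42", the merged column is COALESCE(a.x, b.x) and is equated to + * the constant 42, so we can derive "a.x = 42" and "b.x = 42" and push them + * into the scans of a and b respectively.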
+ * + * Again, there isn't any traction to be gained by trying to deal with + * clauses comparing a mergedvar to a non-pseudoconstant. So we can make + * use of the EquivalenceClasses to search for matching variables that were + * equivalenced to constants. The interesting outer-join clauses were + * accumulated for us by distribute_qual_to_rels. + * + * When we find one of these cases, we implement the changes we want by + * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc) + * and pushing it into the EquivalenceClass structures. This is because we + * may already know that INNERVAR is equivalenced to some other var(s), and + * we'd like the constant to propagate to them too. Note that it would be + * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC --- + * that could result in propagating constant restrictions from + * INNERVAR to OUTERVAR, which would be very wrong. + * + * It's possible that the INNERVAR is also an OUTERVAR for some other + * outer-join clause, in which case the process can be repeated. So we repeat + * looping over the lists of clauses until no further deductions can be made. + * Whenever we do make a deduction, we remove the generating clause from the + * lists, since we don't want to make the same deduction twice. + * + * If we don't find any match for a set-aside outer join clause, we must + * throw it back into the regular joinclause processing by passing it to + * distribute_restrictinfo_to_rels(). If we do generate a derived clause, + * however, the outer-join clause is redundant. We still throw it back, + * because otherwise the join will be seen as a clauseless join and avoided + * during join order searching; but we mark it as redundant to keep from + * messing up the joinrel's size estimate. (This behavior means that the + * API for this routine is uselessly complex: we could have just put all + * the clauses into the regular processing initially. We keep it because + * someday we might want to do something else, such as inserting "dummy" + * joinclauses instead of real ones.) + * + * Outer join clauses that are marked outerjoin_delayed are special: this + * condition means that one or both VARs might go to null due to a lower + * outer join. We can still push a constant through the clause, but only + * if its operator is strict; and we *have to* throw the clause back into + * regular joinclause processing. By keeping the strict join clause, + * we ensure that any null-extended rows that are mistakenly generated due + * to suppressing rows not matching the constant will be rejected at the + * upper outer join. (This doesn't work for full-join clauses.) 
+ */ +void +reconsider_outer_join_clauses(PlannerInfo *root) +{ + bool found; + ListCell *cell; + + /* Outer loop repeats until we find no more deductions */ + do + { + found = false; + + /* Process the LEFT JOIN clauses */ + foreach(cell, root->left_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + if (reconsider_outer_join_clause(root, rinfo, true)) + { + found = true; + /* remove it from the list */ + root->left_join_clauses = + foreach_delete_current(root->left_join_clauses, cell); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->norm_selec = 2.0; + rinfo->outer_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + } + + /* Process the RIGHT JOIN clauses */ + foreach(cell, root->right_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + if (reconsider_outer_join_clause(root, rinfo, false)) + { + found = true; + /* remove it from the list */ + root->right_join_clauses = + foreach_delete_current(root->right_join_clauses, cell); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->norm_selec = 2.0; + rinfo->outer_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + } + + /* Process the FULL JOIN clauses */ + foreach(cell, root->full_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + if (reconsider_full_join_clause(root, rinfo)) + { + found = true; + /* remove it from the list */ + root->full_join_clauses = + foreach_delete_current(root->full_join_clauses, cell); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->norm_selec = 2.0; + rinfo->outer_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + } + } while (found); + + /* Now, any remaining clauses have to be thrown back */ + foreach(cell, root->left_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + distribute_restrictinfo_to_rels(root, rinfo); + } + foreach(cell, root->right_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + distribute_restrictinfo_to_rels(root, rinfo); + } + foreach(cell, root->full_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + distribute_restrictinfo_to_rels(root, rinfo); + } +} + +/* + * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause + * + * Returns true if we were able to propagate a constant through the clause. 
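+ * + * For illustration: given "a LEFT JOIN b ON a.x = b.y" where a.x is already + * known equal to the constant 42, we build "b.y = 42" with + * build_implied_join_equality() and merge it into the EC machinery via + * process_equivalence(), so the restriction can be applied at b's scan.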
+ */ +static bool +reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, + bool outer_on_left) +{ + Expr *outervar, + *innervar; + Oid opno, + collation, + left_type, + right_type, + inner_datatype; + Relids inner_relids, + inner_nullable_relids; + ListCell *lc1; + + Assert(is_opclause(rinfo->clause)); + opno = ((OpExpr *) rinfo->clause)->opno; + collation = ((OpExpr *) rinfo->clause)->inputcollid; + + /* If clause is outerjoin_delayed, operator must be strict */ + if (rinfo->outerjoin_delayed && !op_strict(opno)) + return false; + + /* Extract needed info from the clause */ + op_input_types(opno, &left_type, &right_type); + if (outer_on_left) + { + outervar = (Expr *) get_leftop(rinfo->clause); + innervar = (Expr *) get_rightop(rinfo->clause); + inner_datatype = right_type; + inner_relids = rinfo->right_relids; + } + else + { + outervar = (Expr *) get_rightop(rinfo->clause); + innervar = (Expr *) get_leftop(rinfo->clause); + inner_datatype = left_type; + inner_relids = rinfo->left_relids; + } + inner_nullable_relids = bms_intersect(inner_relids, + rinfo->nullable_relids); + + /* Scan EquivalenceClasses for a match to outervar */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + bool match; + ListCell *lc2; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + /* It has to match the outer-join clause as to semantics, too */ + if (collation != cur_ec->ec_collation) + continue; + if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies)) + continue; + /* Does it contain a match to outervar? */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + Assert(!cur_em->em_is_child); /* no children yet */ + if (equal(outervar, cur_em->em_expr)) + { + match = true; + break; + } + } + if (!match) + continue; /* no match, so ignore this EC */ + + /* + * Yes it does! Try to generate a clause INNERVAR = CONSTANT for each + * CONSTANT in the EC. Note that we must succeed with at least one + * constant before we can decide to throw away the outer-join clause. + */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *newrinfo; + + if (!cur_em->em_is_const) + continue; /* ignore non-const members */ + eq_op = select_equality_operator(cur_ec, + inner_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; /* can't generate equality */ + newrinfo = build_implied_join_equality(root, + eq_op, + cur_ec->ec_collation, + innervar, + cur_em->em_expr, + bms_copy(inner_relids), + bms_copy(inner_nullable_relids), + cur_ec->ec_min_security); + if (process_equivalence(root, &newrinfo, true)) + match = true; + } + + /* + * If we were able to equate INNERVAR to any constant, report success. + * Otherwise, fall out of the search loop, since we know the OUTERVAR + * appears in at most one EC. + */ + if (match) + return true; + else + break; + } + + return false; /* failed to make any deduction */ +} + +/* + * reconsider_outer_join_clauses for a single FULL JOIN clause + * + * Returns true if we were able to propagate a constant through the clause. 
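+ * + * For illustration: when the merged column of "a FULL JOIN b USING (x)" is + * known equal to some constant, we try to derive both "a.x = CONSTANT" and + * "b.x = CONSTANT"; only if both derivations succeed do we drop the + * COALESCE member from the EC and report success.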
+ */ +static bool +reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) +{ + Expr *leftvar; + Expr *rightvar; + Oid opno, + collation, + left_type, + right_type; + Relids left_relids, + right_relids, + left_nullable_relids, + right_nullable_relids; + ListCell *lc1; + + /* Can't use an outerjoin_delayed clause here */ + if (rinfo->outerjoin_delayed) + return false; + + /* Extract needed info from the clause */ + Assert(is_opclause(rinfo->clause)); + opno = ((OpExpr *) rinfo->clause)->opno; + collation = ((OpExpr *) rinfo->clause)->inputcollid; + op_input_types(opno, &left_type, &right_type); + leftvar = (Expr *) get_leftop(rinfo->clause); + rightvar = (Expr *) get_rightop(rinfo->clause); + left_relids = rinfo->left_relids; + right_relids = rinfo->right_relids; + left_nullable_relids = bms_intersect(left_relids, + rinfo->nullable_relids); + right_nullable_relids = bms_intersect(right_relids, + rinfo->nullable_relids); + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + EquivalenceMember *coal_em = NULL; + bool match; + bool matchleft; + bool matchright; + ListCell *lc2; + int coal_idx = -1; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + /* It has to match the outer-join clause as to semantics, too */ + if (collation != cur_ec->ec_collation) + continue; + if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies)) + continue; + + /* + * Does it contain a COALESCE(leftvar, rightvar) construct? + * + * We can assume the COALESCE() inputs are in the same order as the + * join clause, since both were automatically generated in the cases + * we care about. + * + * XXX currently this may fail to match in cross-type cases because + * the COALESCE will contain typecast operations while the join clause + * may not (if there is a cross-type mergejoin operator available for + * the two column types). Is it OK to strip implicit coercions from + * the COALESCE arguments? + */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + coal_em = (EquivalenceMember *) lfirst(lc2); + Assert(!coal_em->em_is_child); /* no children yet */ + if (IsA(coal_em->em_expr, CoalesceExpr)) + { + CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr; + Node *cfirst; + Node *csecond; + + if (list_length(cexpr->args) != 2) + continue; + cfirst = (Node *) linitial(cexpr->args); + csecond = (Node *) lsecond(cexpr->args); + + if (equal(leftvar, cfirst) && equal(rightvar, csecond)) + { + coal_idx = foreach_current_index(lc2); + match = true; + break; + } + } + } + if (!match) + continue; /* no match, so ignore this EC */ + + /* + * Yes it does! Try to generate clauses LEFTVAR = CONSTANT and + * RIGHTVAR = CONSTANT for each CONSTANT in the EC. Note that we must + * succeed with at least one constant for each var before we can + * decide to throw away the outer-join clause. 
+ */ + matchleft = matchright = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *newrinfo; + + if (!cur_em->em_is_const) + continue; /* ignore non-const members */ + eq_op = select_equality_operator(cur_ec, + left_type, + cur_em->em_datatype); + if (OidIsValid(eq_op)) + { + newrinfo = build_implied_join_equality(root, + eq_op, + cur_ec->ec_collation, + leftvar, + cur_em->em_expr, + bms_copy(left_relids), + bms_copy(left_nullable_relids), + cur_ec->ec_min_security); + if (process_equivalence(root, &newrinfo, true)) + matchleft = true; + } + eq_op = select_equality_operator(cur_ec, + right_type, + cur_em->em_datatype); + if (OidIsValid(eq_op)) + { + newrinfo = build_implied_join_equality(root, + eq_op, + cur_ec->ec_collation, + rightvar, + cur_em->em_expr, + bms_copy(right_relids), + bms_copy(right_nullable_relids), + cur_ec->ec_min_security); + if (process_equivalence(root, &newrinfo, true)) + matchright = true; + } + } + + /* + * If we were able to equate both vars to constants, we're done, and + * we can throw away the full-join clause as redundant. Moreover, we + * can remove the COALESCE entry from the EC, since the added + * restrictions ensure it will always have the expected value. (We + * don't bother trying to update ec_relids or ec_sources.) + */ + if (matchleft && matchright) + { + cur_ec->ec_members = list_delete_nth_cell(cur_ec->ec_members, coal_idx); + return true; + } + + /* + * Otherwise, fall out of the search loop, since we know the COALESCE + * appears in at most one EC (XXX might stop being true if we allow + * stripping of coercions above?) + */ + break; + } + + return false; /* failed to make any deduction */ +} + + +/* + * exprs_known_equal + * Detect whether two expressions are known equal due to equivalence + * relationships. + * + * Actually, this only shows that the expressions are equal according + * to some opfamily's notion of equality --- but we only use it for + * selectivity estimation, so a fuzzy idea of equality is OK. + * + * Note: does not bother to check for "equal(item1, item2)"; caller must + * check that case if it's possible to pass identical items. + */ +bool +exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool item1member = false; + bool item2member = false; + ListCell *lc2; + + /* Never match to a volatile EC */ + if (ec->ec_has_volatile) + continue; + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + if (em->em_is_child) + continue; /* ignore children here */ + if (equal(item1, em->em_expr)) + item1member = true; + else if (equal(item2, em->em_expr)) + item2member = true; + /* Exit as soon as equality is proven */ + if (item1member && item2member) + return true; + } + } + return false; +} + + +/* + * match_eclasses_to_foreign_key_col + * See whether a foreign key column match is proven by any eclass. + * + * If the referenced and referencing Vars of the fkey's colno'th column are + * known equal due to any eclass, return that eclass; otherwise return NULL. + * (In principle there might be more than one matching eclass if multiple + * collations are involved, but since collation doesn't matter for equality, + * we ignore that fine point here.) 
This is much like exprs_known_equal, + * except that we insist on the comparison operator matching the eclass, so + * that the result is definite not approximate. + * + * On success, we also set fkinfo->eclass[colno] to the matching eclass, + * and set fkinfo->fk_eclass_member[colno] to the eclass member for the + * referencing Var. + */ +EquivalenceClass * +match_eclasses_to_foreign_key_col(PlannerInfo *root, + ForeignKeyOptInfo *fkinfo, + int colno) +{ + Index var1varno = fkinfo->con_relid; + AttrNumber var1attno = fkinfo->conkey[colno]; + Index var2varno = fkinfo->ref_relid; + AttrNumber var2attno = fkinfo->confkey[colno]; + Oid eqop = fkinfo->conpfeqop[colno]; + RelOptInfo *rel1 = root->simple_rel_array[var1varno]; + RelOptInfo *rel2 = root->simple_rel_array[var2varno]; + List *opfamilies = NIL; /* compute only if needed */ + Bitmapset *matching_ecs; + int i; + + /* Consider only eclasses mentioning both relations */ + Assert(root->ec_merging_done); + Assert(IS_SIMPLE_REL(rel1)); + Assert(IS_SIMPLE_REL(rel2)); + matching_ecs = bms_intersect(rel1->eclass_indexes, + rel2->eclass_indexes); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, + i); + EquivalenceMember *item1_em = NULL; + EquivalenceMember *item2_em = NULL; + ListCell *lc2; + + /* Never match to a volatile EC */ + if (ec->ec_has_volatile) + continue; + /* Note: it seems okay to match to "broken" eclasses here */ + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + Var *var; + + if (em->em_is_child) + continue; /* ignore children here */ + + /* EM must be a Var, possibly with RelabelType */ + var = (Var *) em->em_expr; + while (var && IsA(var, RelabelType)) + var = (Var *) ((RelabelType *) var)->arg; + if (!(var && IsA(var, Var))) + continue; + + /* Match? */ + if (var->varno == var1varno && var->varattno == var1attno) + item1_em = em; + else if (var->varno == var2varno && var->varattno == var2attno) + item2_em = em; + + /* Have we found both PK and FK column in this EC? */ + if (item1_em && item2_em) + { + /* + * Succeed if eqop matches EC's opfamilies. We could test + * this before scanning the members, but it's probably cheaper + * to test for member matches first. + */ + if (opfamilies == NIL) /* compute if we didn't already */ + opfamilies = get_mergejoin_opfamilies(eqop); + if (equal(opfamilies, ec->ec_opfamilies)) + { + fkinfo->eclass[colno] = ec; + fkinfo->fk_eclass_member[colno] = item2_em; + return ec; + } + /* Otherwise, done with this EC, move on to the next */ + break; + } + } + } + return NULL; +} + +/* + * find_derived_clause_for_ec_member + * Search for a previously-derived clause mentioning the given EM. + * + * The eclass should be an ec_has_const EC, of which the EM is a non-const + * member. This should ensure there is just one derived clause mentioning + * the EM (and equating it to a constant). + * Returns NULL if no such clause can be found. + */ +RestrictInfo * +find_derived_clause_for_ec_member(EquivalenceClass *ec, + EquivalenceMember *em) +{ + ListCell *lc; + + Assert(ec->ec_has_const); + Assert(!em->em_is_const); + foreach(lc, ec->ec_derives) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* + * generate_base_implied_equalities_const will have put non-const + * members on the left side of derived clauses. 
+ */ + if (rinfo->left_em == em) + return rinfo; + } + return NULL; +} + + +/* + * add_child_rel_equivalences + * Search for EC members that reference the root parent of child_rel, and + * add transformed members referencing the child_rel. + * + * Note that this function won't be called at all unless we have at least some + * reason to believe that the EC members it generates will be useful. + * + * parent_rel and child_rel could be derived from appinfo, but since the + * caller has already computed them, we might as well just pass them in. + * + * The passed-in AppendRelInfo is not used when the parent_rel is not a + * top-level baserel, since it shows the mapping from the parent_rel but + * we need to translate EC expressions that refer to the top-level parent. + * Using it is faster than using adjust_appendrel_attrs_multilevel(), though, + * so we prefer it when we can. + */ +void +add_child_rel_equivalences(PlannerInfo *root, + AppendRelInfo *appinfo, + RelOptInfo *parent_rel, + RelOptInfo *child_rel) +{ + Relids top_parent_relids = child_rel->top_parent_relids; + Relids child_relids = child_rel->relids; + int i; + + /* + * EC merging should be complete already, so we can use the parent rel's + * eclass_indexes to avoid searching all of root->eq_classes. + */ + Assert(root->ec_merging_done); + Assert(IS_SIMPLE_REL(parent_rel)); + + i = -1; + while ((i = bms_next_member(parent_rel->eclass_indexes, i)) >= 0) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + int num_members; + + /* + * If this EC contains a volatile expression, then generating child + * EMs would be downright dangerous, so skip it. We rely on a + * volatile EC having only one EM. + */ + if (cur_ec->ec_has_volatile) + continue; + + /* Sanity check eclass_indexes only contain ECs for parent_rel */ + Assert(bms_is_subset(top_parent_relids, cur_ec->ec_relids)); + + /* + * We don't use foreach() here because there's no point in scanning + * newly-added child members, so we can stop after the last + * pre-existing EC member. + */ + num_members = list_length(cur_ec->ec_members); + for (int pos = 0; pos < num_members; pos++) + { + EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos); + + if (cur_em->em_is_const) + continue; /* ignore consts here */ + + /* + * We consider only original EC members here, not + * already-transformed child members. Otherwise, if some original + * member expression references more than one appendrel, we'd get + * an O(N^2) explosion of useless derived expressions for + * combinations of children. (But add_child_join_rel_equivalences + * may add targeted combinations for partitionwise-join purposes.) + */ + if (cur_em->em_is_child) + continue; /* ignore children here */ + + /* Does this member reference child's topmost parent rel? */ + if (bms_overlap(cur_em->em_relids, top_parent_relids)) + { + /* Yes, generate transformed child version */ + Expr *child_expr; + Relids new_relids; + Relids new_nullable_relids; + + if (parent_rel->reloptkind == RELOPT_BASEREL) + { + /* Simple single-level transformation */ + child_expr = (Expr *) + adjust_appendrel_attrs(root, + (Node *) cur_em->em_expr, + 1, &appinfo); + } + else + { + /* Must do multi-level transformation */ + child_expr = (Expr *) + adjust_appendrel_attrs_multilevel(root, + (Node *) cur_em->em_expr, + child_relids, + top_parent_relids); + } + + /* + * Transform em_relids to match. 
Note we do *not* do + * pull_varnos(child_expr) here, as for example the + * transformation might have substituted a constant, but we + * don't want the child member to be marked as constant. + */ + new_relids = bms_difference(cur_em->em_relids, + top_parent_relids); + new_relids = bms_add_members(new_relids, child_relids); + + /* + * And likewise for nullable_relids. Note this code assumes + * parent and child relids are singletons. + */ + new_nullable_relids = cur_em->em_nullable_relids; + if (bms_overlap(new_nullable_relids, top_parent_relids)) + { + new_nullable_relids = bms_difference(new_nullable_relids, + top_parent_relids); + new_nullable_relids = bms_add_members(new_nullable_relids, + child_relids); + } + + (void) add_eq_member(cur_ec, child_expr, + new_relids, new_nullable_relids, + true, cur_em->em_datatype); + + /* Record this EC index for the child rel */ + child_rel->eclass_indexes = bms_add_member(child_rel->eclass_indexes, i); + } + } + } +} + +/* + * add_child_join_rel_equivalences + * Like add_child_rel_equivalences(), but for joinrels + * + * Here we find the ECs relevant to the top parent joinrel and add transformed + * member expressions that refer to this child joinrel. + * + * Note that this function won't be called at all unless we have at least some + * reason to believe that the EC members it generates will be useful. + */ +void +add_child_join_rel_equivalences(PlannerInfo *root, + int nappinfos, AppendRelInfo **appinfos, + RelOptInfo *parent_joinrel, + RelOptInfo *child_joinrel) +{ + Relids top_parent_relids = child_joinrel->top_parent_relids; + Relids child_relids = child_joinrel->relids; + Bitmapset *matching_ecs; + MemoryContext oldcontext; + int i; + + Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel)); + + /* We need consider only ECs that mention the parent joinrel */ + matching_ecs = get_eclass_indexes_for_relids(root, top_parent_relids); + + /* + * If we're being called during GEQO join planning, we still have to + * create any new EC members in the main planner context, to avoid having + * a corrupt EC data structure after the GEQO context is reset. This is + * problematic since we'll leak memory across repeated GEQO cycles. For + * now, though, bloat is better than crash. If it becomes a real issue + * we'll have to do something to avoid generating duplicate EC members. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + int num_members; + + /* + * If this EC contains a volatile expression, then generating child + * EMs would be downright dangerous, so skip it. We rely on a + * volatile EC having only one EM. + */ + if (cur_ec->ec_has_volatile) + continue; + + /* Sanity check on get_eclass_indexes_for_relids result */ + Assert(bms_overlap(top_parent_relids, cur_ec->ec_relids)); + + /* + * We don't use foreach() here because there's no point in scanning + * newly-added child members, so we can stop after the last + * pre-existing EC member. + */ + num_members = list_length(cur_ec->ec_members); + for (int pos = 0; pos < num_members; pos++) + { + EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos); + + if (cur_em->em_is_const) + continue; /* ignore consts here */ + + /* + * We consider only original EC members here, not + * already-transformed child members. 
+ */ + if (cur_em->em_is_child) + continue; /* ignore children here */ + + /* + * We may ignore expressions that reference a single baserel, + * because add_child_rel_equivalences should have handled them. + */ + if (bms_membership(cur_em->em_relids) != BMS_MULTIPLE) + continue; + + /* Does this member reference child's topmost parent rel? */ + if (bms_overlap(cur_em->em_relids, top_parent_relids)) + { + /* Yes, generate transformed child version */ + Expr *child_expr; + Relids new_relids; + Relids new_nullable_relids; + + if (parent_joinrel->reloptkind == RELOPT_JOINREL) + { + /* Simple single-level transformation */ + child_expr = (Expr *) + adjust_appendrel_attrs(root, + (Node *) cur_em->em_expr, + nappinfos, appinfos); + } + else + { + /* Must do multi-level transformation */ + Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL); + child_expr = (Expr *) + adjust_appendrel_attrs_multilevel(root, + (Node *) cur_em->em_expr, + child_relids, + top_parent_relids); + } + + /* + * Transform em_relids to match. Note we do *not* do + * pull_varnos(child_expr) here, as for example the + * transformation might have substituted a constant, but we + * don't want the child member to be marked as constant. + */ + new_relids = bms_difference(cur_em->em_relids, + top_parent_relids); + new_relids = bms_add_members(new_relids, child_relids); + + /* + * For nullable_relids, we must selectively replace parent + * nullable relids with child ones. + */ + new_nullable_relids = cur_em->em_nullable_relids; + if (bms_overlap(new_nullable_relids, top_parent_relids)) + new_nullable_relids = + adjust_child_relids_multilevel(root, + new_nullable_relids, + child_relids, + top_parent_relids); + + (void) add_eq_member(cur_ec, child_expr, + new_relids, new_nullable_relids, + true, cur_em->em_datatype); + } + } + } + + MemoryContextSwitchTo(oldcontext); +} + + +/* + * generate_implied_equalities_for_column + * Create EC-derived joinclauses usable with a specific column. + * + * This is used by indxpath.c to extract potentially indexable joinclauses + * from ECs, and can be used by foreign data wrappers for similar purposes. + * We assume that only expressions in Vars of a single table are of interest, + * but the caller provides a callback function to identify exactly which + * such expressions it would like to know about. + * + * We assume that any given table/index column could appear in only one EC. + * (This should be true in all but the most pathological cases, and if it + * isn't, we stop on the first match anyway.) Therefore, what we return + * is a redundant list of clauses equating the table/index column to each of + * the other-relation values it is known to be equal to. Any one of + * these clauses can be used to create a parameterized path, and there + * is no value in using more than one. (But it *is* worthwhile to create + * a separate parameterized path for each one, since that leads to different + * join orders.) + * + * The caller can pass a Relids set of rels we aren't interested in joining + * to, so as to save the work of creating useless clauses. + */ +List * +generate_implied_equalities_for_column(PlannerInfo *root, + RelOptInfo *rel, + ec_matches_callback_type callback, + void *callback_arg, + Relids prohibited_rels) +{ + List *result = NIL; + bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + Relids parent_relids; + int i; + + /* Should be OK to rely on eclass_indexes */ + Assert(root->ec_merging_done); + + /* Indexes are available only on base or "other" member relations. 
*/ + Assert(IS_SIMPLE_REL(rel)); + + /* If it's a child rel, we'll need to know what its parent(s) are */ + if (is_child_rel) + parent_relids = find_childrel_parents(root, rel); + else + parent_relids = NULL; /* not used, but keep compiler quiet */ + + i = -1; + while ((i = bms_next_member(rel->eclass_indexes, i)) >= 0) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + EquivalenceMember *cur_em; + ListCell *lc2; + + /* Sanity check eclass_indexes only contain ECs for rel */ + Assert(is_child_rel || bms_is_subset(rel->relids, cur_ec->ec_relids)); + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* + * Scan members, looking for a match to the target column. Note that + * child EC members are considered, but only when they belong to the + * target relation. (Unlike regular members, the same expression + * could be a child member of more than one EC. Therefore, it's + * potentially order-dependent which EC a child relation's target + * column gets matched to. This is annoying but it only happens in + * corner cases, so for now we live with just reporting the first + * match. See also get_eclass_for_sort_expr.) + */ + cur_em = NULL; + foreach(lc2, cur_ec->ec_members) + { + cur_em = (EquivalenceMember *) lfirst(lc2); + if (bms_equal(cur_em->em_relids, rel->relids) && + callback(root, rel, cur_ec, cur_em, callback_arg)) + break; + cur_em = NULL; + } + + if (!cur_em) + continue; + + /* + * Found our match. Scan the other EC members and attempt to generate + * joinclauses. + */ + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *rinfo; + + if (other_em->em_is_child) + continue; /* ignore children here */ + + /* Make sure it'll be a join to a different rel */ + if (other_em == cur_em || + bms_overlap(other_em->em_relids, rel->relids)) + continue; + + /* Forget it if caller doesn't want joins to this rel */ + if (bms_overlap(other_em->em_relids, prohibited_rels)) + continue; + + /* + * Also, if this is a child rel, avoid generating a useless join + * to its parent rel(s). + */ + if (is_child_rel && + bms_overlap(parent_relids, other_em->em_relids)) + continue; + + eq_op = select_equality_operator(cur_ec, + cur_em->em_datatype, + other_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + + /* set parent_ec to mark as redundant with other joinclauses */ + rinfo = create_join_clause(root, cur_ec, eq_op, + cur_em, other_em, + cur_ec); + + result = lappend(result, rinfo); + } + + /* + * If somehow we failed to create any join clauses, we might as well + * keep scanning the ECs for another match. But if we did make any, + * we're done, because we don't want to return non-redundant clauses. + */ + if (result) + break; + } + + return result; +} + +/* + * have_relevant_eclass_joinclause + * Detect whether there is an EquivalenceClass that could produce + * a joinclause involving the two given relations. + * + * This is essentially a very cut-down version of + * generate_join_implied_equalities(). Note it's OK to occasionally say "yes" + * incorrectly. Hence we don't bother with details like whether the lack of a + * cross-type operator might prevent the clause from actually being generated. 
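+ * + * For example, an EC containing a.x and b.y (illustrative names) makes us + * answer "yes" for the pair (a, b) even if no equality operator actually + * exists for that particular datatype combination; the occasional false + * positive is acceptable here.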
+ */ +bool +have_relevant_eclass_joinclause(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2) +{ + Bitmapset *matching_ecs; + int i; + + /* Examine only eclasses mentioning both rel1 and rel2 */ + matching_ecs = get_common_eclass_indexes(root, rel1->relids, + rel2->relids); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, + i); + + /* + * Sanity check that get_common_eclass_indexes gave only ECs + * containing both rels. + */ + Assert(bms_overlap(rel1->relids, ec->ec_relids)); + Assert(bms_overlap(rel2->relids, ec->ec_relids)); + + /* + * Won't generate joinclauses if single-member (this test covers the + * volatile case too) + */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* + * We do not need to examine the individual members of the EC, because + * all that we care about is whether each rel overlaps the relids of + * at least one member, and get_common_eclass_indexes() and the single + * member check above are sufficient to prove that. (As with + * have_relevant_joinclause(), it is not necessary that the EC be able + * to form a joinclause relating exactly the two given rels, only that + * it be able to form a joinclause mentioning both, and this will + * surely be true if both of them overlap ec_relids.) + * + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the membership anyway is + * OK as a possibly-overoptimistic heuristic. + * + * We don't test ec_has_const either, even though a const eclass won't + * generate real join clauses. This is because if we had "WHERE a.x = + * b.y and a.x = 42", it is worth considering a join between a and b, + * since the join result is likely to be small even though it'll end + * up being an unqualified nestloop. + */ + + return true; + } + + return false; +} + + +/* + * has_relevant_eclass_joinclause + * Detect whether there is an EquivalenceClass that could produce + * a joinclause involving the given relation and anything else. + * + * This is the same as have_relevant_eclass_joinclause with the other rel + * implicitly defined as "everything else in the query". + */ +bool +has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) +{ + Bitmapset *matched_ecs; + int i; + + /* Examine only eclasses mentioning rel1 */ + matched_ecs = get_eclass_indexes_for_relids(root, rel1->relids); + + i = -1; + while ((i = bms_next_member(matched_ecs, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, + i); + + /* + * Won't generate joinclauses if single-member (this test covers the + * volatile case too) + */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* + * Per the comment in have_relevant_eclass_joinclause, it's sufficient + * to find an EC that mentions both this rel and some other rel. + */ + if (!bms_is_subset(ec->ec_relids, rel1->relids)) + return true; + } + + return false; +} + + +/* + * eclass_useful_for_merging + * Detect whether the EC could produce any mergejoinable join clauses + * against the specified relation. + * + * This is just a heuristic test and doesn't have to be exact; it's better + * to say "yes" incorrectly than "no". Hence we don't bother with details + * like whether the lack of a cross-type operator might prevent the clause + * from actually being generated. 
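+ * + * For example, with rel = a and an EC {a.x, b.y} (illustrative names), we + * return true: member b.y lies outside the given rel and so could supply + * the other side of a mergejoinable clause.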
+ */ +bool +eclass_useful_for_merging(PlannerInfo *root, + EquivalenceClass *eclass, + RelOptInfo *rel) +{ + Relids relids; + ListCell *lc; + + Assert(!eclass->ec_merged); + + /* + * Won't generate joinclauses if const or single-member (the latter test + * covers the volatile case too) + */ + if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1) + return false; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code path + * to look through ec_sources. Checking the members anyway is OK as a + * possibly-overoptimistic heuristic. + */ + + /* If specified rel is a child, we must consider the topmost parent rel */ + if (IS_OTHER_REL(rel)) + { + Assert(!bms_is_empty(rel->top_parent_relids)); + relids = rel->top_parent_relids; + } + else + relids = rel->relids; + + /* If rel already includes all members of eclass, no point in searching */ + if (bms_is_subset(eclass->ec_relids, relids)) + return false; + + /* To join, we need a member not in the given rel */ + foreach(lc, eclass->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + + if (!bms_overlap(cur_em->em_relids, relids)) + return true; + } + + return false; +} + + +/* + * is_redundant_derived_clause + * Test whether rinfo is derived from same EC as any clause in clauselist; + * if so, it can be presumed to represent a condition that's redundant + * with that member of the list. + */ +bool +is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist) +{ + EquivalenceClass *parent_ec = rinfo->parent_ec; + ListCell *lc; + + /* Fail if it's not a potentially-redundant clause from some EC */ + if (parent_ec == NULL) + return false; + + foreach(lc, clauselist) + { + RestrictInfo *otherrinfo = (RestrictInfo *) lfirst(lc); + + if (otherrinfo->parent_ec == parent_ec) + return true; + } + + return false; +} + +/* + * is_redundant_with_indexclauses + * Test whether rinfo is redundant with any clause in the IndexClause + * list. Here, for convenience, we test both simple identity and + * whether it is derived from the same EC as any member of the list. + */ +bool +is_redundant_with_indexclauses(RestrictInfo *rinfo, List *indexclauses) +{ + EquivalenceClass *parent_ec = rinfo->parent_ec; + ListCell *lc; + + foreach(lc, indexclauses) + { + IndexClause *iclause = lfirst_node(IndexClause, lc); + RestrictInfo *otherrinfo = iclause->rinfo; + + /* If indexclause is lossy, it won't enforce the condition exactly */ + if (iclause->lossy) + continue; + + /* Match if it's same clause (pointer equality should be enough) */ + if (rinfo == otherrinfo) + return true; + /* Match if derived from same EC */ + if (parent_ec && otherrinfo->parent_ec == parent_ec) + return true; + + /* + * No need to look at the derived clauses in iclause->indexquals; they + * couldn't match if the parent clause didn't. 
+ */ + } + + return false; +} + +/* + * get_eclass_indexes_for_relids + * Build and return a Bitmapset containing the indexes into root's + * eq_classes list for all eclasses that mention any of these relids + */ +static Bitmapset * +get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids) +{ + Bitmapset *ec_indexes = NULL; + int i = -1; + + /* Should be OK to rely on eclass_indexes */ + Assert(root->ec_merging_done); + + while ((i = bms_next_member(relids, i)) > 0) + { + RelOptInfo *rel = root->simple_rel_array[i]; + + ec_indexes = bms_add_members(ec_indexes, rel->eclass_indexes); + } + return ec_indexes; +} + +/* + * get_common_eclass_indexes + * Build and return a Bitmapset containing the indexes into root's + * eq_classes list for all eclasses that mention rels in both + * relids1 and relids2. + */ +static Bitmapset * +get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2) +{ + Bitmapset *rel1ecs; + Bitmapset *rel2ecs; + int relid; + + rel1ecs = get_eclass_indexes_for_relids(root, relids1); + + /* + * We can get away with just using the relation's eclass_indexes directly + * when relids2 is a singleton set. + */ + if (bms_get_singleton_member(relids2, &relid)) + rel2ecs = root->simple_rel_array[relid]->eclass_indexes; + else + rel2ecs = get_eclass_indexes_for_relids(root, relids2); + + /* Calculate and return the common EC indexes, recycling the left input. */ + return bms_int_members(rel1ecs, rel2ecs); +} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c new file mode 100644 index 0000000..0e4e00e --- /dev/null +++ b/src/backend/optimizer/path/indxpath.c @@ -0,0 +1,3826 @@ +/*------------------------------------------------------------------------- + * + * indxpath.c + * Routines to determine which indexes are usable for scanning a + * given relation, and create Paths accordingly. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/indxpath.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/stratnum.h" +#include "access/sysattr.h" +#include "catalog/pg_am.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" + + +/* XXX see PartCollMatchesExprColl */ +#define IndexCollMatchesExprColl(idxcollation, exprcollation) \ + ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation)) + +/* Whether we are looking for plain indexscan, bitmap scan, or either */ +typedef enum +{ + ST_INDEXSCAN, /* must support amgettuple */ + ST_BITMAPSCAN, /* must support amgetbitmap */ + ST_ANYSCAN /* either is okay */ +} ScanTypeControl; + +/* Data structure for collecting qual clauses that match an index */ +typedef struct +{ + bool nonempty; /* True if lists are not all empty */ + /* Lists of IndexClause nodes, one list per index column */ + List *indexclauses[INDEX_MAX_KEYS]; +} IndexClauseSet; + +/* Per-path data used within choose_bitmap_and() */ +typedef struct +{ + Path *path; /* IndexPath, BitmapAndPath, or BitmapOrPath */ + List *quals; /* the WHERE clauses it uses */ + List *preds; /* predicates of its partial index(es) */ + Bitmapset *clauseids; /* quals+preds represented as a bitmapset */ + bool unclassifiable; /* has too many quals+preds to process? 
*/ +} PathClauseUsage; + +/* Callback argument for ec_member_matches_indexcol */ +typedef struct +{ + IndexOptInfo *index; /* index we're considering */ + int indexcol; /* index column we want to match to */ +} ec_member_matches_arg; + + +static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, + IndexClauseSet *rclauseset, + IndexClauseSet *jclauseset, + IndexClauseSet *eclauseset, + List **bitindexpaths); +static void consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, + IndexClauseSet *rclauseset, + IndexClauseSet *jclauseset, + IndexClauseSet *eclauseset, + List **bitindexpaths, + List *indexjoinclauses, + int considered_clauses, + List **considered_relids); +static void get_join_index_paths(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, + IndexClauseSet *rclauseset, + IndexClauseSet *jclauseset, + IndexClauseSet *eclauseset, + List **bitindexpaths, + Relids relids, + List **considered_relids); +static bool eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids, + List *indexjoinclauses); +static bool bms_equal_any(Relids relids, List *relids_list); +static void get_index_paths(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, IndexClauseSet *clauses, + List **bitindexpaths); +static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, IndexClauseSet *clauses, + bool useful_predicate, + ScanTypeControl scantype, + bool *skip_nonnative_saop, + bool *skip_lower_saop); +static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, + List *clauses, List *other_clauses); +static List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, + List *clauses, List *other_clauses); +static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, + List *paths); +static int path_usage_comparator(const void *a, const void *b); +static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, + Path *ipath); +static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, + List *paths); +static PathClauseUsage *classify_index_clause_usage(Path *path, + List **clauselist); +static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds); +static int find_list_position(Node *node, List **nodelist); +static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index); +static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids); +static double adjust_rowcount_for_semijoins(PlannerInfo *root, + Index cur_relid, + Index outer_relid, + double rowcount); +static double approximate_joinrel_size(PlannerInfo *root, Relids relids); +static void match_restriction_clauses_to_index(PlannerInfo *root, + IndexOptInfo *index, + IndexClauseSet *clauseset); +static void match_join_clauses_to_index(PlannerInfo *root, + RelOptInfo *rel, IndexOptInfo *index, + IndexClauseSet *clauseset, + List **joinorclauses); +static void match_eclass_clauses_to_index(PlannerInfo *root, + IndexOptInfo *index, + IndexClauseSet *clauseset); +static void match_clauses_to_index(PlannerInfo *root, + List *clauses, + IndexOptInfo *index, + IndexClauseSet *clauseset); +static void match_clause_to_index(PlannerInfo *root, + RestrictInfo *rinfo, + IndexOptInfo *index, + IndexClauseSet *clauseset); +static IndexClause *match_clause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index); +static IndexClause *match_boolean_index_clause(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, IndexOptInfo 
*index); +static IndexClause *match_opclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index); +static IndexClause *match_funcclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index); +static IndexClause *get_index_clause_from_support(PlannerInfo *root, + RestrictInfo *rinfo, + Oid funcid, + int indexarg, + int indexcol, + IndexOptInfo *index); +static IndexClause *match_saopclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index); +static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index); +static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index, + Oid expr_op, + bool var_on_left); +static void match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys, + List **orderby_clauses_p, + List **clause_columns_p); +static Expr *match_clause_to_ordering_op(IndexOptInfo *index, + int indexcol, Expr *clause, Oid pk_opfamily); +static bool ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg); + + +/* + * create_index_paths() + * Generate all interesting index paths for the given relation. + * Candidate paths are added to the rel's pathlist (using add_path). + * + * To be considered for an index scan, an index must match one or more + * restriction clauses or join clauses from the query's qual condition, + * or match the query's ORDER BY condition, or have a predicate that + * matches the query's qual condition. + * + * There are two basic kinds of index scans. A "plain" index scan uses + * only restriction clauses (possibly none at all) in its indexqual, + * so it can be applied in any context. A "parameterized" index scan uses + * join clauses (plus restriction clauses, if available) in its indexqual. + * When joining such a scan to one of the relations supplying the other + * variables used in its indexqual, the parameterized scan must appear as + * the inner relation of a nestloop join; it can't be used on the outer side, + * nor in a merge or hash join. In that context, values for the other rels' + * attributes are available and fixed during any one scan of the indexpath. + * + * An IndexPath is generated and submitted to add_path() for each plain or + * parameterized index scan this routine deems potentially interesting for + * the current query. + * + * 'rel' is the relation for which we want to generate index paths + * + * Note: check_index_predicates() must have been run previously for this rel. + * + * Note: in cases involving LATERAL references in the relation's tlist, it's + * possible that rel->lateral_relids is nonempty. Currently, we include + * lateral_relids into the parameterization reported for each path, but don't + * take it into account otherwise. The fact that any such rels *must* be + * available as parameter sources perhaps should influence our choices of + * index quals ... but for now, it doesn't seem worth troubling over. + * In particular, comments below about "unparameterized" paths should be read + * as meaning "unparameterized so far as the indexquals are concerned". 
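+ *
+ * For illustration (hypothetical table "t" with an index on t.a, and an
+ * outer rel "o"; a sketch, not a complete statement of the rules):
+ *   WHERE t.a = 42          can yield a plain index scan, usable anywhere;
+ *   JOIN ... ON t.a = o.b   can yield a parameterized index scan, usable
+ *                           only as the inner side of a nestloop whose
+ *                           outer side supplies values of o.b.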
+ */ +void +create_index_paths(PlannerInfo *root, RelOptInfo *rel) +{ + List *indexpaths; + List *bitindexpaths; + List *bitjoinpaths; + List *joinorclauses; + IndexClauseSet rclauseset; + IndexClauseSet jclauseset; + IndexClauseSet eclauseset; + ListCell *lc; + + /* Skip the whole mess if no indexes */ + if (rel->indexlist == NIL) + return; + + /* Bitmap paths are collected and then dealt with at the end */ + bitindexpaths = bitjoinpaths = joinorclauses = NIL; + + /* Examine each index in turn */ + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + + /* Protect limited-size array in IndexClauseSets */ + Assert(index->nkeycolumns <= INDEX_MAX_KEYS); + + /* + * Ignore partial indexes that do not match the query. + * (generate_bitmap_or_paths() might be able to do something with + * them, but that's of no concern here.) + */ + if (index->indpred != NIL && !index->predOK) + continue; + + /* + * Identify the restriction clauses that can match the index. + */ + MemSet(&rclauseset, 0, sizeof(rclauseset)); + match_restriction_clauses_to_index(root, index, &rclauseset); + + /* + * Build index paths from the restriction clauses. These will be + * non-parameterized paths. Plain paths go directly to add_path(), + * bitmap paths are added to bitindexpaths to be handled below. + */ + get_index_paths(root, rel, index, &rclauseset, + &bitindexpaths); + + /* + * Identify the join clauses that can match the index. For the moment + * we keep them separate from the restriction clauses. Note that this + * step finds only "loose" join clauses that have not been merged into + * EquivalenceClasses. Also, collect join OR clauses for later. + */ + MemSet(&jclauseset, 0, sizeof(jclauseset)); + match_join_clauses_to_index(root, rel, index, + &jclauseset, &joinorclauses); + + /* + * Look for EquivalenceClasses that can generate joinclauses matching + * the index. + */ + MemSet(&eclauseset, 0, sizeof(eclauseset)); + match_eclass_clauses_to_index(root, index, + &eclauseset); + + /* + * If we found any plain or eclass join clauses, build parameterized + * index paths using them. + */ + if (jclauseset.nonempty || eclauseset.nonempty) + consider_index_join_clauses(root, rel, index, + &rclauseset, + &jclauseset, + &eclauseset, + &bitjoinpaths); + } + + /* + * Generate BitmapOrPaths for any suitable OR-clauses present in the + * restriction list. Add these to bitindexpaths. + */ + indexpaths = generate_bitmap_or_paths(root, rel, + rel->baserestrictinfo, NIL); + bitindexpaths = list_concat(bitindexpaths, indexpaths); + + /* + * Likewise, generate BitmapOrPaths for any suitable OR-clauses present in + * the joinclause list. Add these to bitjoinpaths. + */ + indexpaths = generate_bitmap_or_paths(root, rel, + joinorclauses, rel->baserestrictinfo); + bitjoinpaths = list_concat(bitjoinpaths, indexpaths); + + /* + * If we found anything usable, generate a BitmapHeapPath for the most + * promising combination of restriction bitmap index paths. Note there + * will be only one such path no matter how many indexes exist. This + * should be sufficient since there's basically only one figure of merit + * (total cost) for such a path. 
+ */ + if (bitindexpaths != NIL) + { + Path *bitmapqual; + BitmapHeapPath *bpath; + + bitmapqual = choose_bitmap_and(root, rel, bitindexpaths); + bpath = create_bitmap_heap_path(root, rel, bitmapqual, + rel->lateral_relids, 1.0, 0); + add_path(rel, (Path *) bpath); + + /* create a partial bitmap heap path */ + if (rel->consider_parallel && rel->lateral_relids == NULL) + create_partial_bitmap_paths(root, rel, bitmapqual); + } + + /* + * Likewise, if we found anything usable, generate BitmapHeapPaths for the + * most promising combinations of join bitmap index paths. Our strategy + * is to generate one such path for each distinct parameterization seen + * among the available bitmap index paths. This may look pretty + * expensive, but usually there won't be very many distinct + * parameterizations. (This logic is quite similar to that in + * consider_index_join_clauses, but we're working with whole paths not + * individual clauses.) + */ + if (bitjoinpaths != NIL) + { + List *all_path_outers; + ListCell *lc; + + /* Identify each distinct parameterization seen in bitjoinpaths */ + all_path_outers = NIL; + foreach(lc, bitjoinpaths) + { + Path *path = (Path *) lfirst(lc); + Relids required_outer = PATH_REQ_OUTER(path); + + if (!bms_equal_any(required_outer, all_path_outers)) + all_path_outers = lappend(all_path_outers, required_outer); + } + + /* Now, for each distinct parameterization set ... */ + foreach(lc, all_path_outers) + { + Relids max_outers = (Relids) lfirst(lc); + List *this_path_set; + Path *bitmapqual; + Relids required_outer; + double loop_count; + BitmapHeapPath *bpath; + ListCell *lcp; + + /* Identify all the bitmap join paths needing no more than that */ + this_path_set = NIL; + foreach(lcp, bitjoinpaths) + { + Path *path = (Path *) lfirst(lcp); + + if (bms_is_subset(PATH_REQ_OUTER(path), max_outers)) + this_path_set = lappend(this_path_set, path); + } + + /* + * Add in restriction bitmap paths, since they can be used + * together with any join paths. + */ + this_path_set = list_concat(this_path_set, bitindexpaths); + + /* Select best AND combination for this parameterization */ + bitmapqual = choose_bitmap_and(root, rel, this_path_set); + + /* And push that path into the mix */ + required_outer = PATH_REQ_OUTER(bitmapqual); + loop_count = get_loop_count(root, rel->relid, required_outer); + bpath = create_bitmap_heap_path(root, rel, bitmapqual, + required_outer, loop_count, 0); + add_path(rel, (Path *) bpath); + } + } +} + +/* + * consider_index_join_clauses + * Given sets of join clauses for an index, decide which parameterized + * index paths to build. + * + * Plain indexpaths are sent directly to add_path, while potential + * bitmap indexpaths are added to *bitindexpaths for later processing. 
+ * + * 'rel' is the index's heap relation + * 'index' is the index for which we want to generate paths + * 'rclauseset' is the collection of indexable restriction clauses + * 'jclauseset' is the collection of indexable simple join clauses + * 'eclauseset' is the collection of indexable clauses from EquivalenceClasses + * '*bitindexpaths' is the list to add bitmap paths to + */ +static void +consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, + IndexClauseSet *rclauseset, + IndexClauseSet *jclauseset, + IndexClauseSet *eclauseset, + List **bitindexpaths) +{ + int considered_clauses = 0; + List *considered_relids = NIL; + int indexcol; + + /* + * The strategy here is to identify every potentially useful set of outer + * rels that can provide indexable join clauses. For each such set, + * select all the join clauses available from those outer rels, add on all + * the indexable restriction clauses, and generate plain and/or bitmap + * index paths for that set of clauses. This is based on the assumption + * that it's always better to apply a clause as an indexqual than as a + * filter (qpqual); which is where an available clause would end up being + * applied if we omit it from the indexquals. + * + * This looks expensive, but in most practical cases there won't be very + * many distinct sets of outer rels to consider. As a safety valve when + * that's not true, we use a heuristic: limit the number of outer rel sets + * considered to a multiple of the number of clauses considered. (We'll + * always consider using each individual join clause, though.) + * + * For simplicity in selecting relevant clauses, we represent each set of + * outer rels as a maximum set of clause_relids --- that is, the indexed + * relation itself is also included in the relids set. considered_relids + * lists all relids sets we've already tried. + */ + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + /* Consider each applicable simple join clause */ + considered_clauses += list_length(jclauseset->indexclauses[indexcol]); + consider_index_join_outer_rels(root, rel, index, + rclauseset, jclauseset, eclauseset, + bitindexpaths, + jclauseset->indexclauses[indexcol], + considered_clauses, + &considered_relids); + /* Consider each applicable eclass join clause */ + considered_clauses += list_length(eclauseset->indexclauses[indexcol]); + consider_index_join_outer_rels(root, rel, index, + rclauseset, jclauseset, eclauseset, + bitindexpaths, + eclauseset->indexclauses[indexcol], + considered_clauses, + &considered_relids); + } +} + +/* + * consider_index_join_outer_rels + * Generate parameterized paths based on clause relids in the clause list. + * + * Workhorse for consider_index_join_clauses; see notes therein for rationale. 
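+ *
+ * For illustration (hypothetical rels; a sketch of the strategy described
+ * above): if the indexed rel is t and the indexable join clauses reference
+ * relid sets {t,a} and {t,b}, we try {t,a}, {t,b}, and their union {t,a,b},
+ * subject to the 10 * considered_clauses cap on the number of sets tried.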
+ * + * 'rel', 'index', 'rclauseset', 'jclauseset', 'eclauseset', and + * 'bitindexpaths' as above + * 'indexjoinclauses' is a list of IndexClauses for join clauses + * 'considered_clauses' is the total number of clauses considered (so far) + * '*considered_relids' is a list of all relids sets already considered + */ +static void +consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, + IndexClauseSet *rclauseset, + IndexClauseSet *jclauseset, + IndexClauseSet *eclauseset, + List **bitindexpaths, + List *indexjoinclauses, + int considered_clauses, + List **considered_relids) +{ + ListCell *lc; + + /* Examine relids of each joinclause in the given list */ + foreach(lc, indexjoinclauses) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + Relids clause_relids = iclause->rinfo->clause_relids; + EquivalenceClass *parent_ec = iclause->rinfo->parent_ec; + int num_considered_relids; + + /* If we already tried its relids set, no need to do so again */ + if (bms_equal_any(clause_relids, *considered_relids)) + continue; + + /* + * Generate the union of this clause's relids set with each + * previously-tried set. This ensures we try this clause along with + * every interesting subset of previous clauses. However, to avoid + * exponential growth of planning time when there are many clauses, + * limit the number of relid sets accepted to 10 * considered_clauses. + * + * Note: get_join_index_paths appends entries to *considered_relids, + * but we do not need to visit such newly-added entries within this + * loop, so we don't use foreach() here. No real harm would be done + * if we did visit them, since the subset check would reject them; but + * it would waste some cycles. + */ + num_considered_relids = list_length(*considered_relids); + for (int pos = 0; pos < num_considered_relids; pos++) + { + Relids oldrelids = (Relids) list_nth(*considered_relids, pos); + + /* + * If either is a subset of the other, no new set is possible. + * This isn't a complete test for redundancy, but it's easy and + * cheap. get_join_index_paths will check more carefully if we + * already generated the same relids set. + */ + if (bms_subset_compare(clause_relids, oldrelids) != BMS_DIFFERENT) + continue; + + /* + * If this clause was derived from an equivalence class, the + * clause list may contain other clauses derived from the same + * eclass. We should not consider that combining this clause with + * one of those clauses generates a usefully different + * parameterization; so skip if any clause derived from the same + * eclass would already have been included when using oldrelids. + */ + if (parent_ec && + eclass_already_used(parent_ec, oldrelids, + indexjoinclauses)) + continue; + + /* + * If the number of relid sets considered exceeds our heuristic + * limit, stop considering combinations of clauses. We'll still + * consider the current clause alone, though (below this loop). + */ + if (list_length(*considered_relids) >= 10 * considered_clauses) + break; + + /* OK, try the union set */ + get_join_index_paths(root, rel, index, + rclauseset, jclauseset, eclauseset, + bitindexpaths, + bms_union(clause_relids, oldrelids), + considered_relids); + } + + /* Also try this set of relids by itself */ + get_join_index_paths(root, rel, index, + rclauseset, jclauseset, eclauseset, + bitindexpaths, + clause_relids, + considered_relids); + } +} + +/* + * get_join_index_paths + * Generate index paths using clauses from the specified outer relations. 
+ * In addition to generating paths, relids is added to *considered_relids + * if not already present. + * + * Workhorse for consider_index_join_clauses; see notes therein for rationale. + * + * 'rel', 'index', 'rclauseset', 'jclauseset', 'eclauseset', + * 'bitindexpaths', 'considered_relids' as above + * 'relids' is the current set of relids to consider (the target rel plus + * one or more outer rels) + */ +static void +get_join_index_paths(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, + IndexClauseSet *rclauseset, + IndexClauseSet *jclauseset, + IndexClauseSet *eclauseset, + List **bitindexpaths, + Relids relids, + List **considered_relids) +{ + IndexClauseSet clauseset; + int indexcol; + + /* If we already considered this relids set, don't repeat the work */ + if (bms_equal_any(relids, *considered_relids)) + return; + + /* Identify indexclauses usable with this relids set */ + MemSet(&clauseset, 0, sizeof(clauseset)); + + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + ListCell *lc; + + /* First find applicable simple join clauses */ + foreach(lc, jclauseset->indexclauses[indexcol]) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + + if (bms_is_subset(iclause->rinfo->clause_relids, relids)) + clauseset.indexclauses[indexcol] = + lappend(clauseset.indexclauses[indexcol], iclause); + } + + /* + * Add applicable eclass join clauses. The clauses generated for each + * column are redundant (cf generate_implied_equalities_for_column), + * so we need at most one. This is the only exception to the general + * rule of using all available index clauses. + */ + foreach(lc, eclauseset->indexclauses[indexcol]) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + + if (bms_is_subset(iclause->rinfo->clause_relids, relids)) + { + clauseset.indexclauses[indexcol] = + lappend(clauseset.indexclauses[indexcol], iclause); + break; + } + } + + /* Add restriction clauses */ + clauseset.indexclauses[indexcol] = + list_concat(clauseset.indexclauses[indexcol], + rclauseset->indexclauses[indexcol]); + + if (clauseset.indexclauses[indexcol] != NIL) + clauseset.nonempty = true; + } + + /* We should have found something, else caller passed silly relids */ + Assert(clauseset.nonempty); + + /* Build index path(s) using the collected set of clauses */ + get_index_paths(root, rel, index, &clauseset, bitindexpaths); + + /* + * Remember we considered paths for this set of relids. + */ + *considered_relids = lappend(*considered_relids, relids); +} + +/* + * eclass_already_used + * True if any join clause usable with oldrelids was generated from + * the specified equivalence class. + */ +static bool +eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids, + List *indexjoinclauses) +{ + ListCell *lc; + + foreach(lc, indexjoinclauses) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + RestrictInfo *rinfo = iclause->rinfo; + + if (rinfo->parent_ec == parent_ec && + bms_is_subset(rinfo->clause_relids, oldrelids)) + return true; + } + return false; +} + +/* + * bms_equal_any + * True if relids is bms_equal to any member of relids_list + * + * Perhaps this should be in bitmapset.c someday. + */ +static bool +bms_equal_any(Relids relids, List *relids_list) +{ + ListCell *lc; + + foreach(lc, relids_list) + { + if (bms_equal(relids, (Relids) lfirst(lc))) + return true; + } + return false; +} + + +/* + * get_index_paths + * Given an index and a set of index clauses for it, construct IndexPaths. 
+ * + * Plain indexpaths are sent directly to add_path, while potential + * bitmap indexpaths are added to *bitindexpaths for later processing. + * + * This is a fairly simple frontend to build_index_paths(). Its reason for + * existence is mainly to handle ScalarArrayOpExpr quals properly. If the + * index AM supports them natively, we should just include them in simple + * index paths. If not, we should exclude them while building simple index + * paths, and then make a separate attempt to include them in bitmap paths. + * Furthermore, we should consider excluding lower-order ScalarArrayOpExpr + * quals so as to create ordered paths. + */ +static void +get_index_paths(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, IndexClauseSet *clauses, + List **bitindexpaths) +{ + List *indexpaths; + bool skip_nonnative_saop = false; + bool skip_lower_saop = false; + ListCell *lc; + + /* + * Build simple index paths using the clauses. Allow ScalarArrayOpExpr + * clauses only if the index AM supports them natively, and skip any such + * clauses for index columns after the first (so that we produce ordered + * paths if possible). + */ + indexpaths = build_index_paths(root, rel, + index, clauses, + index->predOK, + ST_ANYSCAN, + &skip_nonnative_saop, + &skip_lower_saop); + + /* + * If we skipped any lower-order ScalarArrayOpExprs on an index with an AM + * that supports them, then try again including those clauses. This will + * produce paths with more selectivity but no ordering. + */ + if (skip_lower_saop) + { + indexpaths = list_concat(indexpaths, + build_index_paths(root, rel, + index, clauses, + index->predOK, + ST_ANYSCAN, + &skip_nonnative_saop, + NULL)); + } + + /* + * Submit all the ones that can form plain IndexScan plans to add_path. (A + * plain IndexPath can represent either a plain IndexScan or an + * IndexOnlyScan, but for our purposes here that distinction does not + * matter. However, some of the indexes might support only bitmap scans, + * and those we mustn't submit to add_path here.) + * + * Also, pick out the ones that are usable as bitmap scans. For that, we + * must discard indexes that don't support bitmap scans, and we also are + * only interested in paths that have some selectivity; we should discard + * anything that was generated solely for ordering purposes. + */ + foreach(lc, indexpaths) + { + IndexPath *ipath = (IndexPath *) lfirst(lc); + + if (index->amhasgettuple) + add_path(rel, (Path *) ipath); + + if (index->amhasgetbitmap && + (ipath->path.pathkeys == NIL || + ipath->indexselectivity < 1.0)) + *bitindexpaths = lappend(*bitindexpaths, ipath); + } + + /* + * If there were ScalarArrayOpExpr clauses that the index can't handle + * natively, generate bitmap scan paths relying on executor-managed + * ScalarArrayOpExpr. + */ + if (skip_nonnative_saop) + { + indexpaths = build_index_paths(root, rel, + index, clauses, + false, + ST_BITMAPSCAN, + NULL, + NULL); + *bitindexpaths = list_concat(*bitindexpaths, indexpaths); + } +} + +/* + * build_index_paths + * Given an index and a set of index clauses for it, construct zero + * or more IndexPaths. It also constructs zero or more partial IndexPaths. + * + * We return a list of paths because (1) this routine checks some cases + * that should cause us to not generate any IndexPath, and (2) in some + * cases we want to consider both a forward and a backward scan, so as + * to obtain both sort orders. Note that the paths are just returned + * to the caller and not immediately fed to add_path(). 
+ * + * At top level, useful_predicate should be exactly the index's predOK flag + * (ie, true if it has a predicate that was proven from the restriction + * clauses). When working on an arm of an OR clause, useful_predicate + * should be true if the predicate required the current OR list to be proven. + * Note that this routine should never be called at all if the index has an + * unprovable predicate. + * + * scantype indicates whether we want to create plain indexscans, bitmap + * indexscans, or both. When it's ST_BITMAPSCAN, we will not consider + * index ordering while deciding if a Path is worth generating. + * + * If skip_nonnative_saop is non-NULL, we ignore ScalarArrayOpExpr clauses + * unless the index AM supports them directly, and we set *skip_nonnative_saop + * to true if we found any such clauses (caller must initialize the variable + * to false). If it's NULL, we do not ignore ScalarArrayOpExpr clauses. + * + * If skip_lower_saop is non-NULL, we ignore ScalarArrayOpExpr clauses for + * non-first index columns, and we set *skip_lower_saop to true if we found + * any such clauses (caller must initialize the variable to false). If it's + * NULL, we do not ignore non-first ScalarArrayOpExpr clauses, but they will + * result in considering the scan's output to be unordered. + * + * 'rel' is the index's heap relation + * 'index' is the index for which we want to generate paths + * 'clauses' is the collection of indexable clauses (IndexClause nodes) + * 'useful_predicate' indicates whether the index has a useful predicate + * 'scantype' indicates whether we need plain or bitmap scan support + * 'skip_nonnative_saop' indicates whether to accept SAOP if index AM doesn't + * 'skip_lower_saop' indicates whether to accept non-first-column SAOP + */ +static List * +build_index_paths(PlannerInfo *root, RelOptInfo *rel, + IndexOptInfo *index, IndexClauseSet *clauses, + bool useful_predicate, + ScanTypeControl scantype, + bool *skip_nonnative_saop, + bool *skip_lower_saop) +{ + List *result = NIL; + IndexPath *ipath; + List *index_clauses; + Relids outer_relids; + double loop_count; + List *orderbyclauses; + List *orderbyclausecols; + List *index_pathkeys; + List *useful_pathkeys; + bool found_lower_saop_clause; + bool pathkeys_possibly_useful; + bool index_is_ordered; + bool index_only_scan; + int indexcol; + + /* + * Check that index supports the desired scan type(s) + */ + switch (scantype) + { + case ST_INDEXSCAN: + if (!index->amhasgettuple) + return NIL; + break; + case ST_BITMAPSCAN: + if (!index->amhasgetbitmap) + return NIL; + break; + case ST_ANYSCAN: + /* either or both are OK */ + break; + } + + /* + * 1. Combine the per-column IndexClause lists into an overall list. + * + * In the resulting list, clauses are ordered by index key, so that the + * column numbers form a nondecreasing sequence. (This order is depended + * on by btree and possibly other places.) The list can be empty, if the + * index AM allows that. + * + * found_lower_saop_clause is set true if we accept a ScalarArrayOpExpr + * index clause for a non-first index column. This prevents us from + * assuming that the scan result is ordered. (Actually, the result is + * still ordered if there are equality constraints for all earlier + * columns, but it seems too expensive and non-modular for this code to be + * aware of that refinement.) + * + * We also build a Relids set showing which outer rels are required by the + * selected clauses. Any lateral_relids are included in that, but not + * otherwise accounted for. 
+ */ + index_clauses = NIL; + found_lower_saop_clause = false; + outer_relids = bms_copy(rel->lateral_relids); + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + ListCell *lc; + + foreach(lc, clauses->indexclauses[indexcol]) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + RestrictInfo *rinfo = iclause->rinfo; + + /* We might need to omit ScalarArrayOpExpr clauses */ + if (IsA(rinfo->clause, ScalarArrayOpExpr)) + { + if (!index->amsearcharray) + { + if (skip_nonnative_saop) + { + /* Ignore because not supported by index */ + *skip_nonnative_saop = true; + continue; + } + /* Caller had better intend this only for bitmap scan */ + Assert(scantype == ST_BITMAPSCAN); + } + if (indexcol > 0) + { + if (skip_lower_saop) + { + /* Caller doesn't want to lose index ordering */ + *skip_lower_saop = true; + continue; + } + found_lower_saop_clause = true; + } + } + + /* OK to include this clause */ + index_clauses = lappend(index_clauses, iclause); + outer_relids = bms_add_members(outer_relids, + rinfo->clause_relids); + } + + /* + * If no clauses match the first index column, check for amoptionalkey + * restriction. We can't generate a scan over an index with + * amoptionalkey = false unless there's at least one index clause. + * (When working on columns after the first, this test cannot fail. It + * is always okay for columns after the first to not have any + * clauses.) + */ + if (index_clauses == NIL && !index->amoptionalkey) + return NIL; + } + + /* We do not want the index's rel itself listed in outer_relids */ + outer_relids = bms_del_member(outer_relids, rel->relid); + /* Enforce convention that outer_relids is exactly NULL if empty */ + if (bms_is_empty(outer_relids)) + outer_relids = NULL; + + /* Compute loop_count for cost estimation purposes */ + loop_count = get_loop_count(root, rel->relid, outer_relids); + + /* + * 2. Compute pathkeys describing index's ordering, if any, then see how + * many of them are actually useful for this query. This is not relevant + * if we are only trying to build bitmap indexscans, nor if we have to + * assume the scan is unordered. + */ + pathkeys_possibly_useful = (scantype != ST_BITMAPSCAN && + !found_lower_saop_clause && + has_useful_pathkeys(root, rel)); + index_is_ordered = (index->sortopfamily != NULL); + if (index_is_ordered && pathkeys_possibly_useful) + { + index_pathkeys = build_index_pathkeys(root, index, + ForwardScanDirection); + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); + orderbyclauses = NIL; + orderbyclausecols = NIL; + } + else if (index->amcanorderbyop && pathkeys_possibly_useful) + { + /* see if we can generate ordering operators for query_pathkeys */ + match_pathkeys_to_index(index, root->query_pathkeys, + &orderbyclauses, + &orderbyclausecols); + if (orderbyclauses) + useful_pathkeys = root->query_pathkeys; + else + useful_pathkeys = NIL; + } + else + { + useful_pathkeys = NIL; + orderbyclauses = NIL; + orderbyclausecols = NIL; + } + + /* + * 3. Check if an index-only scan is possible. If we're not building + * plain indexscans, this isn't relevant since bitmap scans don't support + * index data retrieval anyway. + */ + index_only_scan = (scantype != ST_BITMAPSCAN && + check_index_only(rel, index)); + + /* + * 4. Generate an indexscan path if there are relevant restriction clauses + * in the current clauses, OR the index ordering is potentially useful for + * later merging or final output ordering, OR the index has a useful + * predicate, OR an index-only scan is possible. 
+ */ + if (index_clauses != NIL || useful_pathkeys != NIL || useful_predicate || + index_only_scan) + { + ipath = create_index_path(root, index, + index_clauses, + orderbyclauses, + orderbyclausecols, + useful_pathkeys, + index_is_ordered ? + ForwardScanDirection : + NoMovementScanDirection, + index_only_scan, + outer_relids, + loop_count, + false); + result = lappend(result, ipath); + + /* + * If appropriate, consider parallel index scan. We don't allow + * parallel index scan for bitmap index scans. + */ + if (index->amcanparallel && + rel->consider_parallel && outer_relids == NULL && + scantype != ST_BITMAPSCAN) + { + ipath = create_index_path(root, index, + index_clauses, + orderbyclauses, + orderbyclausecols, + useful_pathkeys, + index_is_ordered ? + ForwardScanDirection : + NoMovementScanDirection, + index_only_scan, + outer_relids, + loop_count, + true); + + /* + * if, after costing the path, we find that it's not worth using + * parallel workers, just free it. + */ + if (ipath->path.parallel_workers > 0) + add_partial_path(rel, (Path *) ipath); + else + pfree(ipath); + } + } + + /* + * 5. If the index is ordered, a backwards scan might be interesting. + */ + if (index_is_ordered && pathkeys_possibly_useful) + { + index_pathkeys = build_index_pathkeys(root, index, + BackwardScanDirection); + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); + if (useful_pathkeys != NIL) + { + ipath = create_index_path(root, index, + index_clauses, + NIL, + NIL, + useful_pathkeys, + BackwardScanDirection, + index_only_scan, + outer_relids, + loop_count, + false); + result = lappend(result, ipath); + + /* If appropriate, consider parallel index scan */ + if (index->amcanparallel && + rel->consider_parallel && outer_relids == NULL && + scantype != ST_BITMAPSCAN) + { + ipath = create_index_path(root, index, + index_clauses, + NIL, + NIL, + useful_pathkeys, + BackwardScanDirection, + index_only_scan, + outer_relids, + loop_count, + true); + + /* + * if, after costing the path, we find that it's not worth + * using parallel workers, just free it. + */ + if (ipath->path.parallel_workers > 0) + add_partial_path(rel, (Path *) ipath); + else + pfree(ipath); + } + } + } + + return result; +} + +/* + * build_paths_for_OR + * Given a list of restriction clauses from one arm of an OR clause, + * construct all matching IndexPaths for the relation. + * + * Here we must scan all indexes of the relation, since a bitmap OR tree + * can use multiple indexes. + * + * The caller actually supplies two lists of restriction clauses: some + * "current" ones and some "other" ones. Both lists can be used freely + * to match keys of the index, but an index must use at least one of the + * "current" clauses to be considered usable. The motivation for this is + * examples like + * WHERE (x = 42) AND (... OR (y = 52 AND z = 77) OR ....) + * While we are considering the y/z subclause of the OR, we can use "x = 42" + * as one of the available index conditions; but we shouldn't match the + * subclause to any index on x alone, because such a Path would already have + * been generated at the upper level. So we could use an index on x,y,z + * or an index on x,y for the OR subclause, but not an index on just x. + * When dealing with a partial index, a match of the index predicate to + * one of the "current" clauses also makes the index usable. 
+ * + * 'rel' is the relation for which we want to generate index paths + * 'clauses' is the current list of clauses (RestrictInfo nodes) + * 'other_clauses' is the list of additional upper-level clauses + */ +static List * +build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, + List *clauses, List *other_clauses) +{ + List *result = NIL; + List *all_clauses = NIL; /* not computed till needed */ + ListCell *lc; + + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + IndexClauseSet clauseset; + List *indexpaths; + bool useful_predicate; + + /* Ignore index if it doesn't support bitmap scans */ + if (!index->amhasgetbitmap) + continue; + + /* + * Ignore partial indexes that do not match the query. If a partial + * index is marked predOK then we know it's OK. Otherwise, we have to + * test whether the added clauses are sufficient to imply the + * predicate. If so, we can use the index in the current context. + * + * We set useful_predicate to true iff the predicate was proven using + * the current set of clauses. This is needed to prevent matching a + * predOK index to an arm of an OR, which would be a legal but + * pointlessly inefficient plan. (A better plan will be generated by + * just scanning the predOK index alone, no OR.) + */ + useful_predicate = false; + if (index->indpred != NIL) + { + if (index->predOK) + { + /* Usable, but don't set useful_predicate */ + } + else + { + /* Form all_clauses if not done already */ + if (all_clauses == NIL) + all_clauses = list_concat_copy(clauses, other_clauses); + + if (!predicate_implied_by(index->indpred, all_clauses, false)) + continue; /* can't use it at all */ + + if (!predicate_implied_by(index->indpred, other_clauses, false)) + useful_predicate = true; + } + } + + /* + * Identify the restriction clauses that can match the index. + */ + MemSet(&clauseset, 0, sizeof(clauseset)); + match_clauses_to_index(root, clauses, index, &clauseset); + + /* + * If no matches so far, and the index predicate isn't useful, we + * don't want it. + */ + if (!clauseset.nonempty && !useful_predicate) + continue; + + /* + * Add "other" restriction clauses to the clauseset. + */ + match_clauses_to_index(root, other_clauses, index, &clauseset); + + /* + * Construct paths if possible. + */ + indexpaths = build_index_paths(root, rel, + index, &clauseset, + useful_predicate, + ST_BITMAPSCAN, + NULL, + NULL); + result = list_concat(result, indexpaths); + } + + return result; +} + +/* + * generate_bitmap_or_paths + * Look through the list of clauses to find OR clauses, and generate + * a BitmapOrPath for each one we can handle that way. Return a list + * of the generated BitmapOrPaths. + * + * other_clauses is a list of additional clauses that can be assumed true + * for the purpose of generating indexquals, but are not to be searched for + * ORs. (See build_paths_for_OR() for motivation.) + */ +static List * +generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, + List *clauses, List *other_clauses) +{ + List *result = NIL; + List *all_clauses; + ListCell *lc; + + /* + * We can use both the current and other clauses as context for + * build_paths_for_OR; no need to remove ORs from the lists. 
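+ *
+ * For illustration (hypothetical columns and indexes; a sketch only):
+ * given WHERE a = 1 OR (b = 2 AND c = 3), with one index on (a) and one
+ * on (b), each OR arm can be matched to an index, so a BitmapOrPath over
+ * the two index scans is built; if any arm matched no index at all, the
+ * whole OR clause would be skipped.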
+ */ + all_clauses = list_concat_copy(clauses, other_clauses); + + foreach(lc, clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + List *pathlist; + Path *bitmapqual; + ListCell *j; + + /* Ignore RestrictInfos that aren't ORs */ + if (!restriction_is_or_clause(rinfo)) + continue; + + /* + * We must be able to match at least one index to each of the arms of + * the OR, else we can't use it. + */ + pathlist = NIL; + foreach(j, ((BoolExpr *) rinfo->orclause)->args) + { + Node *orarg = (Node *) lfirst(j); + List *indlist; + + /* OR arguments should be ANDs or sub-RestrictInfos */ + if (is_andclause(orarg)) + { + List *andargs = ((BoolExpr *) orarg)->args; + + indlist = build_paths_for_OR(root, rel, + andargs, + all_clauses); + + /* Recurse in case there are sub-ORs */ + indlist = list_concat(indlist, + generate_bitmap_or_paths(root, rel, + andargs, + all_clauses)); + } + else + { + RestrictInfo *rinfo = castNode(RestrictInfo, orarg); + List *orargs; + + Assert(!restriction_is_or_clause(rinfo)); + orargs = list_make1(rinfo); + + indlist = build_paths_for_OR(root, rel, + orargs, + all_clauses); + } + + /* + * If nothing matched this arm, we can't do anything with this OR + * clause. + */ + if (indlist == NIL) + { + pathlist = NIL; + break; + } + + /* + * OK, pick the most promising AND combination, and add it to + * pathlist. + */ + bitmapqual = choose_bitmap_and(root, rel, indlist); + pathlist = lappend(pathlist, bitmapqual); + } + + /* + * If we have a match for every arm, then turn them into a + * BitmapOrPath, and add to result list. + */ + if (pathlist != NIL) + { + bitmapqual = (Path *) create_bitmap_or_path(root, rel, pathlist); + result = lappend(result, bitmapqual); + } + } + + return result; +} + + +/* + * choose_bitmap_and + * Given a nonempty list of bitmap paths, AND them into one path. + * + * This is a nontrivial decision since we can legally use any subset of the + * given path set. We want to choose a good tradeoff between selectivity + * and cost of computing the bitmap. + * + * The result is either a single one of the inputs, or a BitmapAndPath + * combining multiple inputs. + */ +static Path * +choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths) +{ + int npaths = list_length(paths); + PathClauseUsage **pathinfoarray; + PathClauseUsage *pathinfo; + List *clauselist; + List *bestpaths = NIL; + Cost bestcost = 0; + int i, + j; + ListCell *l; + + Assert(npaths > 0); /* else caller error */ + if (npaths == 1) + return (Path *) linitial(paths); /* easy case */ + + /* + * In theory we should consider every nonempty subset of the given paths. + * In practice that seems like overkill, given the crude nature of the + * estimates, not to mention the possible effects of higher-level AND and + * OR clauses. Moreover, it's completely impractical if there are a large + * number of paths, since the work would grow as O(2^N). + * + * As a heuristic, we first check for paths using exactly the same sets of + * WHERE clauses + index predicate conditions, and reject all but the + * cheapest-to-scan in any such group. This primarily gets rid of indexes + * that include the interesting columns but also irrelevant columns. (In + * situations where the DBA has gone overboard on creating variant + * indexes, this can make for a very large reduction in the number of + * paths considered further.) + * + * We then sort the surviving paths with the cheapest-to-scan first, and + * for each path, consider using that path alone as the basis for a bitmap + * scan. 
Then we consider bitmap AND scans formed from that path plus + * each subsequent (higher-cost) path, adding on a subsequent path if it + * results in a reduction in the estimated total scan cost. This means we + * consider about O(N^2) rather than O(2^N) path combinations, which is + * quite tolerable, especially given that N is usually reasonably small + * because of the prefiltering step. The cheapest of these is returned. + * + * We will only consider AND combinations in which no two indexes use the + * same WHERE clause. This is a bit of a kluge: it's needed because + * costsize.c and clausesel.c aren't very smart about redundant clauses. + * They will usually double-count the redundant clauses, producing a + * too-small selectivity that makes a redundant AND step look like it + * reduces the total cost. Perhaps someday that code will be smarter and + * we can remove this limitation. (But note that this also defends + * against flat-out duplicate input paths, which can happen because + * match_join_clauses_to_index will find the same OR join clauses that + * extract_restriction_or_clauses has pulled OR restriction clauses out + * of.) + * + * For the same reason, we reject AND combinations in which an index + * predicate clause duplicates another clause. Here we find it necessary + * to be even stricter: we'll reject a partial index if any of its + * predicate clauses are implied by the set of WHERE clauses and predicate + * clauses used so far. This covers cases such as a condition "x = 42" + * used with a plain index, followed by a clauseless scan of a partial + * index "WHERE x >= 40 AND x < 50". The partial index has been accepted + * only because "x = 42" was present, and so allowing it would partially + * double-count selectivity. (We could use predicate_implied_by on + * regular qual clauses too, to have a more intelligent, but much more + * expensive, check for redundancy --- but in most cases simple equality + * seems to suffice.) + */ + + /* + * Extract clause usage info and detect any paths that use exactly the + * same set of clauses; keep only the cheapest-to-scan of any such groups. + * The surviving paths are put into an array for qsort'ing. 
+ */ + pathinfoarray = (PathClauseUsage **) + palloc(npaths * sizeof(PathClauseUsage *)); + clauselist = NIL; + npaths = 0; + foreach(l, paths) + { + Path *ipath = (Path *) lfirst(l); + + pathinfo = classify_index_clause_usage(ipath, &clauselist); + + /* If it's unclassifiable, treat it as distinct from all others */ + if (pathinfo->unclassifiable) + { + pathinfoarray[npaths++] = pathinfo; + continue; + } + + for (i = 0; i < npaths; i++) + { + if (!pathinfoarray[i]->unclassifiable && + bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids)) + break; + } + if (i < npaths) + { + /* duplicate clauseids, keep the cheaper one */ + Cost ncost; + Cost ocost; + Selectivity nselec; + Selectivity oselec; + + cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec); + cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec); + if (ncost < ocost) + pathinfoarray[i] = pathinfo; + } + else + { + /* not duplicate clauseids, add to array */ + pathinfoarray[npaths++] = pathinfo; + } + } + + /* If only one surviving path, we're done */ + if (npaths == 1) + return pathinfoarray[0]->path; + + /* Sort the surviving paths by index access cost */ + qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *), + path_usage_comparator); + + /* + * For each surviving index, consider it as an "AND group leader", and see + * whether adding on any of the later indexes results in an AND path with + * cheaper total cost than before. Then take the cheapest AND group. + * + * Note: paths that are either clauseless or unclassifiable will have + * empty clauseids, so that they will not be rejected by the clauseids + * filter here, nor will they cause later paths to be rejected by it. + */ + for (i = 0; i < npaths; i++) + { + Cost costsofar; + List *qualsofar; + Bitmapset *clauseidsofar; + + pathinfo = pathinfoarray[i]; + paths = list_make1(pathinfo->path); + costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path); + qualsofar = list_concat_copy(pathinfo->quals, pathinfo->preds); + clauseidsofar = bms_copy(pathinfo->clauseids); + + for (j = i + 1; j < npaths; j++) + { + Cost newcost; + + pathinfo = pathinfoarray[j]; + /* Check for redundancy */ + if (bms_overlap(pathinfo->clauseids, clauseidsofar)) + continue; /* consider it redundant */ + if (pathinfo->preds) + { + bool redundant = false; + + /* we check each predicate clause separately */ + foreach(l, pathinfo->preds) + { + Node *np = (Node *) lfirst(l); + + if (predicate_implied_by(list_make1(np), qualsofar, false)) + { + redundant = true; + break; /* out of inner foreach loop */ + } + } + if (redundant) + continue; + } + /* tentatively add new path to paths, so we can estimate cost */ + paths = lappend(paths, pathinfo->path); + newcost = bitmap_and_cost_est(root, rel, paths); + if (newcost < costsofar) + { + /* keep new path in paths, update subsidiary variables */ + costsofar = newcost; + qualsofar = list_concat(qualsofar, pathinfo->quals); + qualsofar = list_concat(qualsofar, pathinfo->preds); + clauseidsofar = bms_add_members(clauseidsofar, + pathinfo->clauseids); + } + else + { + /* reject new path, remove it from paths list */ + paths = list_truncate(paths, list_length(paths) - 1); + } + } + + /* Keep the cheapest AND-group (or singleton) */ + if (i == 0 || costsofar < bestcost) + { + bestpaths = paths; + bestcost = costsofar; + } + + /* some easy cleanup (we don't try real hard though) */ + list_free(qualsofar); + } + + if (list_length(bestpaths) == 1) + return (Path *) linitial(bestpaths); /* no need for AND */ + return (Path *) create_bitmap_and_path(root, 
rel, bestpaths); +} + +/* qsort comparator to sort in increasing index access cost order */ +static int +path_usage_comparator(const void *a, const void *b) +{ + PathClauseUsage *pa = *(PathClauseUsage *const *) a; + PathClauseUsage *pb = *(PathClauseUsage *const *) b; + Cost acost; + Cost bcost; + Selectivity aselec; + Selectivity bselec; + + cost_bitmap_tree_node(pa->path, &acost, &aselec); + cost_bitmap_tree_node(pb->path, &bcost, &bselec); + + /* + * If costs are the same, sort by selectivity. + */ + if (acost < bcost) + return -1; + if (acost > bcost) + return 1; + + if (aselec < bselec) + return -1; + if (aselec > bselec) + return 1; + + return 0; +} + +/* + * Estimate the cost of actually executing a bitmap scan with a single + * index path (which could be a BitmapAnd or BitmapOr node). + */ +static Cost +bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath) +{ + BitmapHeapPath bpath; + + /* Set up a dummy BitmapHeapPath */ + bpath.path.type = T_BitmapHeapPath; + bpath.path.pathtype = T_BitmapHeapScan; + bpath.path.parent = rel; + bpath.path.pathtarget = rel->reltarget; + bpath.path.param_info = ipath->param_info; + bpath.path.pathkeys = NIL; + bpath.bitmapqual = ipath; + + /* + * Check the cost of temporary path without considering parallelism. + * Parallel bitmap heap path will be considered at later stage. + */ + bpath.path.parallel_workers = 0; + + /* Now we can do cost_bitmap_heap_scan */ + cost_bitmap_heap_scan(&bpath.path, root, rel, + bpath.path.param_info, + ipath, + get_loop_count(root, rel->relid, + PATH_REQ_OUTER(ipath))); + + return bpath.path.total_cost; +} + +/* + * Estimate the cost of actually executing a BitmapAnd scan with the given + * inputs. + */ +static Cost +bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths) +{ + BitmapAndPath *apath; + + /* + * Might as well build a real BitmapAndPath here, as the work is slightly + * too complicated to be worth repeating just to save one palloc. + */ + apath = create_bitmap_and_path(root, rel, paths); + + return bitmap_scan_cost_est(root, rel, (Path *) apath); +} + + +/* + * classify_index_clause_usage + * Construct a PathClauseUsage struct describing the WHERE clauses and + * index predicate clauses used by the given indexscan path. + * We consider two clauses the same if they are equal(). + * + * At some point we might want to migrate this info into the Path data + * structure proper, but for the moment it's only needed within + * choose_bitmap_and(). + * + * *clauselist is used and expanded as needed to identify all the distinct + * clauses seen across successive calls. Caller must initialize it to NIL + * before first call of a set. + */ +static PathClauseUsage * +classify_index_clause_usage(Path *path, List **clauselist) +{ + PathClauseUsage *result; + Bitmapset *clauseids; + ListCell *lc; + + result = (PathClauseUsage *) palloc(sizeof(PathClauseUsage)); + result->path = path; + + /* Recursively find the quals and preds used by the path */ + result->quals = NIL; + result->preds = NIL; + find_indexpath_quals(path, &result->quals, &result->preds); + + /* + * Some machine-generated queries have outlandish numbers of qual clauses. + * To avoid getting into O(N^2) behavior even in this preliminary + * classification step, we want to limit the number of entries we can + * accumulate in *clauselist. Treat any path with more than 100 quals + + * preds as unclassifiable, which will cause calling code to consider it + * distinct from all other paths. 
+ */ + if (list_length(result->quals) + list_length(result->preds) > 100) + { + result->clauseids = NULL; + result->unclassifiable = true; + return result; + } + + /* Build up a bitmapset representing the quals and preds */ + clauseids = NULL; + foreach(lc, result->quals) + { + Node *node = (Node *) lfirst(lc); + + clauseids = bms_add_member(clauseids, + find_list_position(node, clauselist)); + } + foreach(lc, result->preds) + { + Node *node = (Node *) lfirst(lc); + + clauseids = bms_add_member(clauseids, + find_list_position(node, clauselist)); + } + result->clauseids = clauseids; + result->unclassifiable = false; + + return result; +} + + +/* + * find_indexpath_quals + * + * Given the Path structure for a plain or bitmap indexscan, extract lists + * of all the index clauses and index predicate conditions used in the Path. + * These are appended to the initial contents of *quals and *preds (hence + * caller should initialize those to NIL). + * + * Note we are not trying to produce an accurate representation of the AND/OR + * semantics of the Path, but just find out all the base conditions used. + * + * The result lists contain pointers to the expressions used in the Path, + * but all the list cells are freshly built, so it's safe to destructively + * modify the lists (eg, by concat'ing with other lists). + */ +static void +find_indexpath_quals(Path *bitmapqual, List **quals, List **preds) +{ + if (IsA(bitmapqual, BitmapAndPath)) + { + BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; + ListCell *l; + + foreach(l, apath->bitmapquals) + { + find_indexpath_quals((Path *) lfirst(l), quals, preds); + } + } + else if (IsA(bitmapqual, BitmapOrPath)) + { + BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; + ListCell *l; + + foreach(l, opath->bitmapquals) + { + find_indexpath_quals((Path *) lfirst(l), quals, preds); + } + } + else if (IsA(bitmapqual, IndexPath)) + { + IndexPath *ipath = (IndexPath *) bitmapqual; + ListCell *l; + + foreach(l, ipath->indexclauses) + { + IndexClause *iclause = (IndexClause *) lfirst(l); + + *quals = lappend(*quals, iclause->rinfo->clause); + } + *preds = list_concat(*preds, ipath->indexinfo->indpred); + } + else + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); +} + + +/* + * find_list_position + * Return the given node's position (counting from 0) in the given + * list of nodes. If it's not equal() to any existing list member, + * add it at the end, and return that position. + */ +static int +find_list_position(Node *node, List **nodelist) +{ + int i; + ListCell *lc; + + i = 0; + foreach(lc, *nodelist) + { + Node *oldnode = (Node *) lfirst(lc); + + if (equal(node, oldnode)) + return i; + i++; + } + + *nodelist = lappend(*nodelist, node); + + return i; +} + + +/* + * check_index_only + * Determine whether an index-only scan is possible for this index. + */ +static bool +check_index_only(RelOptInfo *rel, IndexOptInfo *index) +{ + bool result; + Bitmapset *attrs_used = NULL; + Bitmapset *index_canreturn_attrs = NULL; + Bitmapset *index_cannotreturn_attrs = NULL; + ListCell *lc; + int i; + + /* Index-only scans must be enabled */ + if (!enable_indexonlyscan) + return false; + + /* + * Check that all needed attributes of the relation are available from the + * index. + */ + + /* + * First, identify all the attributes needed for joins or final output. + * Note: we must look at rel's targetlist, not the attr_needed data, + * because attr_needed isn't computed for inheritance child rels. 
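+ *
+ * For illustration (hypothetical query and index; a sketch only): for
+ * SELECT a FROM t WHERE b = 10 with an index on t(b, a), the needed
+ * attributes are {a, b}; if the index can return both columns, the
+ * bms_is_subset test below succeeds and an index-only scan is possible.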
+ */ + pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used); + + /* + * Add all the attributes used by restriction clauses; but consider only + * those clauses not implied by the index predicate, since ones that are + * so implied don't need to be checked explicitly in the plan. + * + * Note: attributes used only in index quals would not be needed at + * runtime either, if we are certain that the index is not lossy. However + * it'd be complicated to account for that accurately, and it doesn't + * matter in most cases, since we'd conclude that such attributes are + * available from the index anyway. + */ + foreach(lc, index->indrestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used); + } + + /* + * Construct a bitmapset of columns that the index can return back in an + * index-only scan. If there are multiple index columns containing the + * same attribute, all of them must be capable of returning the value, + * since we might recheck operators on any of them. (Potentially we could + * be smarter about that, but it's such a weird situation that it doesn't + * seem worth spending a lot of sweat on.) + */ + for (i = 0; i < index->ncolumns; i++) + { + int attno = index->indexkeys[i]; + + /* + * For the moment, we just ignore index expressions. It might be nice + * to do something with them, later. + */ + if (attno == 0) + continue; + + if (index->canreturn[i]) + index_canreturn_attrs = + bms_add_member(index_canreturn_attrs, + attno - FirstLowInvalidHeapAttributeNumber); + else + index_cannotreturn_attrs = + bms_add_member(index_cannotreturn_attrs, + attno - FirstLowInvalidHeapAttributeNumber); + } + + index_canreturn_attrs = bms_del_members(index_canreturn_attrs, + index_cannotreturn_attrs); + + /* Do we have all the necessary attributes? */ + result = bms_is_subset(attrs_used, index_canreturn_attrs); + + bms_free(attrs_used); + bms_free(index_canreturn_attrs); + bms_free(index_cannotreturn_attrs); + + return result; +} + +/* + * get_loop_count + * Choose the loop count estimate to use for costing a parameterized path + * with the given set of outer relids. + * + * Since we produce parameterized paths before we've begun to generate join + * relations, it's impossible to predict exactly how many times a parameterized + * path will be iterated; we don't know the size of the relation that will be + * on the outside of the nestloop. However, we should try to account for + * multiple iterations somehow in costing the path. The heuristic embodied + * here is to use the rowcount of the smallest other base relation needed in + * the join clauses used by the path. (We could alternatively consider the + * largest one, but that seems too optimistic.) This is of course the right + * answer for single-other-relation cases, and it seems like a reasonable + * zero-order approximation for multiway-join cases. + * + * In addition, we check to see if the other side of each join clause is on + * the inside of some semijoin that the current relation is on the outside of. + * If so, the only way that a parameterized path could be used is if the + * semijoin RHS has been unique-ified, so we should use the number of unique + * RHS rows rather than using the relation's raw rowcount. + * + * Note: for this to work, allpaths.c must establish all baserel size + * estimates before it begins to compute paths, or at least before it + * calls create_index_paths(). 
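+ * + * For example, if a path on rel A is parameterized by rels B and C, where B + * is estimated at 100 rows and C at 10000, the loop count used for costing + * is 100 (the smaller estimate), barring any semijoin adjustment below.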
+ */ +static double +get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids) +{ + double result; + int outer_relid; + + /* For a non-parameterized path, just return 1.0 quickly */ + if (outer_relids == NULL) + return 1.0; + + result = 0.0; + outer_relid = -1; + while ((outer_relid = bms_next_member(outer_relids, outer_relid)) >= 0) + { + RelOptInfo *outer_rel; + double rowcount; + + /* Paranoia: ignore bogus relid indexes */ + if (outer_relid >= root->simple_rel_array_size) + continue; + outer_rel = root->simple_rel_array[outer_relid]; + if (outer_rel == NULL) + continue; + Assert(outer_rel->relid == outer_relid); /* sanity check on array */ + + /* Other relation could be proven empty, if so ignore */ + if (IS_DUMMY_REL(outer_rel)) + continue; + + /* Otherwise, rel's rows estimate should be valid by now */ + Assert(outer_rel->rows > 0); + + /* Check to see if rel is on the inside of any semijoins */ + rowcount = adjust_rowcount_for_semijoins(root, + cur_relid, + outer_relid, + outer_rel->rows); + + /* Remember smallest row count estimate among the outer rels */ + if (result == 0.0 || result > rowcount) + result = rowcount; + } + /* Return 1.0 if we found no valid relations (shouldn't happen) */ + return (result > 0.0) ? result : 1.0; +} + +/* + * Check to see if outer_relid is on the inside of any semijoin that cur_relid + * is on the outside of. If so, replace rowcount with the estimated number of + * unique rows from the semijoin RHS (assuming that's smaller, which it might + * not be). The estimate is crude but it's the best we can do at this stage + * of the proceedings. + */ +static double +adjust_rowcount_for_semijoins(PlannerInfo *root, + Index cur_relid, + Index outer_relid, + double rowcount) +{ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + if (sjinfo->jointype == JOIN_SEMI && + bms_is_member(cur_relid, sjinfo->syn_lefthand) && + bms_is_member(outer_relid, sjinfo->syn_righthand)) + { + /* Estimate number of unique-ified rows */ + double nraw; + double nunique; + + nraw = approximate_joinrel_size(root, sjinfo->syn_righthand); + nunique = estimate_num_groups(root, + sjinfo->semi_rhs_exprs, + nraw, + NULL, + NULL); + if (rowcount > nunique) + rowcount = nunique; + } + } + return rowcount; +} + +/* + * Make an approximate estimate of the size of a joinrel. + * + * We don't have enough info at this point to get a good estimate, so we + * just multiply the base relation sizes together. Fortunately, this is + * the right answer anyway for the most common case with a single relation + * on the RHS of a semijoin. Also, estimate_num_groups() has only a weak + * dependency on its input_rows argument (it basically uses it as a clamp). + * So we might be able to get a fairly decent end result even with a severe + * overestimate of the RHS's raw size. 
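+ * + * For example, a two-relation RHS with 1000-row and 20-row base inputs is + * approximated as 20000 rows; per the note above, estimate_num_groups() + * mainly uses that figure as a clamp, so the overestimate is usually harmless.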
+ */ +static double +approximate_joinrel_size(PlannerInfo *root, Relids relids) +{ + double rowcount = 1.0; + int relid; + + relid = -1; + while ((relid = bms_next_member(relids, relid)) >= 0) + { + RelOptInfo *rel; + + /* Paranoia: ignore bogus relid indexes */ + if (relid >= root->simple_rel_array_size) + continue; + rel = root->simple_rel_array[relid]; + if (rel == NULL) + continue; + Assert(rel->relid == relid); /* sanity check on array */ + + /* Relation could be proven empty, if so ignore */ + if (IS_DUMMY_REL(rel)) + continue; + + /* Otherwise, rel's rows estimate should be valid by now */ + Assert(rel->rows > 0); + + /* Accumulate product */ + rowcount *= rel->rows; + } + return rowcount; +} + + +/**************************************************************************** + * ---- ROUTINES TO CHECK QUERY CLAUSES ---- + ****************************************************************************/ + +/* + * match_restriction_clauses_to_index + * Identify restriction clauses for the rel that match the index. + * Matching clauses are added to *clauseset. + */ +static void +match_restriction_clauses_to_index(PlannerInfo *root, + IndexOptInfo *index, + IndexClauseSet *clauseset) +{ + /* We can ignore clauses that are implied by the index predicate */ + match_clauses_to_index(root, index->indrestrictinfo, index, clauseset); +} + +/* + * match_join_clauses_to_index + * Identify join clauses for the rel that match the index. + * Matching clauses are added to *clauseset. + * Also, add any potentially usable join OR clauses to *joinorclauses. + */ +static void +match_join_clauses_to_index(PlannerInfo *root, + RelOptInfo *rel, IndexOptInfo *index, + IndexClauseSet *clauseset, + List **joinorclauses) +{ + ListCell *lc; + + /* Scan the rel's join clauses */ + foreach(lc, rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Check if clause can be moved to this rel */ + if (!join_clause_is_movable_to(rinfo, rel)) + continue; + + /* Potentially usable, so see if it matches the index or is an OR */ + if (restriction_is_or_clause(rinfo)) + *joinorclauses = lappend(*joinorclauses, rinfo); + else + match_clause_to_index(root, rinfo, index, clauseset); + } +} + +/* + * match_eclass_clauses_to_index + * Identify EquivalenceClass join clauses for the rel that match the index. + * Matching clauses are added to *clauseset. + */ +static void +match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index, + IndexClauseSet *clauseset) +{ + int indexcol; + + /* No work if rel is not in any such ECs */ + if (!index->rel->has_eclass_joins) + return; + + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + ec_member_matches_arg arg; + List *clauses; + + /* Generate clauses, skipping any that join to lateral_referencers */ + arg.index = index; + arg.indexcol = indexcol; + clauses = generate_implied_equalities_for_column(root, + index->rel, + ec_member_matches_indexcol, + (void *) &arg, + index->rel->lateral_referencers); + + /* + * We have to check whether the results actually do match the index, + * since for non-btree indexes the EC's equality operators might not + * be in the index opclass (cf ec_member_matches_indexcol). + */ + match_clauses_to_index(root, clauses, index, clauseset); + } +} + +/* + * match_clauses_to_index + * Perform match_clause_to_index() for each clause in a list. + * Matching clauses are added to *clauseset. 
+ */ +static void +match_clauses_to_index(PlannerInfo *root, + List *clauses, + IndexOptInfo *index, + IndexClauseSet *clauseset) +{ + ListCell *lc; + + foreach(lc, clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + match_clause_to_index(root, rinfo, index, clauseset); + } +} + +/* + * match_clause_to_index + * Test whether a qual clause can be used with an index. + * + * If the clause is usable, add an IndexClause entry for it to the appropriate + * list in *clauseset. (*clauseset must be initialized to zeroes before first + * call.) + * + * Note: in some circumstances we may find the same RestrictInfos coming from + * multiple places. Defend against redundant outputs by refusing to add a + * clause twice (pointer equality should be a good enough check for this). + * + * Note: it's possible that a badly-defined index could have multiple matching + * columns. We always select the first match if so; this avoids scenarios + * wherein we get an inflated idea of the index's selectivity by using the + * same clause multiple times with different index columns. + */ +static void +match_clause_to_index(PlannerInfo *root, + RestrictInfo *rinfo, + IndexOptInfo *index, + IndexClauseSet *clauseset) +{ + int indexcol; + + /* + * Never match pseudoconstants to indexes. (Normally a match could not + * happen anyway, since a pseudoconstant clause couldn't contain a Var, + * but what if someone builds an expression index on a constant? It's not + * totally unreasonable to do so with a partial index, either.) + */ + if (rinfo->pseudoconstant) + return; + + /* + * If clause can't be used as an indexqual because it must wait till after + * some lower-security-level restriction clause, reject it. + */ + if (!restriction_is_securely_promotable(rinfo, index->rel)) + return; + + /* OK, check each index key column for a match */ + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + IndexClause *iclause; + ListCell *lc; + + /* Ignore duplicates */ + foreach(lc, clauseset->indexclauses[indexcol]) + { + IndexClause *iclause = (IndexClause *) lfirst(lc); + + if (iclause->rinfo == rinfo) + return; + } + + /* OK, try to match the clause to the index column */ + iclause = match_clause_to_indexcol(root, + rinfo, + indexcol, + index); + if (iclause) + { + /* Success, so record it */ + clauseset->indexclauses[indexcol] = + lappend(clauseset->indexclauses[indexcol], iclause); + clauseset->nonempty = true; + return; + } + } +} + +/* + * match_clause_to_indexcol() + * Determine whether a restriction clause matches a column of an index, + * and if so, build an IndexClause node describing the details. + * + * To match an index normally, an operator clause: + * + * (1) must be in the form (indexkey op const) or (const op indexkey); + * and + * (2) must contain an operator which is in the index's operator family + * for this column; and + * (3) must match the collation of the index, if collation is relevant. + * + * Our definition of "const" is exceedingly liberal: we allow anything that + * doesn't involve a volatile function or a Var of the index's relation. + * In particular, Vars belonging to other relations of the query are + * accepted here, since a clause of that form can be used in a + * parameterized indexscan. It's the responsibility of higher code levels + * to manage restriction and join clauses appropriately. 
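+ * + * For example, WHERE a.f1 = 42 produces an ordinary indexqual for an index + * on a.f1, whereas WHERE a.f1 = b.f2 produces an indexqual that is only + * usable in an indexscan parameterized by b.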
+ * + * Note: we do need to check for Vars of the index's relation on the + * "const" side of the clause, since clauses like (a.f1 OP (b.f2 OP a.f3)) + * are not processable by a parameterized indexscan on a.f1, whereas + * something like (a.f1 OP (b.f2 OP c.f3)) is. + * + * Presently, the executor can only deal with indexquals that have the + * indexkey on the left, so we can only use clauses that have the indexkey + * on the right if we can commute the clause to put the key on the left. + * We handle that by generating an IndexClause with the correctly-commuted + * opclause as a derived indexqual. + * + * If the index has a collation, the clause must have the same collation. + * For collation-less indexes, we assume it doesn't matter; this is + * necessary for cases like "hstore ? text", wherein hstore's operators + * don't care about collation but the clause will get marked with a + * collation anyway because of the text argument. (This logic is + * embodied in the macro IndexCollMatchesExprColl.) + * + * It is also possible to match RowCompareExpr clauses to indexes (but + * currently, only btree indexes handle this). + * + * It is also possible to match ScalarArrayOpExpr clauses to indexes, when + * the clause is of the form "indexkey op ANY (arrayconst)". + * + * For boolean indexes, it is also possible to match the clause directly + * to the indexkey; or perhaps the clause is (NOT indexkey). + * + * And, last but not least, some operators and functions can be processed + * to derive (typically lossy) indexquals from a clause that isn't in + * itself indexable. If we see that any operand of an OpExpr or FuncExpr + * matches the index key, and the function has a planner support function + * attached to it, we'll invoke the support function to see if such an + * indexqual can be built. + * + * 'rinfo' is the clause to be tested (as a RestrictInfo node). + * 'indexcol' is a column number of 'index' (counting from 0). + * 'index' is the index of interest. + * + * Returns an IndexClause if the clause can be used with this index key, + * or NULL if not. + * + * NOTE: returns NULL if clause is an OR or AND clause; it is the + * responsibility of higher-level routines to cope with those. + */ +static IndexClause * +match_clause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + IndexClause *iclause; + Expr *clause = rinfo->clause; + Oid opfamily; + + Assert(indexcol < index->nkeycolumns); + + /* + * Historically this code has coped with NULL clauses. That's probably + * not possible anymore, but we might as well continue to cope. + */ + if (clause == NULL) + return NULL; + + /* First check for boolean-index cases. */ + opfamily = index->opfamily[indexcol]; + if (IsBooleanOpfamily(opfamily)) + { + iclause = match_boolean_index_clause(root, rinfo, indexcol, index); + if (iclause) + return iclause; + } + + /* + * Clause must be an opclause, funcclause, ScalarArrayOpExpr, or + * RowCompareExpr. Or, if the index supports it, we can handle IS + * NULL/NOT NULL clauses. 
+ */ + if (IsA(clause, OpExpr)) + { + return match_opclause_to_indexcol(root, rinfo, indexcol, index); + } + else if (IsA(clause, FuncExpr)) + { + return match_funcclause_to_indexcol(root, rinfo, indexcol, index); + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + return match_saopclause_to_indexcol(root, rinfo, indexcol, index); + } + else if (IsA(clause, RowCompareExpr)) + { + return match_rowcompare_to_indexcol(root, rinfo, indexcol, index); + } + else if (index->amsearchnulls && IsA(clause, NullTest)) + { + NullTest *nt = (NullTest *) clause; + + if (!nt->argisrow && + match_index_to_operand((Node *) nt->arg, indexcol, index)) + { + iclause = makeNode(IndexClause); + iclause->rinfo = rinfo; + iclause->indexquals = list_make1(rinfo); + iclause->lossy = false; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + return iclause; + } + } + + return NULL; +} + +/* + * match_boolean_index_clause + * Recognize restriction clauses that can be matched to a boolean index. + * + * The idea here is that, for an index on a boolean column that supports the + * BooleanEqualOperator, we can transform a plain reference to the indexkey + * into "indexkey = true", or "NOT indexkey" into "indexkey = false", etc, + * so as to make the expression indexable using the index's "=" operator. + * Since Postgres 8.1, we must do this because constant simplification does + * the reverse transformation; without this code there'd be no way to use + * such an index at all. + * + * This should be called only when IsBooleanOpfamily() recognizes the + * index's operator family. We check to see if the clause matches the + * index's key, and if so, build a suitable IndexClause. + */ +static IndexClause * +match_boolean_index_clause(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + Node *clause = (Node *) rinfo->clause; + Expr *op = NULL; + + /* Direct match? */ + if (match_index_to_operand(clause, indexcol, index)) + { + /* convert to indexkey = TRUE */ + op = make_opclause(BooleanEqualOperator, BOOLOID, false, + (Expr *) clause, + (Expr *) makeBoolConst(true, false), + InvalidOid, InvalidOid); + } + /* NOT clause? */ + else if (is_notclause(clause)) + { + Node *arg = (Node *) get_notclausearg((Expr *) clause); + + if (match_index_to_operand(arg, indexcol, index)) + { + /* convert to indexkey = FALSE */ + op = make_opclause(BooleanEqualOperator, BOOLOID, false, + (Expr *) arg, + (Expr *) makeBoolConst(false, false), + InvalidOid, InvalidOid); + } + } + + /* + * Since we only consider clauses at top level of WHERE, we can convert + * indexkey IS TRUE and indexkey IS FALSE to index searches as well. The + * different meaning for NULL isn't important. + */ + else if (clause && IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + Node *arg = (Node *) btest->arg; + + if (btest->booltesttype == IS_TRUE && + match_index_to_operand(arg, indexcol, index)) + { + /* convert to indexkey = TRUE */ + op = make_opclause(BooleanEqualOperator, BOOLOID, false, + (Expr *) arg, + (Expr *) makeBoolConst(true, false), + InvalidOid, InvalidOid); + } + else if (btest->booltesttype == IS_FALSE && + match_index_to_operand(arg, indexcol, index)) + { + /* convert to indexkey = FALSE */ + op = make_opclause(BooleanEqualOperator, BOOLOID, false, + (Expr *) arg, + (Expr *) makeBoolConst(false, false), + InvalidOid, InvalidOid); + } + } + + /* + * If we successfully made an operator clause from the given qual, we must + * wrap it in an IndexClause. It's not lossy. 
+ */ + if (op) + { + IndexClause *iclause = makeNode(IndexClause); + + iclause->rinfo = rinfo; + iclause->indexquals = list_make1(make_simple_restrictinfo(root, op)); + iclause->lossy = false; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + return iclause; + } + + return NULL; +} + +/* + * match_opclause_to_indexcol() + * Handles the OpExpr case for match_clause_to_indexcol(), + * which see for comments. + */ +static IndexClause * +match_opclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + IndexClause *iclause; + OpExpr *clause = (OpExpr *) rinfo->clause; + Node *leftop, + *rightop; + Oid expr_op; + Oid expr_coll; + Index index_relid; + Oid opfamily; + Oid idxcollation; + + /* + * Only binary operators need apply. (In theory, a planner support + * function could do something with a unary operator, but it seems + * unlikely to be worth the cycles to check.) + */ + if (list_length(clause->args) != 2) + return NULL; + + leftop = (Node *) linitial(clause->args); + rightop = (Node *) lsecond(clause->args); + expr_op = clause->opno; + expr_coll = clause->inputcollid; + + index_relid = index->rel->relid; + opfamily = index->opfamily[indexcol]; + idxcollation = index->indexcollations[indexcol]; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). See match_clause_to_indexcol's notes + * about const-ness. + * + * Note that we don't ask the support function about clauses that don't + * have one of these forms. Again, in principle it might be possible to + * do something, but it seems unlikely to be worth the cycles to check. + */ + if (match_index_to_operand(leftop, indexcol, index) && + !bms_is_member(index_relid, rinfo->right_relids) && + !contain_volatile_functions(rightop)) + { + if (IndexCollMatchesExprColl(idxcollation, expr_coll) && + op_in_opfamily(expr_op, opfamily)) + { + iclause = makeNode(IndexClause); + iclause->rinfo = rinfo; + iclause->indexquals = list_make1(rinfo); + iclause->lossy = false; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + return iclause; + } + + /* + * If we didn't find a member of the index's opfamily, try the support + * function for the operator's underlying function. + */ + set_opfuncid(clause); /* make sure we have opfuncid */ + return get_index_clause_from_support(root, + rinfo, + clause->opfuncid, + 0, /* indexarg on left */ + indexcol, + index); + } + + if (match_index_to_operand(rightop, indexcol, index) && + !bms_is_member(index_relid, rinfo->left_relids) && + !contain_volatile_functions(leftop)) + { + if (IndexCollMatchesExprColl(idxcollation, expr_coll)) + { + Oid comm_op = get_commutator(expr_op); + + if (OidIsValid(comm_op) && + op_in_opfamily(comm_op, opfamily)) + { + RestrictInfo *commrinfo; + + /* Build a commuted OpExpr and RestrictInfo */ + commrinfo = commute_restrictinfo(rinfo, comm_op); + + /* Make an IndexClause showing that as a derived qual */ + iclause = makeNode(IndexClause); + iclause->rinfo = rinfo; + iclause->indexquals = list_make1(commrinfo); + iclause->lossy = false; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + return iclause; + } + } + + /* + * If we didn't find a member of the index's opfamily, try the support + * function for the operator's underlying function. 
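+ * (For instance, the planner support function attached to LIKE's underlying + * function can derive range quals such as x >= 'abc' AND x < 'abd' from + * x LIKE 'abc%', when the index's collation or operator class permits.)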
+ */ + set_opfuncid(clause); /* make sure we have opfuncid */ + return get_index_clause_from_support(root, + rinfo, + clause->opfuncid, + 1, /* indexarg on right */ + indexcol, + index); + } + + return NULL; +} + +/* + * match_funcclause_to_indexcol() + * Handles the FuncExpr case for match_clause_to_indexcol(), + * which see for comments. + */ +static IndexClause * +match_funcclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + FuncExpr *clause = (FuncExpr *) rinfo->clause; + int indexarg; + ListCell *lc; + + /* + * We have no built-in intelligence about function clauses, but if there's + * a planner support function, it might be able to do something. But, to + * cut down on wasted planning cycles, only call the support function if + * at least one argument matches the target index column. + * + * Note that we don't insist on the other arguments being pseudoconstants; + * the support function has to check that. This is to allow cases where + * only some of the other arguments need to be included in the indexqual. + */ + indexarg = 0; + foreach(lc, clause->args) + { + Node *op = (Node *) lfirst(lc); + + if (match_index_to_operand(op, indexcol, index)) + { + return get_index_clause_from_support(root, + rinfo, + clause->funcid, + indexarg, + indexcol, + index); + } + + indexarg++; + } + + return NULL; +} + +/* + * get_index_clause_from_support() + * If the function has a planner support function, try to construct + * an IndexClause using indexquals created by the support function. + */ +static IndexClause * +get_index_clause_from_support(PlannerInfo *root, + RestrictInfo *rinfo, + Oid funcid, + int indexarg, + int indexcol, + IndexOptInfo *index) +{ + Oid prosupport = get_func_support(funcid); + SupportRequestIndexCondition req; + List *sresult; + + if (!OidIsValid(prosupport)) + return NULL; + + req.type = T_SupportRequestIndexCondition; + req.root = root; + req.funcid = funcid; + req.node = (Node *) rinfo->clause; + req.indexarg = indexarg; + req.index = index; + req.indexcol = indexcol; + req.opfamily = index->opfamily[indexcol]; + req.indexcollation = index->indexcollations[indexcol]; + + req.lossy = true; /* default assumption */ + + sresult = (List *) + DatumGetPointer(OidFunctionCall1(prosupport, + PointerGetDatum(&req))); + + if (sresult != NIL) + { + IndexClause *iclause = makeNode(IndexClause); + List *indexquals = NIL; + ListCell *lc; + + /* + * The support function API says it should just give back bare + * clauses, so here we must wrap each one in a RestrictInfo. + */ + foreach(lc, sresult) + { + Expr *clause = (Expr *) lfirst(lc); + + indexquals = lappend(indexquals, + make_simple_restrictinfo(root, clause)); + } + + iclause->rinfo = rinfo; + iclause->indexquals = indexquals; + iclause->lossy = req.lossy; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + + return iclause; + } + + return NULL; +} + +/* + * match_saopclause_to_indexcol() + * Handles the ScalarArrayOpExpr case for match_clause_to_indexcol(), + * which see for comments. 
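+ * + * For example, WHERE x = ANY ('{1,2,3}'::int4[]) (the form the parser + * produces for x IN (1,2,3)) can be matched to a btree index on x, while + * x <> ALL (...) is rejected because useOr is false.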
+ */ +static IndexClause * +match_saopclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause; + Node *leftop, + *rightop; + Relids right_relids; + Oid expr_op; + Oid expr_coll; + Index index_relid; + Oid opfamily; + Oid idxcollation; + + /* We only accept ANY clauses, not ALL */ + if (!saop->useOr) + return NULL; + leftop = (Node *) linitial(saop->args); + rightop = (Node *) lsecond(saop->args); + right_relids = pull_varnos(root, rightop); + expr_op = saop->opno; + expr_coll = saop->inputcollid; + + index_relid = index->rel->relid; + opfamily = index->opfamily[indexcol]; + idxcollation = index->indexcollations[indexcol]; + + /* + * We must have indexkey on the left and a pseudo-constant array argument. + */ + if (match_index_to_operand(leftop, indexcol, index) && + !bms_is_member(index_relid, right_relids) && + !contain_volatile_functions(rightop)) + { + if (IndexCollMatchesExprColl(idxcollation, expr_coll) && + op_in_opfamily(expr_op, opfamily)) + { + IndexClause *iclause = makeNode(IndexClause); + + iclause->rinfo = rinfo; + iclause->indexquals = list_make1(rinfo); + iclause->lossy = false; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + return iclause; + } + + /* + * We do not currently ask support functions about ScalarArrayOpExprs, + * though in principle we could. + */ + } + + return NULL; +} + +/* + * match_rowcompare_to_indexcol() + * Handles the RowCompareExpr case for match_clause_to_indexcol(), + * which see for comments. + * + * In this routine we check whether the first column of the row comparison + * matches the target index column. This is sufficient to guarantee that some + * index condition can be constructed from the RowCompareExpr --- the rest + * is handled by expand_indexqual_rowcompare(). + */ +static IndexClause * +match_rowcompare_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + RowCompareExpr *clause = (RowCompareExpr *) rinfo->clause; + Index index_relid; + Oid opfamily; + Oid idxcollation; + Node *leftop, + *rightop; + bool var_on_left; + Oid expr_op; + Oid expr_coll; + + /* Forget it if we're not dealing with a btree index */ + if (index->relam != BTREE_AM_OID) + return NULL; + + index_relid = index->rel->relid; + opfamily = index->opfamily[indexcol]; + idxcollation = index->indexcollations[indexcol]; + + /* + * We could do the matching on the basis of insisting that the opfamily + * shown in the RowCompareExpr be the same as the index column's opfamily, + * but that could fail in the presence of reverse-sort opfamilies: it'd be + * a matter of chance whether RowCompareExpr had picked the forward or + * reverse-sort family. So look only at the operator, and match if it is + * a member of the index's opfamily (after commutation, if the indexkey is + * on the right). We'll worry later about whether any additional + * operators are matchable to the index. 
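+ * + * For example, with an index on (a, b), the clause (a, b) < (5, 10) is + * matched here through its first column a; how many further columns are + * usable is worked out later by expand_indexqual_rowcompare().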
+ */ + leftop = (Node *) linitial(clause->largs); + rightop = (Node *) linitial(clause->rargs); + expr_op = linitial_oid(clause->opnos); + expr_coll = linitial_oid(clause->inputcollids); + + /* Collations must match, if relevant */ + if (!IndexCollMatchesExprColl(idxcollation, expr_coll)) + return NULL; + + /* + * These syntactic tests are the same as in match_opclause_to_indexcol() + */ + if (match_index_to_operand(leftop, indexcol, index) && + !bms_is_member(index_relid, pull_varnos(root, rightop)) && + !contain_volatile_functions(rightop)) + { + /* OK, indexkey is on left */ + var_on_left = true; + } + else if (match_index_to_operand(rightop, indexcol, index) && + !bms_is_member(index_relid, pull_varnos(root, leftop)) && + !contain_volatile_functions(leftop)) + { + /* indexkey is on right, so commute the operator */ + expr_op = get_commutator(expr_op); + if (expr_op == InvalidOid) + return NULL; + var_on_left = false; + } + else + return NULL; + + /* We're good if the operator is the right type of opfamily member */ + switch (get_op_opfamily_strategy(expr_op, opfamily)) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + return expand_indexqual_rowcompare(root, + rinfo, + indexcol, + index, + expr_op, + var_on_left); + } + + return NULL; +} + +/* + * expand_indexqual_rowcompare --- expand a single indexqual condition + * that is a RowCompareExpr + * + * It's already known that the first column of the row comparison matches + * the specified column of the index. We can use additional columns of the + * row comparison as index qualifications, so long as they match the index + * in the "same direction", ie, the indexkeys are all on the same side of the + * clause and the operators are all the same-type members of the opfamilies. + * + * If all the columns of the RowCompareExpr match in this way, we just use it + * as-is, except for possibly commuting it to put the indexkeys on the left. + * + * Otherwise, we build a shortened RowCompareExpr (if more than one + * column matches) or a simple OpExpr (if the first-column match is all + * there is). In these cases the modified clause is always "<=" or ">=" + * even when the original was "<" or ">" --- this is necessary to match all + * the rows that could match the original. (We are building a lossy version + * of the row comparison when we do this, so we set lossy = true.) + * + * Note: this is really just the last half of match_rowcompare_to_indexcol, + * but we split it out for comprehensibility. 
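+ * + * For example, if the index is on (a, c) and the clause is (a, b) < (5, 10), + * only the first column matches, so we generate the lossy condition a <= 5 + * and the original row comparison is rechecked at runtime.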
+ */ +static IndexClause * +expand_indexqual_rowcompare(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index, + Oid expr_op, + bool var_on_left) +{ + IndexClause *iclause = makeNode(IndexClause); + RowCompareExpr *clause = (RowCompareExpr *) rinfo->clause; + int op_strategy; + Oid op_lefttype; + Oid op_righttype; + int matching_cols; + List *expr_ops; + List *opfamilies; + List *lefttypes; + List *righttypes; + List *new_ops; + List *var_args; + List *non_var_args; + + iclause->rinfo = rinfo; + iclause->indexcol = indexcol; + + if (var_on_left) + { + var_args = clause->largs; + non_var_args = clause->rargs; + } + else + { + var_args = clause->rargs; + non_var_args = clause->largs; + } + + get_op_opfamily_properties(expr_op, index->opfamily[indexcol], false, + &op_strategy, + &op_lefttype, + &op_righttype); + + /* Initialize returned list of which index columns are used */ + iclause->indexcols = list_make1_int(indexcol); + + /* Build lists of ops, opfamilies and operator datatypes in case needed */ + expr_ops = list_make1_oid(expr_op); + opfamilies = list_make1_oid(index->opfamily[indexcol]); + lefttypes = list_make1_oid(op_lefttype); + righttypes = list_make1_oid(op_righttype); + + /* + * See how many of the remaining columns match some index column in the + * same way. As in match_clause_to_indexcol(), the "other" side of any + * potential index condition is OK as long as it doesn't use Vars from the + * indexed relation. + */ + matching_cols = 1; + + while (matching_cols < list_length(var_args)) + { + Node *varop = (Node *) list_nth(var_args, matching_cols); + Node *constop = (Node *) list_nth(non_var_args, matching_cols); + int i; + + expr_op = list_nth_oid(clause->opnos, matching_cols); + if (!var_on_left) + { + /* indexkey is on right, so commute the operator */ + expr_op = get_commutator(expr_op); + if (expr_op == InvalidOid) + break; /* operator is not usable */ + } + if (bms_is_member(index->rel->relid, pull_varnos(root, constop))) + break; /* no good, Var on wrong side */ + if (contain_volatile_functions(constop)) + break; /* no good, volatile comparison value */ + + /* + * The Var side can match any key column of the index. + */ + for (i = 0; i < index->nkeycolumns; i++) + { + if (match_index_to_operand(varop, i, index) && + get_op_opfamily_strategy(expr_op, + index->opfamily[i]) == op_strategy && + IndexCollMatchesExprColl(index->indexcollations[i], + list_nth_oid(clause->inputcollids, + matching_cols))) + break; + } + if (i >= index->nkeycolumns) + break; /* no match found */ + + /* Add column number to returned list */ + iclause->indexcols = lappend_int(iclause->indexcols, i); + + /* Add operator info to lists */ + get_op_opfamily_properties(expr_op, index->opfamily[i], false, + &op_strategy, + &op_lefttype, + &op_righttype); + expr_ops = lappend_oid(expr_ops, expr_op); + opfamilies = lappend_oid(opfamilies, index->opfamily[i]); + lefttypes = lappend_oid(lefttypes, op_lefttype); + righttypes = lappend_oid(righttypes, op_righttype); + + /* This column matches, keep scanning */ + matching_cols++; + } + + /* Result is non-lossy if all columns are usable as index quals */ + iclause->lossy = (matching_cols != list_length(clause->opnos)); + + /* + * We can use rinfo->clause as-is if we have var on left and it's all + * usable as index quals. + */ + if (var_on_left && !iclause->lossy) + iclause->indexquals = list_make1(rinfo); + else + { + /* + * We have to generate a modified rowcompare (possibly just one + * OpExpr). 
The painful part of this is changing < to <= or > to >=, + * so deal with that first. + */ + if (!iclause->lossy) + { + /* very easy, just use the commuted operators */ + new_ops = expr_ops; + } + else if (op_strategy == BTLessEqualStrategyNumber || + op_strategy == BTGreaterEqualStrategyNumber) + { + /* easy, just use the same (possibly commuted) operators */ + new_ops = list_truncate(expr_ops, matching_cols); + } + else + { + ListCell *opfamilies_cell; + ListCell *lefttypes_cell; + ListCell *righttypes_cell; + + if (op_strategy == BTLessStrategyNumber) + op_strategy = BTLessEqualStrategyNumber; + else if (op_strategy == BTGreaterStrategyNumber) + op_strategy = BTGreaterEqualStrategyNumber; + else + elog(ERROR, "unexpected strategy number %d", op_strategy); + new_ops = NIL; + forthree(opfamilies_cell, opfamilies, + lefttypes_cell, lefttypes, + righttypes_cell, righttypes) + { + Oid opfam = lfirst_oid(opfamilies_cell); + Oid lefttype = lfirst_oid(lefttypes_cell); + Oid righttype = lfirst_oid(righttypes_cell); + + expr_op = get_opfamily_member(opfam, lefttype, righttype, + op_strategy); + if (!OidIsValid(expr_op)) /* should not happen */ + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + op_strategy, lefttype, righttype, opfam); + new_ops = lappend_oid(new_ops, expr_op); + } + } + + /* If we have more than one matching col, create a subset rowcompare */ + if (matching_cols > 1) + { + RowCompareExpr *rc = makeNode(RowCompareExpr); + + rc->rctype = (RowCompareType) op_strategy; + rc->opnos = new_ops; + rc->opfamilies = list_truncate(list_copy(clause->opfamilies), + matching_cols); + rc->inputcollids = list_truncate(list_copy(clause->inputcollids), + matching_cols); + rc->largs = list_truncate(copyObject(var_args), + matching_cols); + rc->rargs = list_truncate(copyObject(non_var_args), + matching_cols); + iclause->indexquals = list_make1(make_simple_restrictinfo(root, + (Expr *) rc)); + } + else + { + Expr *op; + + /* We don't report an index column list in this case */ + iclause->indexcols = NIL; + + op = make_opclause(linitial_oid(new_ops), BOOLOID, false, + copyObject(linitial(var_args)), + copyObject(linitial(non_var_args)), + InvalidOid, + linitial_oid(clause->inputcollids)); + iclause->indexquals = list_make1(make_simple_restrictinfo(root, op)); + } + } + + return iclause; +} + + +/**************************************************************************** + * ---- ROUTINES TO CHECK ORDERING OPERATORS ---- + ****************************************************************************/ + +/* + * match_pathkeys_to_index + * Test whether an index can produce output ordered according to the + * given pathkeys using "ordering operators". + * + * If it can, return a list of suitable ORDER BY expressions, each of the form + * "indexedcol operator pseudoconstant", along with an integer list of the + * index column numbers (zero based) that each clause would be used with. + * NIL lists are returned if the ordering is not achievable this way. + * + * On success, the result list is ordered by pathkeys, and in fact is + * one-to-one with the requested pathkeys. 
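+ * + * This only applies to amcanorderbyop indexes; for example, a GiST index on + * a point column can produce rows in the order requested by + * ORDER BY ptcol <-> '(0,0)' via its distance ordering operator.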
+ */ +static void +match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys, + List **orderby_clauses_p, + List **clause_columns_p) +{ + List *orderby_clauses = NIL; + List *clause_columns = NIL; + ListCell *lc1; + + *orderby_clauses_p = NIL; /* set default results */ + *clause_columns_p = NIL; + + /* Only indexes with the amcanorderbyop property are interesting here */ + if (!index->amcanorderbyop) + return; + + foreach(lc1, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc1); + bool found = false; + ListCell *lc2; + + /* + * Note: for any failure to match, we just return NIL immediately. + * There is no value in matching just some of the pathkeys. + */ + + /* Pathkey must request default sort order for the target opfamily */ + if (pathkey->pk_strategy != BTLessStrategyNumber || + pathkey->pk_nulls_first) + return; + + /* If eclass is volatile, no hope of using an indexscan */ + if (pathkey->pk_eclass->ec_has_volatile) + return; + + /* + * Try to match eclass member expression(s) to index. Note that child + * EC members are considered, but only when they belong to the target + * relation. (Unlike regular members, the same expression could be a + * child member of more than one EC. Therefore, the same index could + * be considered to match more than one pathkey list, which is OK + * here. See also get_eclass_for_sort_expr.) + */ + foreach(lc2, pathkey->pk_eclass->ec_members) + { + EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2); + int indexcol; + + /* No possibility of match if it references other relations */ + if (!bms_equal(member->em_relids, index->rel->relids)) + continue; + + /* + * We allow any column of the index to match each pathkey; they + * don't have to match left-to-right as you might expect. This is + * correct for GiST, and it doesn't matter for SP-GiST because + * that doesn't handle multiple columns anyway, and no other + * existing AMs support amcanorderbyop. We might need different + * logic in future for other implementations. + */ + for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++) + { + Expr *expr; + + expr = match_clause_to_ordering_op(index, + indexcol, + member->em_expr, + pathkey->pk_opfamily); + if (expr) + { + orderby_clauses = lappend(orderby_clauses, expr); + clause_columns = lappend_int(clause_columns, indexcol); + found = true; + break; + } + } + + if (found) /* don't want to look at remaining members */ + break; + } + + if (!found) /* fail if no match for this pathkey */ + return; + } + + *orderby_clauses_p = orderby_clauses; /* success! */ + *clause_columns_p = clause_columns; +} + +/* + * match_clause_to_ordering_op + * Determines whether an ordering operator expression matches an + * index column. + * + * This is similar to, but simpler than, match_clause_to_indexcol. + * We only care about simple OpExpr cases. The input is a bare + * expression that is being ordered by, which must be of the form + * (indexkey op const) or (const op indexkey) where op is an ordering + * operator for the column's opfamily. + * + * 'index' is the index of interest. + * 'indexcol' is a column number of 'index' (counting from 0). + * 'clause' is the ordering expression to be tested. + * 'pk_opfamily' is the btree opfamily describing the required sort order. + * + * Note that we currently do not consider the collation of the ordering + * operator's result. In practical cases the result type will be numeric + * and thus have no collation, and it's not very clear what to match to + * if it did have a collation. 
The index's collation should match the + * ordering operator's input collation, not its result. + * + * If successful, return 'clause' as-is if the indexkey is on the left, + * otherwise a commuted copy of 'clause'. If no match, return NULL. + */ +static Expr * +match_clause_to_ordering_op(IndexOptInfo *index, + int indexcol, + Expr *clause, + Oid pk_opfamily) +{ + Oid opfamily; + Oid idxcollation; + Node *leftop, + *rightop; + Oid expr_op; + Oid expr_coll; + Oid sortfamily; + bool commuted; + + Assert(indexcol < index->nkeycolumns); + + opfamily = index->opfamily[indexcol]; + idxcollation = index->indexcollations[indexcol]; + + /* + * Clause must be a binary opclause. + */ + if (!is_opclause(clause)) + return NULL; + leftop = get_leftop(clause); + rightop = get_rightop(clause); + if (!leftop || !rightop) + return NULL; + expr_op = ((OpExpr *) clause)->opno; + expr_coll = ((OpExpr *) clause)->inputcollid; + + /* + * We can forget the whole thing right away if wrong collation. + */ + if (!IndexCollMatchesExprColl(idxcollation, expr_coll)) + return NULL; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). + */ + if (match_index_to_operand(leftop, indexcol, index) && + !contain_var_clause(rightop) && + !contain_volatile_functions(rightop)) + { + commuted = false; + } + else if (match_index_to_operand(rightop, indexcol, index) && + !contain_var_clause(leftop) && + !contain_volatile_functions(leftop)) + { + /* Might match, but we need a commuted operator */ + expr_op = get_commutator(expr_op); + if (expr_op == InvalidOid) + return NULL; + commuted = true; + } + else + return NULL; + + /* + * Is the (commuted) operator an ordering operator for the opfamily? And + * if so, does it yield the right sorting semantics? + */ + sortfamily = get_op_opfamily_sortfamily(expr_op, opfamily); + if (sortfamily != pk_opfamily) + return NULL; + + /* We have a match. Return clause or a commuted version thereof. */ + if (commuted) + { + OpExpr *newclause = makeNode(OpExpr); + + /* flat-copy all the fields of clause */ + memcpy(newclause, clause, sizeof(OpExpr)); + + /* commute it */ + newclause->opno = expr_op; + newclause->opfuncid = InvalidOid; + newclause->args = list_make2(rightop, leftop); + + clause = (Expr *) newclause; + } + + return clause; +} + + +/**************************************************************************** + * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- + ****************************************************************************/ + +/* + * check_index_predicates + * Set the predicate-derived IndexOptInfo fields for each index + * of the specified relation. + * + * predOK is set true if the index is partial and its predicate is satisfied + * for this query, ie the query's WHERE clauses imply the predicate. + * + * indrestrictinfo is set to the relation's baserestrictinfo list less any + * conditions that are implied by the index's predicate. (Obviously, for a + * non-partial index, this is the same as baserestrictinfo.) Such conditions + * can be dropped from the plan when using the index, in certain cases. + * + * At one time it was possible for this to get re-run after adding more + * restrictions to the rel, thus possibly letting us prove more indexes OK. + * That doesn't happen any more (at least not in the core code's usage), + * but this code still supports it in case extensions want to mess with the + * baserestrictinfo list. We assume that adding more restrictions can't make + * an index not predOK. 
We must recompute indrestrictinfo each time, though, + * to make sure any newly-added restrictions get into it if needed. + */ +void +check_index_predicates(PlannerInfo *root, RelOptInfo *rel) +{ + List *clauselist; + bool have_partial; + bool is_target_rel; + Relids otherrels; + ListCell *lc; + + /* Indexes are available only on base or "other" member relations. */ + Assert(IS_SIMPLE_REL(rel)); + + /* + * Initialize the indrestrictinfo lists to be identical to + * baserestrictinfo, and check whether there are any partial indexes. If + * not, this is all we need to do. + */ + have_partial = false; + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + + index->indrestrictinfo = rel->baserestrictinfo; + if (index->indpred) + have_partial = true; + } + if (!have_partial) + return; + + /* + * Construct a list of clauses that we can assume true for the purpose of + * proving the index(es) usable. Restriction clauses for the rel are + * always usable, and so are any join clauses that are "movable to" this + * rel. Also, we can consider any EC-derivable join clauses (which must + * be "movable to" this rel, by definition). + */ + clauselist = list_copy(rel->baserestrictinfo); + + /* Scan the rel's join clauses */ + foreach(lc, rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Check if clause can be moved to this rel */ + if (!join_clause_is_movable_to(rinfo, rel)) + continue; + + clauselist = lappend(clauselist, rinfo); + } + + /* + * Add on any equivalence-derivable join clauses. Computing the correct + * relid sets for generate_join_implied_equalities is slightly tricky + * because the rel could be a child rel rather than a true baserel, and in + * that case we must remove its parents' relid(s) from all_baserels. + */ + if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + otherrels = bms_difference(root->all_baserels, + find_childrel_parents(root, rel)); + else + otherrels = bms_difference(root->all_baserels, rel->relids); + + if (!bms_is_empty(otherrels)) + clauselist = + list_concat(clauselist, + generate_join_implied_equalities(root, + bms_union(rel->relids, + otherrels), + otherrels, + rel)); + + /* + * Normally we remove quals that are implied by a partial index's + * predicate from indrestrictinfo, indicating that they need not be + * checked explicitly by an indexscan plan using this index. However, if + * the rel is a target relation of UPDATE/DELETE/SELECT FOR UPDATE, we + * cannot remove such quals from the plan, because they need to be in the + * plan so that they will be properly rechecked by EvalPlanQual testing. + * Some day we might want to remove such quals from the main plan anyway + * and pass them through to EvalPlanQual via a side channel; but for now, + * we just don't remove implied quals at all for target relations. + */ + is_target_rel = (bms_is_member(rel->relid, root->all_result_relids) || + get_plan_rowmark(root->rowMarks, rel->relid) != NULL); + + /* + * Now try to prove each index predicate true, and compute the + * indrestrictinfo lists for partial indexes. Note that we compute the + * indrestrictinfo list even for non-predOK indexes; this might seem + * wasteful, but we may be able to use such indexes in OR clauses, cf + * generate_bitmap_or_paths(). 
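+ * + * For example, a partial index built WHERE status = 'archived' is proven + * predOK when the clauselist assembled above contains a clause that implies + * that predicate, such as an identical status = 'archived' restriction.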
+ */ + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + ListCell *lcr; + + if (index->indpred == NIL) + continue; /* ignore non-partial indexes here */ + + if (!index->predOK) /* don't repeat work if already proven OK */ + index->predOK = predicate_implied_by(index->indpred, clauselist, + false); + + /* If rel is an update target, leave indrestrictinfo as set above */ + if (is_target_rel) + continue; + + /* Else compute indrestrictinfo as the non-implied quals */ + index->indrestrictinfo = NIL; + foreach(lcr, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcr); + + /* predicate_implied_by() assumes first arg is immutable */ + if (contain_mutable_functions((Node *) rinfo->clause) || + !predicate_implied_by(list_make1(rinfo->clause), + index->indpred, false)) + index->indrestrictinfo = lappend(index->indrestrictinfo, rinfo); + } + } +} + +/**************************************************************************** + * ---- ROUTINES TO CHECK EXTERNALLY-VISIBLE CONDITIONS ---- + ****************************************************************************/ + +/* + * ec_member_matches_indexcol + * Test whether an EquivalenceClass member matches an index column. + * + * This is a callback for use by generate_implied_equalities_for_column. + */ +static bool +ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg) +{ + IndexOptInfo *index = ((ec_member_matches_arg *) arg)->index; + int indexcol = ((ec_member_matches_arg *) arg)->indexcol; + Oid curFamily; + Oid curCollation; + + Assert(indexcol < index->nkeycolumns); + + curFamily = index->opfamily[indexcol]; + curCollation = index->indexcollations[indexcol]; + + /* + * If it's a btree index, we can reject it if its opfamily isn't + * compatible with the EC, since no clause generated from the EC could be + * used with the index. For non-btree indexes, we can't easily tell + * whether clauses generated from the EC could be used with the index, so + * don't check the opfamily. This might mean we return "true" for a + * useless EC, so we have to recheck the results of + * generate_implied_equalities_for_column; see + * match_eclass_clauses_to_index. + */ + if (index->relam == BTREE_AM_OID && + !list_member_oid(ec->ec_opfamilies, curFamily)) + return false; + + /* We insist on collation match for all index types, though */ + if (!IndexCollMatchesExprColl(curCollation, ec->ec_collation)) + return false; + + return match_index_to_operand((Node *) em->em_expr, indexcol, index); +} + +/* + * relation_has_unique_index_for + * Determine whether the relation provably has at most one row satisfying + * a set of equality conditions, because the conditions constrain all + * columns of some unique index. + * + * The conditions can be represented in either or both of two ways: + * 1. A list of RestrictInfo nodes, where the caller has already determined + * that each condition is a mergejoinable equality with an expression in + * this relation on one side, and an expression not involving this relation + * on the other. The transient outer_is_left flag is used to identify which + * side we should look at: left side if outer_is_left is false, right side + * if it is true. + * 2. A list of expressions in this relation, and a corresponding list of + * equality operators. The caller must have already checked that the operators + * represent equality. 
(Note: the operators could be cross-type; the + * expressions should correspond to their RHS inputs.) + * + * The caller need only supply equality conditions arising from joins; + * this routine automatically adds in any usable baserestrictinfo clauses. + * (Note that the passed-in restrictlist will be destructively modified!) + */ +bool +relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist, + List *exprlist, List *oprlist) +{ + ListCell *ic; + + Assert(list_length(exprlist) == list_length(oprlist)); + + /* Short-circuit if no indexes... */ + if (rel->indexlist == NIL) + return false; + + /* + * Examine the rel's restriction clauses for usable var = const clauses + * that we can add to the restrictlist. + */ + foreach(ic, rel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(ic); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. + */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + restrictlist = lappend(restrictlist, restrictinfo); + } + + /* Short-circuit the easy case */ + if (restrictlist == NIL && exprlist == NIL) + return false; + + /* Examine each index of the relation ... */ + foreach(ic, rel->indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); + int c; + + /* + * If the index is not unique, or not immediately enforced, or if it's + * a partial index that doesn't match the query, it's useless here. + */ + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + /* + * Try to find each index column in the lists of conditions. This is + * O(N^2) or worse, but we expect all the lists to be short. + */ + for (c = 0; c < ind->nkeycolumns; c++) + { + bool matched = false; + ListCell *lc; + ListCell *lc2; + + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind of + * equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c])) + continue; + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. 
+ */ + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (match_index_to_operand(rexpr, c, ind)) + { + matched = true; /* column is unique */ + break; + } + } + + if (matched) + continue; + + forboth(lc, exprlist, lc2, oprlist) + { + Node *expr = (Node *) lfirst(lc); + Oid opr = lfirst_oid(lc2); + + /* See if the expression matches the index key */ + if (!match_index_to_operand(expr, c, ind)) + continue; + + /* + * The equality operator must be a member of the index + * opfamily, else it is not asserting the right kind of + * equality behavior for this index. We assume the caller + * determined it is an equality operator, so we don't need to + * check any more tightly than this. + */ + if (!op_in_opfamily(opr, ind->opfamily[c])) + continue; + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. + */ + + matched = true; /* column is unique */ + break; + } + + if (!matched) + break; /* no match; this index doesn't help us */ + } + + /* Matched all key columns of this index? */ + if (c == ind->nkeycolumns) + return true; + } + + return false; +} + +/* + * indexcol_is_bool_constant_for_query + * + * If an index column is constrained to have a constant value by the query's + * WHERE conditions, then it's irrelevant for sort-order considerations. + * Usually that means we have a restriction clause WHERE indexcol = constant, + * which gets turned into an EquivalenceClass containing a constant, which + * is recognized as redundant by build_index_pathkeys(). But if the index + * column is a boolean variable (or expression), then we are not going to + * see WHERE indexcol = constant, because expression preprocessing will have + * simplified that to "WHERE indexcol" or "WHERE NOT indexcol". So we are not + * going to have a matching EquivalenceClass (unless the query also contains + * "ORDER BY indexcol"). To allow such cases to work the same as they would + * for non-boolean values, this function is provided to detect whether the + * specified index column matches a boolean restriction clause. + */ +bool +indexcol_is_bool_constant_for_query(PlannerInfo *root, + IndexOptInfo *index, + int indexcol) +{ + ListCell *lc; + + /* If the index isn't boolean, we can't possibly get a match */ + if (!IsBooleanOpfamily(index->opfamily[indexcol])) + return false; + + /* Check each restriction clause for the index's rel */ + foreach(lc, index->rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* + * As in match_clause_to_indexcol, never match pseudoconstants to + * indexes. (It might be semantically okay to do so here, but the + * odds of getting a match are negligible, so don't waste the cycles.) + */ + if (rinfo->pseudoconstant) + continue; + + /* See if we can match the clause's expression to the index column */ + if (match_boolean_index_clause(root, rinfo, indexcol, index)) + return true; + } + + return false; +} + + +/**************************************************************************** + * ---- ROUTINES TO CHECK OPERANDS ---- + ****************************************************************************/ + +/* + * match_index_to_operand() + * Generalized test for a match between an index's key + * and the operand on one side of a restriction or join clause. 
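+ * + * For a plain index column the operand must be a Var for that column; for an + * expression index (say, one on lower(name)) the operand must be equal() to + * the index expression. A top-level RelabelType on either side is ignored.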
+ * + * operand: the nodetree to be compared to the index + * indexcol: the column number of the index (counting from 0) + * index: the index of interest + * + * Note that we aren't interested in collations here; the caller must check + * for a collation match, if it's dealing with an operator where that matters. + * + * This is exported for use in selfuncs.c. + */ +bool +match_index_to_operand(Node *operand, + int indexcol, + IndexOptInfo *index) +{ + int indkey; + + /* + * Ignore any RelabelType node above the operand. This is needed to be + * able to apply indexscanning in binary-compatible-operator cases. Note: + * we can assume there is at most one RelabelType node; + * eval_const_expressions() will have simplified if more than one. + */ + if (operand && IsA(operand, RelabelType)) + operand = (Node *) ((RelabelType *) operand)->arg; + + indkey = index->indexkeys[indexcol]; + if (indkey != 0) + { + /* + * Simple index column; operand must be a matching Var. + */ + if (operand && IsA(operand, Var) && + index->rel->relid == ((Var *) operand)->varno && + indkey == ((Var *) operand)->varattno) + return true; + } + else + { + /* + * Index expression; find the correct expression. (This search could + * be avoided, at the cost of complicating all the callers of this + * routine; doesn't seem worth it.) + */ + ListCell *indexpr_item; + int i; + Node *indexkey; + + indexpr_item = list_head(index->indexprs); + for (i = 0; i < indexcol; i++) + { + if (index->indexkeys[i] == 0) + { + if (indexpr_item == NULL) + elog(ERROR, "wrong number of index expressions"); + indexpr_item = lnext(index->indexprs, indexpr_item); + } + } + if (indexpr_item == NULL) + elog(ERROR, "wrong number of index expressions"); + indexkey = (Node *) lfirst(indexpr_item); + + /* + * Does it match the operand? Again, strip any relabeling. + */ + if (indexkey && IsA(indexkey, RelabelType)) + indexkey = (Node *) ((RelabelType *) indexkey)->arg; + + if (equal(indexkey, operand)) + return true; + } + + return false; +} + +/* + * is_pseudo_constant_for_index() + * Test whether the given expression can be used as an indexscan + * comparison value. + * + * An indexscan comparison value must not contain any volatile functions, + * and it can't contain any Vars of the index's own table. Vars of + * other tables are okay, though; in that case we'd be producing an + * indexqual usable in a parameterized indexscan. This is, therefore, + * a weaker condition than is_pseudo_constant_clause(). + * + * This function is exported for use by planner support functions, + * which will have available the IndexOptInfo, but not any RestrictInfo + * infrastructure. It is making the same test made by functions above + * such as match_opclause_to_indexcol(), but those rely where possible + * on RestrictInfo information about variable membership. 
+ * + * expr: the nodetree to be checked + * index: the index of interest + */ +bool +is_pseudo_constant_for_index(PlannerInfo *root, Node *expr, IndexOptInfo *index) +{ + /* pull_varnos is cheaper than volatility check, so do that first */ + if (bms_is_member(index->rel->relid, pull_varnos(root, expr))) + return false; /* no good, contains Var of table */ + if (contain_volatile_functions(expr)) + return false; /* no good, volatile comparison value */ + return true; +} diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c new file mode 100644 index 0000000..d23cd0e --- /dev/null +++ b/src/backend/optimizer/path/joinpath.c @@ -0,0 +1,2304 @@ +/*------------------------------------------------------------------------- + * + * joinpath.c + * Routines to find all possible paths for processing a set of joins + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/joinpath.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "executor/executor.h" +#include "foreign/fdwapi.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "utils/typcache.h" + +/* Hook for plugins to get control in add_paths_to_joinrel() */ +set_join_pathlist_hook_type set_join_pathlist_hook = NULL; + +/* + * Paths parameterized by the parent can be considered to be parameterized by + * any of its child. + */ +#define PATH_PARAM_BY_PARENT(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \ + (rel)->top_parent_relids)) +#define PATH_PARAM_BY_REL_SELF(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) + +#define PATH_PARAM_BY_REL(path, rel) \ + (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel)) + +static void try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra); +static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, JoinPathExtraData *extra); +static inline bool clause_sides_match_join(RestrictInfo *rinfo, + RelOptInfo *outerrel, + RelOptInfo *innerrel); +static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, JoinPathExtraData *extra); +static void consider_parallel_nestloop(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra); +static void consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total); +static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, JoinPathExtraData *extra); +static List *select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype, + bool 
*mergejoin_allowed); +static void generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys, + bool is_partial); + + +/* + * add_paths_to_joinrel + * Given a join relation and two component rels from which it can be made, + * consider all possible paths that use the two component rels as outer + * and inner rel respectively. Add these paths to the join rel's pathlist + * if they survive comparison with other paths (and remove any existing + * paths that are dominated by these paths). + * + * Modifies the pathlist field of the joinrel node to contain the best + * paths found so far. + * + * jointype is not necessarily the same as sjinfo->jointype; it might be + * "flipped around" if we are considering joining the rels in the opposite + * direction from what's indicated in sjinfo. + * + * Also, this routine and others in this module accept the special JoinTypes + * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should + * unique-ify the outer or inner relation and then apply a regular inner + * join. These values are not allowed to propagate outside this module, + * however. Path cost estimation code may need to recognize that it's + * dealing with such a case --- the combination of nominal jointype INNER + * with sjinfo->jointype == JOIN_SEMI indicates that. + */ +void +add_paths_to_joinrel(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist) +{ + JoinPathExtraData extra; + bool mergejoin_allowed = true; + ListCell *lc; + Relids joinrelids; + + /* + * PlannerInfo doesn't contain the SpecialJoinInfos created for joins + * between child relations, even if there is a SpecialJoinInfo node for + * the join between the topmost parents. So, while calculating Relids set + * representing the restriction, consider relids of topmost parent of + * partitions. + */ + if (joinrel->reloptkind == RELOPT_OTHER_JOINREL) + joinrelids = joinrel->top_parent_relids; + else + joinrelids = joinrel->relids; + + extra.restrictlist = restrictlist; + extra.mergeclause_list = NIL; + extra.sjinfo = sjinfo; + extra.param_source_rels = NULL; + + /* + * See if the inner relation is provably unique for this outer rel. + * + * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't + * matter since the executor can make the equivalent optimization anyway; + * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we + * must be considering a semijoin whose inner side is not provably unique + * (else reduce_unique_semijoins would've simplified it), so there's no + * point in calling innerrel_is_unique. However, if the LHS covers all of + * the semijoin's min_lefthand, then it's appropriate to set inner_unique + * because the path produced by create_unique_path will be unique relative + * to the LHS. (If we have an LHS that's only part of the min_lefthand, + * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid + * letting that value escape this module. + */ + switch (jointype) + { + case JOIN_SEMI: + case JOIN_ANTI: + + /* + * XXX it may be worth proving this to allow a Memoize to be + * considered for Nested Loop Semi/Anti Joins. 
+ */ + extra.inner_unique = false; /* well, unproven */ + break; + case JOIN_UNIQUE_INNER: + extra.inner_unique = bms_is_subset(sjinfo->min_lefthand, + outerrel->relids); + break; + case JOIN_UNIQUE_OUTER: + extra.inner_unique = innerrel_is_unique(root, + joinrel->relids, + outerrel->relids, + innerrel, + JOIN_INNER, + restrictlist, + false); + break; + default: + extra.inner_unique = innerrel_is_unique(root, + joinrel->relids, + outerrel->relids, + innerrel, + jointype, + restrictlist, + false); + break; + } + + /* + * Find potential mergejoin clauses. We can skip this if we are not + * interested in doing a mergejoin. However, mergejoin may be our only + * way of implementing a full outer join, so override enable_mergejoin if + * it's a full join. + */ + if (enable_mergejoin || jointype == JOIN_FULL) + extra.mergeclause_list = select_mergejoin_clauses(root, + joinrel, + outerrel, + innerrel, + restrictlist, + jointype, + &mergejoin_allowed); + + /* + * If it's SEMI, ANTI, or inner_unique join, compute correction factors + * for cost estimation. These will be the same for all paths. + */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique) + compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel, + jointype, sjinfo, restrictlist, + &extra.semifactors); + + /* + * Decide whether it's sensible to generate parameterized paths for this + * joinrel, and if so, which relations such paths should require. There + * is usually no need to create a parameterized result path unless there + * is a join order restriction that prevents joining one of our input rels + * directly to the parameter source rel instead of joining to the other + * input rel. (But see allow_star_schema_join().) This restriction + * reduces the number of parameterized paths we have to deal with at + * higher join levels, without compromising the quality of the resulting + * plan. We express the restriction as a Relids set that must overlap the + * parameterization of any proposed join path. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc); + + /* + * SJ is relevant to this join if we have some part of its RHS + * (possibly not all of it), and haven't yet joined to its LHS. (This + * test is pretty simplistic, but should be sufficient considering the + * join has already been proven legal.) If the SJ is relevant, it + * presents constraints for joining to anything not in its RHS. + */ + if (bms_overlap(joinrelids, sjinfo2->min_righthand) && + !bms_overlap(joinrelids, sjinfo2->min_lefthand)) + extra.param_source_rels = bms_join(extra.param_source_rels, + bms_difference(root->all_baserels, + sjinfo2->min_righthand)); + + /* full joins constrain both sides symmetrically */ + if (sjinfo2->jointype == JOIN_FULL && + bms_overlap(joinrelids, sjinfo2->min_lefthand) && + !bms_overlap(joinrelids, sjinfo2->min_righthand)) + extra.param_source_rels = bms_join(extra.param_source_rels, + bms_difference(root->all_baserels, + sjinfo2->min_lefthand)); + } + + /* + * However, when a LATERAL subquery is involved, there will simply not be + * any paths for the joinrel that aren't parameterized by whatever the + * subquery is parameterized by, unless its parameterization is resolved + * within the joinrel. So we might as well allow additional dependencies + * on whatever residual lateral dependencies the joinrel will have. + */ + extra.param_source_rels = bms_add_members(extra.param_source_rels, + joinrel->lateral_relids); + + /* + * 1. 
Consider mergejoin paths where both relations must be explicitly + * sorted. Skip this if we can't mergejoin. + */ + if (mergejoin_allowed) + sort_inner_and_outer(root, joinrel, outerrel, innerrel, + jointype, &extra); + + /* + * 2. Consider paths where the outer relation need not be explicitly + * sorted. This includes both nestloops and mergejoins where the outer + * path is already ordered. Again, skip this if we can't mergejoin. + * (That's okay because we know that nestloop can't handle right/full + * joins at all, so it wouldn't work in the prohibited cases either.) + */ + if (mergejoin_allowed) + match_unsorted_outer(root, joinrel, outerrel, innerrel, + jointype, &extra); + +#ifdef NOT_USED + + /* + * 3. Consider paths where the inner relation need not be explicitly + * sorted. This includes mergejoins only (nestloops were already built in + * match_unsorted_outer). + * + * Diked out as redundant 2/13/2000 -- tgl. There isn't any really + * significant difference between the inner and outer side of a mergejoin, + * so match_unsorted_inner creates no paths that aren't equivalent to + * those made by match_unsorted_outer when add_paths_to_joinrel() is + * invoked with the two rels given in the other order. + */ + if (mergejoin_allowed) + match_unsorted_inner(root, joinrel, outerrel, innerrel, + jointype, &extra); +#endif + + /* + * 4. Consider paths where both outer and inner relations must be hashed + * before being joined. As above, disregard enable_hashjoin for full + * joins, because there may be no other alternative. + */ + if (enable_hashjoin || jointype == JOIN_FULL) + hash_inner_and_outer(root, joinrel, outerrel, innerrel, + jointype, &extra); + + /* + * 5. If inner and outer relations are foreign tables (or joins) belonging + * to the same server and assigned to the same user to check access + * permissions as, give the FDW a chance to push down joins. + */ + if (joinrel->fdwroutine && + joinrel->fdwroutine->GetForeignJoinPaths) + joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel, + outerrel, innerrel, + jointype, &extra); + + /* + * 6. Finally, give extensions a chance to manipulate the path list. + */ + if (set_join_pathlist_hook) + set_join_pathlist_hook(root, joinrel, outerrel, innerrel, + jointype, &extra); +} + +/* + * We override the param_source_rels heuristic to accept nestloop paths in + * which the outer rel satisfies some but not all of the inner path's + * parameterization. This is necessary to get good plans for star-schema + * scenarios, in which a parameterized path for a large table may require + * parameters from multiple small tables that will not get joined directly to + * each other. We can handle that by stacking nestloops that have the small + * tables on the outside; but this breaks the rule the param_source_rels + * heuristic is based on, namely that parameters should not be passed down + * across joins unless there's a join-order-constraint-based reason to do so. + * So we ignore the param_source_rels restriction when this case applies. + * + * allow_star_schema_join() returns true if the param_source_rels restriction + * should be overridden, ie, it's okay to perform this join. + */ +static inline bool +allow_star_schema_join(PlannerInfo *root, + Relids outerrelids, + Relids inner_paramrels) +{ + /* + * It's a star-schema case if the outer rel provides some but not all of + * the inner rel's parameterization. 
+ */ + return (bms_overlap(inner_paramrels, outerrelids) && + bms_nonempty_difference(inner_paramrels, outerrelids)); +} + +/* + * paraminfo_get_equal_hashops + * Determine if param_info and innerrel's lateral_vars can be hashed. + * Returns true if hashing is possible, otherwise returns false. + * + * Additionally, we collect the outer exprs and the hash operators for + * each parameter to innerrel. These are set in 'param_exprs', 'operators' and + * 'binary_mode' when we return true. + */ +static bool +paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List **param_exprs, List **operators, + bool *binary_mode) + +{ + ListCell *lc; + + *param_exprs = NIL; + *operators = NIL; + *binary_mode = false; + + if (param_info != NULL) + { + List *clauses = param_info->ppi_clauses; + + foreach(lc, clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + OpExpr *opexpr; + Node *expr; + + /* can't use a memoize node without a valid hash equals operator */ + if (!OidIsValid(rinfo->hasheqoperator) || + !clause_sides_match_join(rinfo, outerrel, innerrel)) + { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + /* + * We already checked that this is an OpExpr with 2 args when + * setting hasheqoperator. + */ + opexpr = (OpExpr *) rinfo->clause; + if (rinfo->outer_is_left) + expr = (Node *) linitial(opexpr->args); + else + expr = (Node *) lsecond(opexpr->args); + + *operators = lappend_oid(*operators, rinfo->hasheqoperator); + *param_exprs = lappend(*param_exprs, expr); + + /* + * When the join operator is not hashable then it's possible that + * the operator will be able to distinguish something that the + * hash equality operator could not. For example with floating + * point types -0.0 and +0.0 are classed as equal by the hash + * function and equality function, but some other operator may be + * able to tell those values apart. This means that we must put + * memoize into binary comparison mode so that it does bit-by-bit + * comparisons rather than a "logical" comparison as it would + * using the hash equality operator. + */ + if (!OidIsValid(rinfo->hashjoinoperator)) + *binary_mode = true; + } + } + + /* Now add any lateral vars to the cache key too */ + foreach(lc, innerrel->lateral_vars) + { + Node *expr = (Node *) lfirst(lc); + TypeCacheEntry *typentry; + + /* Reject if there are any volatile functions */ + if (contain_volatile_functions(expr)) + { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + typentry = lookup_type_cache(exprType(expr), + TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR); + + /* can't use a memoize node without a valid hash equals operator */ + if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr)) + { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + *operators = lappend_oid(*operators, typentry->eq_opr); + *param_exprs = lappend(*param_exprs, expr); + + /* + * We must go into binary mode as we don't have too much of an idea of + * how these lateral Vars are being used. See comment above when we + * set *binary_mode for the non-lateral Var case. This could be + * relaxed a bit if we had the RestrictInfos and knew the operators + * being used, however for cases like Vars that are arguments to + * functions we must operate in binary mode as we don't have + * visibility into what the function is doing with the Vars.
+ */ + *binary_mode = true; + } + + /* We're okay to use memoize */ + return true; +} + +/* + * get_memoize_path + * If possible, make and return a Memoize path atop of 'inner_path'. + * Otherwise return NULL. + */ +static Path * +get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, + RelOptInfo *outerrel, Path *inner_path, + Path *outer_path, JoinType jointype, + JoinPathExtraData *extra) +{ + List *param_exprs; + List *hash_operators; + ListCell *lc; + bool binary_mode; + + /* Obviously not if it's disabled */ + if (!enable_memoize) + return NULL; + + /* + * We can safely not bother with all this unless we expect to perform more + * than one inner scan. The first scan is always going to be a cache + * miss. This would likely fail later anyway based on costs, so this is + * really just to save some wasted effort. + */ + if (outer_path->parent->rows < 2) + return NULL; + + /* + * We can only have a memoize node when there's some kind of cache key, + * either parameterized path clauses or lateral Vars. No cache key sounds + * more like something a Materialize node might be more useful for. + */ + if ((inner_path->param_info == NULL || + inner_path->param_info->ppi_clauses == NIL) && + innerrel->lateral_vars == NIL) + return NULL; + + /* + * Currently we don't do this for SEMI and ANTI joins unless they're + * marked as inner_unique. This is because nested loop SEMI/ANTI joins + * don't scan the inner node to completion, which will mean memoize cannot + * mark the cache entry as complete. + * + * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique + * = true. Should we? See add_paths_to_joinrel() + */ + if (!extra->inner_unique && (jointype == JOIN_SEMI || + jointype == JOIN_ANTI)) + return NULL; + + /* + * Memoize normally marks cache entries as complete when it runs out of + * tuples to read from its subplan. However, with unique joins, Nested + * Loop will skip to the next outer tuple after finding the first matching + * inner tuple. This means that we may not read the inner side of the + * join to completion which leaves no opportunity to mark the cache entry + * as complete. To work around that, when the join is unique we + * automatically mark cache entries as complete after fetching the first + * tuple. This works when the entire join condition is parameterized. + * Otherwise, when the parameterization is only a subset of the join + * condition, we can't be sure which part of it causes the join to be + * unique. This means there are no guarantees that only 1 tuple will be + * read. We cannot mark the cache entry as complete after reading the + * first tuple without that guarantee. This means the scope of Memoize + * node's usefulness is limited to only outer rows that have no join + * partner as this is the only case where Nested Loop would exhaust the + * inner scan of a unique join. Since the scope is limited to that, we + * just don't bother making a memoize path in this case. + * + * Lateral vars needn't be considered here as they're not considered when + * determining if the join is unique. + * + * XXX this could be enabled if the remaining join quals were made part of + * the inner scan's filter instead of the join filter. Maybe it's worth + * considering doing that? 
+ */ + if (extra->inner_unique && + (inner_path->param_info == NULL || + list_length(inner_path->param_info->ppi_clauses) < + list_length(extra->restrictlist))) + return NULL; + + /* + * We can't use a memoize node if there are volatile functions in the + * inner rel's target list or restrict list. A cache hit could reduce the + * number of calls to these functions. + */ + if (contain_volatile_functions((Node *) innerrel->reltarget)) + return NULL; + + foreach(lc, innerrel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (contain_volatile_functions((Node *) rinfo)) + return NULL; + } + + /* Check if we have hash ops for each parameter to the path */ + if (paraminfo_get_equal_hashops(root, + inner_path->param_info, + outerrel, + innerrel, + &param_exprs, + &hash_operators, + &binary_mode)) + { + return (Path *) create_memoize_path(root, + innerrel, + inner_path, + param_exprs, + hash_operators, + extra->inner_unique, + binary_mode, + outer_path->rows); + } + + return NULL; +} + +/* + * try_nestloop_path + * Consider a nestloop join path; if it appears useful, push it into + * the joinrel's pathlist via add_path(). + */ +static void +try_nestloop_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + Relids required_outer; + JoinCostWorkspace workspace; + RelOptInfo *innerrel = inner_path->parent; + RelOptInfo *outerrel = outer_path->parent; + Relids innerrelids; + Relids outerrelids; + Relids inner_paramrels = PATH_REQ_OUTER(inner_path); + Relids outer_paramrels = PATH_REQ_OUTER(outer_path); + + /* + * Paths are parameterized by top-level parents, so run parameterization + * tests on the parent relids. + */ + if (innerrel->top_parent_relids) + innerrelids = innerrel->top_parent_relids; + else + innerrelids = innerrel->relids; + + if (outerrel->top_parent_relids) + outerrelids = outerrel->top_parent_relids; + else + outerrelids = outerrel->relids; + + /* + * Check to see if proposed path is still parameterized, and reject if the + * parameterization wouldn't be sensible --- unless allow_star_schema_join + * says to allow it anyway. Also, we must reject if have_dangerous_phv + * doesn't like the look of it, which could only happen if the nestloop is + * still parameterized. + */ + required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels, + innerrelids, inner_paramrels); + if (required_outer && + ((!bms_overlap(required_outer, extra->param_source_rels) && + !allow_star_schema_join(root, outerrelids, inner_paramrels)) || + have_dangerous_phv(root, outerrelids, inner_paramrels))) + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; + } + + /* + * Do a precheck to quickly eliminate obviously-inferior paths. We + * calculate a cheap lower bound on the path's cost and then use + * add_path_precheck() to see if the path is clearly going to be dominated + * by some existing path for the joinrel. If not, do the full pushup with + * creating a fully valid path structure and submitting it to add_path(). + * The latter two steps are expensive enough to make this two-phase + * methodology worthwhile.
+ */ + initial_cost_nestloop(root, &workspace, jointype, + outer_path, inner_path, extra); + + if (add_path_precheck(joinrel, + workspace.startup_cost, workspace.total_cost, + pathkeys, required_outer)) + { + /* + * If the inner path is parameterized, it is parameterized by the + * topmost parent of the outer rel, not the outer rel itself. Fix + * that. + */ + if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) + { + inner_path = reparameterize_path_by_child(root, inner_path, + outer_path->parent); + + /* + * If we could not translate the path, we can't create nest loop + * path. + */ + if (!inner_path) + { + bms_free(required_outer); + return; + } + } + + add_path(joinrel, (Path *) + create_nestloop_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + required_outer)); + } + else + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + } +} + +/* + * try_partial_nestloop_path + * Consider a partial nestloop join path; if it appears useful, push it into + * the joinrel's partial_pathlist via add_partial_path(). + */ +static void +try_partial_nestloop_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinCostWorkspace workspace; + + /* + * If the inner path is parameterized, the parameterization must be fully + * satisfied by the proposed outer path. Parameterized partial paths are + * not supported. The caller should already have verified that no lateral + * rels are required here. + */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + RelOptInfo *outerrel = outer_path->parent; + Relids outerrelids; + + /* + * The inner and outer paths are parameterized, if at all, by the top + * level parents, not the child relations, so we must use those relids + * for our parameterization tests. + */ + if (outerrel->top_parent_relids) + outerrelids = outerrel->top_parent_relids; + else + outerrelids = outerrel->relids; + + if (!bms_is_subset(inner_paramrels, outerrelids)) + return; + } + + /* + * Before creating a path, get a quick lower bound on what it is likely to + * cost. Bail out right away if it looks terrible. + */ + initial_cost_nestloop(root, &workspace, jointype, + outer_path, inner_path, extra); + if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) + return; + + /* + * If the inner path is parameterized, it is parameterized by the topmost + * parent of the outer rel, not the outer rel itself. Fix that. + */ + if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) + { + inner_path = reparameterize_path_by_child(root, inner_path, + outer_path->parent); + + /* + * If we could not translate the path, we can't create nest loop path. + */ + if (!inner_path) + return; + } + + /* Might be good enough to be worth trying, so let's try it. */ + add_partial_path(joinrel, (Path *) + create_nestloop_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + NULL)); +} + +/* + * try_mergejoin_path + * Consider a merge join path; if it appears useful, push it into + * the joinrel's pathlist via add_path(). 
+ */ +static void +try_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra, + bool is_partial) +{ + Relids required_outer; + JoinCostWorkspace workspace; + + if (is_partial) + { + try_partial_mergejoin_path(root, + joinrel, + outer_path, + inner_path, + pathkeys, + mergeclauses, + outersortkeys, + innersortkeys, + jointype, + extra); + return; + } + + /* + * Check to see if proposed path is still parameterized, and reject if the + * parameterization wouldn't be sensible. + */ + required_outer = calc_non_nestloop_required_outer(outer_path, + inner_path); + if (required_outer && + !bms_overlap(required_outer, extra->param_source_rels)) + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; + } + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. + */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * See comments in try_nestloop_path(). + */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + extra); + + if (add_path_precheck(joinrel, + workspace.startup_cost, workspace.total_cost, + pathkeys, required_outer)) + { + add_path(joinrel, (Path *) + create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + required_outer, + mergeclauses, + outersortkeys, + innersortkeys)); + } + else + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + } +} + +/* + * try_partial_mergejoin_path + * Consider a partial merge join path; if it appears useful, push it into + * the joinrel's pathlist via add_partial_path(). + */ +static void +try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinCostWorkspace workspace; + + /* + * See comments in try_partial_hashjoin_path(). + */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + + if (!bms_is_empty(inner_paramrels)) + return; + } + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. + */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * See comments in try_partial_nestloop_path(). + */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + extra); + + if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) + return; + + /* Might be good enough to be worth trying, so let's try it. 
*/ + add_partial_path(joinrel, (Path *) + create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + NULL, + mergeclauses, + outersortkeys, + innersortkeys)); +} + +/* + * try_hashjoin_path + * Consider a hash join path; if it appears useful, push it into + * the joinrel's pathlist via add_path(). + */ +static void +try_hashjoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *hashclauses, + JoinType jointype, + JoinPathExtraData *extra) +{ + Relids required_outer; + JoinCostWorkspace workspace; + + /* + * Check to see if proposed path is still parameterized, and reject if the + * parameterization wouldn't be sensible. + */ + required_outer = calc_non_nestloop_required_outer(outer_path, + inner_path); + if (required_outer && + !bms_overlap(required_outer, extra->param_source_rels)) + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + return; + } + + /* + * See comments in try_nestloop_path(). Also note that hashjoin paths + * never have any output pathkeys, per comments in create_hashjoin_path. + */ + initial_cost_hashjoin(root, &workspace, jointype, hashclauses, + outer_path, inner_path, extra, false); + + if (add_path_precheck(joinrel, + workspace.startup_cost, workspace.total_cost, + NIL, required_outer)) + { + add_path(joinrel, (Path *) + create_hashjoin_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + false, /* parallel_hash */ + extra->restrictlist, + required_outer, + hashclauses)); + } + else + { + /* Waste no memory when we reject a path here */ + bms_free(required_outer); + } +} + +/* + * try_partial_hashjoin_path + * Consider a partial hashjoin join path; if it appears useful, push it into + * the joinrel's partial_pathlist via add_partial_path(). + * The outer side is partial. If parallel_hash is true, then the inner path + * must be partial and will be run in parallel to create one or more shared + * hash tables; otherwise the inner path must be complete and a copy of it + * is run in every process to create separate identical private hash tables. + */ +static void +try_partial_hashjoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *hashclauses, + JoinType jointype, + JoinPathExtraData *extra, + bool parallel_hash) +{ + JoinCostWorkspace workspace; + + /* + * If the inner path is parameterized, the parameterization must be fully + * satisfied by the proposed outer path. Parameterized partial paths are + * not supported. The caller should already have verified that no lateral + * rels are required here. + */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + + if (!bms_is_empty(inner_paramrels)) + return; + } + + /* + * Before creating a path, get a quick lower bound on what it is likely to + * cost. Bail out right away if it looks terrible. + */ + initial_cost_hashjoin(root, &workspace, jointype, hashclauses, + outer_path, inner_path, extra, parallel_hash); + if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL)) + return; + + /* Might be good enough to be worth trying, so let's try it. 
*/ + add_partial_path(joinrel, (Path *) + create_hashjoin_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + parallel_hash, + extra->restrictlist, + NULL, + hashclauses)); +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static inline bool +clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, + RelOptInfo *innerrel) +{ + if (bms_is_subset(rinfo->left_relids, outerrel->relids) && + bms_is_subset(rinfo->right_relids, innerrel->relids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrel->relids) && + bms_is_subset(rinfo->right_relids, outerrel->relids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + +/* + * sort_inner_and_outer + * Create mergejoin join paths by explicitly sorting both the outer and + * inner join relations on each available merge ordering. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + */ +static void +sort_inner_and_outer(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinType save_jointype = jointype; + Path *outer_path; + Path *inner_path; + Path *cheapest_partial_outer = NULL; + Path *cheapest_safe_inner = NULL; + List *all_pathkeys; + ListCell *l; + + /* + * We only consider the cheapest-total-cost input paths, since we are + * assuming here that a sort is required. We will consider + * cheapest-startup-cost input paths later, and only if they don't need a + * sort. + * + * This function intentionally does not consider parameterized input + * paths, except when the cheapest-total is parameterized. If we did so, + * we'd have a combinatorial explosion of mergejoin paths of dubious + * value. This interacts with decisions elsewhere that also discriminate + * against mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. + */ + outer_path = outerrel->cheapest_total_path; + inner_path = innerrel->cheapest_total_path; + + /* + * If either cheapest-total path is parameterized by the other rel, we + * can't use a mergejoin. (There's no use looking for alternative input + * paths, since these should already be the least-parameterized available + * paths.) + */ + if (PATH_PARAM_BY_REL(outer_path, innerrel) || + PATH_PARAM_BY_REL(inner_path, outerrel)) + return; + + /* + * If unique-ification is requested, do it and then handle as a plain + * inner join. 
+ */ + if (jointype == JOIN_UNIQUE_OUTER) + { + outer_path = (Path *) create_unique_path(root, outerrel, + outer_path, extra->sjinfo); + Assert(outer_path); + jointype = JOIN_INNER; + } + else if (jointype == JOIN_UNIQUE_INNER) + { + inner_path = (Path *) create_unique_path(root, innerrel, + inner_path, extra->sjinfo); + Assert(inner_path); + jointype = JOIN_INNER; + } + + /* + * If the joinrel is parallel-safe, we may be able to consider a partial + * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the + * outer path will be partial, and therefore we won't be able to properly + * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and + * JOIN_RIGHT, because they can produce false null extended rows. Also, + * the resulting path must not be parameterized. + */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids)) + { + cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist); + + if (inner_path->parallel_safe) + cheapest_safe_inner = inner_path; + else if (save_jointype != JOIN_UNIQUE_INNER) + cheapest_safe_inner = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); + } + + /* + * Each possible ordering of the available mergejoin clauses will generate + * a differently-sorted result path at essentially the same cost. We have + * no basis for choosing one over another at this level of joining, but + * some sort orders may be more useful than others for higher-level + * mergejoins, so it's worth considering multiple orderings. + * + * Actually, it's not quite true that every mergeclause ordering will + * generate a different path order, because some of the clauses may be + * partially redundant (refer to the same EquivalenceClasses). Therefore, + * what we do is convert the mergeclause list to a list of canonical + * pathkeys, and then consider different orderings of the pathkeys. + * + * Generating a path for *every* permutation of the pathkeys doesn't seem + * like a winning strategy; the cost in planning time is too high. For + * now, we generate one path for each pathkey, listing that pathkey first + * and the rest in random order. This should allow at least a one-clause + * mergejoin without re-sorting against any other possible mergejoin + * partner path. But if we've not guessed the right ordering of secondary + * keys, we may end up evaluating clauses as qpquals when they could have + * been done as mergeclauses. (In practice, it's rare that there's more + * than two or three mergeclauses, so expending a huge amount of thought + * on that is probably not worth it.) + * + * The pathkey order returned by select_outer_pathkeys_for_merge() has + * some heuristics behind it (see that function), so be sure to try it + * exactly as-is as well as making variants. + */ + all_pathkeys = select_outer_pathkeys_for_merge(root, + extra->mergeclause_list, + joinrel); + + foreach(l, all_pathkeys) + { + PathKey *front_pathkey = (PathKey *) lfirst(l); + List *cur_mergeclauses; + List *outerkeys; + List *innerkeys; + List *merge_pathkeys; + + /* Make a pathkey list with this guy first */ + if (l != list_head(all_pathkeys)) + outerkeys = lcons(front_pathkey, + list_delete_nth_cell(list_copy(all_pathkeys), + foreach_current_index(l))); + else + outerkeys = all_pathkeys; /* no work at first one... 
*/ + + /* Sort the mergeclauses into the corresponding ordering */ + cur_mergeclauses = + find_mergeclauses_for_outer_pathkeys(root, + outerkeys, + extra->mergeclause_list); + + /* Should have used them all... */ + Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list)); + + /* Build sort pathkeys for the inner side */ + innerkeys = make_inner_pathkeys_for_merge(root, + cur_mergeclauses, + outerkeys); + + /* Build pathkeys representing output sort order */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerkeys); + + /* + * And now we can make the path. + * + * Note: it's possible that the cheapest paths will already be sorted + * properly. try_mergejoin_path will detect that case and suppress an + * explicit sort step, so we needn't do so here. + */ + try_mergejoin_path(root, + joinrel, + outer_path, + inner_path, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys, + jointype, + extra, + false); + + /* + * If we have partial outer and parallel safe inner path then try + * partial mergejoin path. + */ + if (cheapest_partial_outer && cheapest_safe_inner) + try_partial_mergejoin_path(root, + joinrel, + cheapest_partial_outer, + cheapest_safe_inner, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys, + jointype, + extra); + } +} + +/* + * generate_mergejoin_paths + * Creates possible mergejoin paths for input outerpath. + * + * We generate mergejoins if mergejoin clauses are available. We have + * two ways to generate the inner path for a mergejoin: sort the cheapest + * inner path, or use an inner path that is already suitably ordered for the + * merge. If we have several mergeclauses, it could be that there is no inner + * path (or only a very expensive one) for the full list of mergeclauses, but + * better paths exist if we truncate the mergeclause list (thereby discarding + * some sort key requirements). So, we consider truncations of the + * mergeclause list as well as the full list. (Ideally we'd consider all + * subsets of the mergeclause list, but that seems way too expensive.) + */ +static void +generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys, + bool is_partial) +{ + List *mergeclauses; + List *innersortkeys; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + JoinType save_jointype = jointype; + int num_sortkeys; + int sortkeycnt; + + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + + /* Look for useful mergeclauses (if any) */ + mergeclauses = + find_mergeclauses_for_outer_pathkeys(root, + outerpath->pathkeys, + extra->mergeclause_list); + + /* + * Done with this outer path if no chance for a mergejoin. + * + * Special corner case: for "x FULL JOIN y ON true", there will be no join + * clauses at all. Ordinarily we'd generate a clauseless nestloop path, + * but since mergejoin is our only join type that supports FULL JOIN + * without any join clauses, it's necessary to generate a clauseless + * mergejoin path instead. 
+ */ + if (mergeclauses == NIL) + { + if (jointype == JOIN_FULL) + /* okay to try for mergejoin */ ; + else + return; + } + if (useallclauses && + list_length(mergeclauses) != list_length(extra->mergeclause_list)) + return; + + /* Compute the required ordering of the inner path */ + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); + + /* + * Generate a mergejoin on the basis of sorting the cheapest inner. Since + * a sort will be needed, only cheapest total cost matters. (But + * try_mergejoin_path will do the right thing if inner_cheapest_total is + * already correctly sorted.) + */ + try_mergejoin_path(root, + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys, + jointype, + extra, + is_partial); + + /* Can't do anything else if inner path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + /* + * Look for presorted inner paths that satisfy the innersortkey list --- + * or any truncation thereof, if we are allowed to build a mergejoin using + * a subset of the merge clauses. Here, we consider both cheap startup + * cost and cheap total cost. + * + * Currently we do not consider parameterized inner paths here. This + * interacts with decisions elsewhere that also discriminate against + * mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. + * + * As we shorten the sortkey list, we should consider only paths that are + * strictly cheaper than (in particular, not the same as) any path found + * in an earlier iteration. Otherwise we'd be intentionally using fewer + * merge keys than a given path allows (treating the rest as plain + * joinquals), which is unlikely to be a good idea. Also, eliminating + * paths here on the basis of compare_path_costs is a lot cheaper than + * building the mergejoin path only to throw it away. + * + * If inner_cheapest_total is well enough sorted to have not required a + * sort in the path made above, we shouldn't make a duplicate path with + * it, either. We handle that case with the same logic that handles the + * previous consideration, by initializing the variables that track + * cheapest-so-far properly. Note that we do NOT reject + * inner_cheapest_total if we find it matches some shorter set of + * pathkeys. That case corresponds to using fewer mergekeys to avoid + * sorting inner_cheapest_total, whereas we did sort it above, so the + * plans being considered are different. + */ + if (pathkeys_contained_in(innersortkeys, + inner_cheapest_total->pathkeys)) + { + /* inner_cheapest_total didn't require a sort */ + cheapest_startup_inner = inner_cheapest_total; + cheapest_total_inner = inner_cheapest_total; + } + else + { + /* it did require a sort, at least for the full set of keys */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; + } + num_sortkeys = list_length(innersortkeys); + if (num_sortkeys > 1 && !useallclauses) + trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ + + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) + { + Path *innerpath; + List *newclauses = NIL; + + /* + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified + * destructively, which is why we made a copy... 
+ */ + trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + TOTAL_COST, + is_partial); + if (innerpath != NULL && + (cheapest_total_inner == NULL || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = + trim_mergeclauses_for_inner_pathkeys(root, + mergeclauses, + trialsortkeys); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + try_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra, + is_partial); + cheapest_total_inner = innerpath; + } + /* Same on the basis of cheapest startup cost ... */ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + STARTUP_COST, + is_partial); + if (innerpath != NULL && + (cheapest_startup_inner == NULL || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + if (innerpath != cheapest_total_inner) + { + /* + * Avoid rebuilding clause list if we already made one; saves + * memory in big join trees... + */ + if (newclauses == NIL) + { + if (sortkeycnt < num_sortkeys) + { + newclauses = + trim_mergeclauses_for_inner_pathkeys(root, + mergeclauses, + trialsortkeys); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + } + try_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra, + is_partial); + } + cheapest_startup_inner = innerpath; + } + + /* + * Don't consider truncated sortkeys if we need all clauses. + */ + if (useallclauses) + break; + } +} + +/* + * match_unsorted_outer + * Creates possible join paths for processing a single join relation + * 'joinrel' by employing either iterative substitution or + * mergejoining on each of its possible outer paths (considering + * only outer paths that are already ordered well enough for merging). + * + * We always generate a nestloop path for each available outer path. + * In fact we may generate as many as five: one on the cheapest-total-cost + * inner path, one on the same with materialization, one on the + * cheapest-startup-cost inner path (if different), one on the + * cheapest-total inner-indexscan path (if any), and one on the + * cheapest-startup inner-indexscan path (if different). + * + * We also consider mergejoins if mergejoin clauses are available. See + * detailed comments in generate_mergejoin_paths. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + */ +static void +match_unsorted_outer(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinType save_jointype = jointype; + bool nestjoinOK; + bool useallclauses; + Path *inner_cheapest_total = innerrel->cheapest_total_path; + Path *matpath = NULL; + ListCell *lc1; + + /* + * Nestloop only supports inner, left, semi, and anti joins. Also, if we + * are doing a right or full mergejoin, we must use *all* the mergeclauses + * as join clauses, else we will not have a valid plan. 
(Although these + * two flags are currently inverses, keep them separate for clarity and + * possible future changes.) + */ + switch (jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + nestjoinOK = true; + useallclauses = false; + break; + case JOIN_RIGHT: + case JOIN_FULL: + nestjoinOK = false; + useallclauses = true; + break; + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: + jointype = JOIN_INNER; + nestjoinOK = true; + useallclauses = false; + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) jointype); + nestjoinOK = false; /* keep compiler quiet */ + useallclauses = false; + break; + } + + /* + * If inner_cheapest_total is parameterized by the outer rel, ignore it; + * we will consider it below as a member of cheapest_parameterized_paths, + * but the other possibilities considered in this routine aren't usable. + */ + if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) + inner_cheapest_total = NULL; + + /* + * If we need to unique-ify the inner path, we will consider only the + * cheapest-total inner. + */ + if (save_jointype == JOIN_UNIQUE_INNER) + { + /* No way to do this with an inner path parameterized by outer rel */ + if (inner_cheapest_total == NULL) + return; + inner_cheapest_total = (Path *) + create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo); + Assert(inner_cheapest_total); + } + else if (nestjoinOK) + { + /* + * Consider materializing the cheapest inner path, unless + * enable_material is off or the path in question materializes its + * output anyway. + */ + if (enable_material && inner_cheapest_total != NULL && + !ExecMaterializesOutput(inner_cheapest_total->pathtype)) + matpath = (Path *) + create_material_path(innerrel, inner_cheapest_total); + } + + foreach(lc1, outerrel->pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; + + /* + * We cannot use an outer path that is parameterized by the inner rel. + */ + if (PATH_PARAM_BY_REL(outerpath, innerrel)) + continue; + + /* + * If we need to unique-ify the outer path, it's pointless to consider + * any but the cheapest outer. (XXX we don't consider parameterized + * outers, nor inners, for unique-ified cases. Should we?) + */ + if (save_jointype == JOIN_UNIQUE_OUTER) + { + if (outerpath != outerrel->cheapest_total_path) + continue; + outerpath = (Path *) create_unique_path(root, outerrel, + outerpath, extra->sjinfo); + Assert(outerpath); + } + + /* + * The result will have this sort order (even if it is implemented as + * a nestloop, and even if some of the mergeclauses are implemented by + * qpquals rather than as true mergeclauses): + */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + if (save_jointype == JOIN_UNIQUE_INNER) + { + /* + * Consider nestloop join, but only with the unique-ified cheapest + * inner path + */ + try_nestloop_path(root, + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + jointype, + extra); + } + else if (nestjoinOK) + { + /* + * Consider nestloop joins using this outer path and various + * available paths for the inner relation. We consider the + * cheapest-total paths for each available parameterization of the + * inner relation, including the unparameterized case. 
+ */ + ListCell *lc2; + + foreach(lc2, innerrel->cheapest_parameterized_paths) + { + Path *innerpath = (Path *) lfirst(lc2); + Path *mpath; + + try_nestloop_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + jointype, + extra); + + /* + * Try generating a memoize path and see if that makes the + * nested loop any cheaper. + */ + mpath = get_memoize_path(root, innerrel, outerrel, + innerpath, outerpath, jointype, + extra); + if (mpath != NULL) + try_nestloop_path(root, + joinrel, + outerpath, + mpath, + merge_pathkeys, + jointype, + extra); + } + + /* Also consider materialized form of the cheapest inner path */ + if (matpath != NULL) + try_nestloop_path(root, + joinrel, + outerpath, + matpath, + merge_pathkeys, + jointype, + extra); + } + + /* Can't do anything else if outer path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_OUTER) + continue; + + /* Can't do anything else if inner rel is parameterized by outer */ + if (inner_cheapest_total == NULL) + continue; + + /* Generate merge join paths */ + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, + save_jointype, extra, useallclauses, + inner_cheapest_total, merge_pathkeys, + false); + } + + /* + * Consider partial nestloop and mergejoin plan if outerrel has any + * partial path and the joinrel is parallel-safe. However, we can't + * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and + * therefore we won't be able to properly guarantee uniqueness. Nor can + * we handle joins needing lateral rels, since partial paths must not be + * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT, + * because they can produce false null extended rows. + */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids)) + { + if (nestjoinOK) + consider_parallel_nestloop(root, joinrel, outerrel, innerrel, + save_jointype, extra); + + /* + * If inner_cheapest_total is NULL or non parallel-safe then find the + * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we + * can't use any alternative inner path. + */ + if (inner_cheapest_total == NULL || + !inner_cheapest_total->parallel_safe) + { + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist); + } + + if (inner_cheapest_total) + consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, + save_jointype, extra, + inner_cheapest_total); + } +} + +/* + * consider_parallel_mergejoin + * Try to build partial paths for a joinrel by joining a partial path + * for the outer relation to a complete path for the inner relation. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + * 'inner_cheapest_total' cheapest total path for innerrel + */ +static void +consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total) +{ + ListCell *lc1; + + /* generate merge join path for each partial outer path */ + foreach(lc1, outerrel->partial_pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; + + /* + * Figure out what useful ordering any paths we create will have. 
+ */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, + extra, false, inner_cheapest_total, + merge_pathkeys, true); + } +} + +/* + * consider_parallel_nestloop + * Try to build partial paths for a joinrel by joining a partial path for the + * outer relation to a complete path for the inner relation. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + */ +static void +consider_parallel_nestloop(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinType save_jointype = jointype; + ListCell *lc1; + + if (jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + + foreach(lc1, outerrel->partial_pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *pathkeys; + ListCell *lc2; + + /* Figure out what useful ordering any paths we create will have. */ + pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + /* + * Try the cheapest parameterized paths; only those which will produce + * an unparameterized path when joined to this outerrel will survive + * try_partial_nestloop_path. The cheapest unparameterized path is + * also in this list. + */ + foreach(lc2, innerrel->cheapest_parameterized_paths) + { + Path *innerpath = (Path *) lfirst(lc2); + Path *mpath; + + /* Can't join to an inner path that is not parallel-safe */ + if (!innerpath->parallel_safe) + continue; + + /* + * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's + * cheapest_total_path, and we have to unique-ify it. (We might + * be able to relax this to allow other safe, unparameterized + * inner paths, but right now create_unique_path is not on board + * with that.) + */ + if (save_jointype == JOIN_UNIQUE_INNER) + { + if (innerpath != innerrel->cheapest_total_path) + continue; + innerpath = (Path *) create_unique_path(root, innerrel, + innerpath, + extra->sjinfo); + Assert(innerpath); + } + + try_partial_nestloop_path(root, joinrel, outerpath, innerpath, + pathkeys, jointype, extra); + + /* + * Try generating a memoize path and see if that makes the nested + * loop any cheaper. + */ + mpath = get_memoize_path(root, innerrel, outerrel, + innerpath, outerpath, jointype, + extra); + if (mpath != NULL) + try_partial_nestloop_path(root, joinrel, outerpath, mpath, + pathkeys, jointype, extra); + } + } +} + +/* + * hash_inner_and_outer + * Create hashjoin join paths by explicitly hashing both the outer and + * inner keys of each available hash clause. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + */ +static void +hash_inner_and_outer(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinType save_jointype = jointype; + bool isouterjoin = IS_OUTER_JOIN(jointype); + List *hashclauses; + ListCell *l; + + /* + * We need to build only one hashclauses list for any given pair of outer + * and inner relations; all of the hashable clauses will be used as keys. + * + * Scan the join's restrictinfo list to find hashjoinable clauses that are + * usable with this pair of sub-relations. 
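+ * A clause is usable here if it is marked hashjoinable (its operator
+ * is marked hashable) and it equates an expression from one input rel
+ * to an expression from the other, e.g. "outer.x = inner.y".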
+ */ + hashclauses = NIL; + foreach(l, extra->restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * If processing an outer join, only use its own join clauses for + * hashing. For inner joins we need not be so picky. + */ + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) + continue; + + if (!restrictinfo->can_join || + restrictinfo->hashjoinoperator == InvalidOid) + continue; /* not hashjoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer". + */ + if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) + continue; /* no good for these input relations */ + + hashclauses = lappend(hashclauses, restrictinfo); + } + + /* If we found any usable hashclauses, make paths */ + if (hashclauses) + { + /* + * We consider both the cheapest-total-cost and cheapest-startup-cost + * outer paths. There's no need to consider any but the + * cheapest-total-cost inner path, however. + */ + Path *cheapest_startup_outer = outerrel->cheapest_startup_path; + Path *cheapest_total_outer = outerrel->cheapest_total_path; + Path *cheapest_total_inner = innerrel->cheapest_total_path; + + /* + * If either cheapest-total path is parameterized by the other rel, we + * can't use a hashjoin. (There's no use looking for alternative + * input paths, since these should already be the least-parameterized + * available paths.) + */ + if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) || + PATH_PARAM_BY_REL(cheapest_total_inner, outerrel)) + return; + + /* Unique-ify if need be; we ignore parameterized possibilities */ + if (jointype == JOIN_UNIQUE_OUTER) + { + cheapest_total_outer = (Path *) + create_unique_path(root, outerrel, + cheapest_total_outer, extra->sjinfo); + Assert(cheapest_total_outer); + jointype = JOIN_INNER; + try_hashjoin_path(root, + joinrel, + cheapest_total_outer, + cheapest_total_inner, + hashclauses, + jointype, + extra); + /* no possibility of cheap startup here */ + } + else if (jointype == JOIN_UNIQUE_INNER) + { + cheapest_total_inner = (Path *) + create_unique_path(root, innerrel, + cheapest_total_inner, extra->sjinfo); + Assert(cheapest_total_inner); + jointype = JOIN_INNER; + try_hashjoin_path(root, + joinrel, + cheapest_total_outer, + cheapest_total_inner, + hashclauses, + jointype, + extra); + if (cheapest_startup_outer != NULL && + cheapest_startup_outer != cheapest_total_outer) + try_hashjoin_path(root, + joinrel, + cheapest_startup_outer, + cheapest_total_inner, + hashclauses, + jointype, + extra); + } + else + { + /* + * For other jointypes, we consider the cheapest startup outer + * together with the cheapest total inner, and then consider + * pairings of cheapest-total paths including parameterized ones. + * There is no use in generating parameterized paths on the basis + * of possibly cheap startup cost, so this is sufficient. + */ + ListCell *lc1; + ListCell *lc2; + + if (cheapest_startup_outer != NULL) + try_hashjoin_path(root, + joinrel, + cheapest_startup_outer, + cheapest_total_inner, + hashclauses, + jointype, + extra); + + foreach(lc1, outerrel->cheapest_parameterized_paths) + { + Path *outerpath = (Path *) lfirst(lc1); + + /* + * We cannot use an outer path that is parameterized by the + * inner rel. + */ + if (PATH_PARAM_BY_REL(outerpath, innerrel)) + continue; + + foreach(lc2, innerrel->cheapest_parameterized_paths) + { + Path *innerpath = (Path *) lfirst(lc2); + + /* + * We cannot use an inner path that is parameterized by + * the outer rel, either. 
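+ * (Only a nestloop can pass parameter values from one join input to
+ * the other at runtime; a hash join reads its inner side independently
+ * of the outer rows, so such paths are unusable here.)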
+ */ + if (PATH_PARAM_BY_REL(innerpath, outerrel)) + continue; + + if (outerpath == cheapest_startup_outer && + innerpath == cheapest_total_inner) + continue; /* already tried it */ + + try_hashjoin_path(root, + joinrel, + outerpath, + innerpath, + hashclauses, + jointype, + extra); + } + } + } + + /* + * If the joinrel is parallel-safe, we may be able to consider a + * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER, + * because the outer path will be partial, and therefore we won't be + * able to properly guarantee uniqueness. Similarly, we can't handle + * JOIN_FULL and JOIN_RIGHT, because they can produce false null + * extended rows. Also, the resulting path must not be parameterized. + * We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel + * Hash, since in that case we're back to a single hash table with a + * single set of match bits for each batch, but that will require + * figuring out a deadlock-free way to wait for the probe to finish. + */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids)) + { + Path *cheapest_partial_outer; + Path *cheapest_partial_inner = NULL; + Path *cheapest_safe_inner = NULL; + + cheapest_partial_outer = + (Path *) linitial(outerrel->partial_pathlist); + + /* + * Can we use a partial inner plan too, so that we can build a + * shared hash table in parallel? We can't handle + * JOIN_UNIQUE_INNER because we can't guarantee uniqueness. + */ + if (innerrel->partial_pathlist != NIL && + save_jointype != JOIN_UNIQUE_INNER && + enable_parallel_hash) + { + cheapest_partial_inner = + (Path *) linitial(innerrel->partial_pathlist); + try_partial_hashjoin_path(root, joinrel, + cheapest_partial_outer, + cheapest_partial_inner, + hashclauses, jointype, extra, + true /* parallel_hash */ ); + } + + /* + * Normally, given that the joinrel is parallel-safe, the cheapest + * total inner path will also be parallel-safe, but if not, we'll + * have to search for the cheapest safe, unparameterized inner + * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative + * inner path. + */ + if (cheapest_total_inner->parallel_safe) + cheapest_safe_inner = cheapest_total_inner; + else if (save_jointype != JOIN_UNIQUE_INNER) + cheapest_safe_inner = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); + + if (cheapest_safe_inner != NULL) + try_partial_hashjoin_path(root, joinrel, + cheapest_partial_outer, + cheapest_safe_inner, + hashclauses, jointype, extra, + false /* parallel_hash */ ); + } + } +} + +/* + * select_mergejoin_clauses + * Select mergejoin clauses that are usable for a particular join. + * Returns a list of RestrictInfo nodes for those clauses. + * + * *mergejoin_allowed is normally set to true, but it is set to false if + * this is a right/full join and there are nonmergejoinable join clauses. + * The executor's mergejoin machinery cannot handle such cases, so we have + * to avoid generating a mergejoin plan. (Note that this flag does NOT + * consider whether there are actually any mergejoinable clauses. This is + * correct because in some cases we need to build a clauseless mergejoin. + * Simply returning NIL is therefore not enough to distinguish safe from + * unsafe cases.) + * + * We also mark each selected RestrictInfo to show which side is currently + * being considered as outer. 
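+ * (Concretely, clause_sides_match_join records that choice in each
+ * clause's outer_is_left flag.)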
These are transient markings that are only + * good for the duration of the current add_paths_to_joinrel() call! + * + * We examine each restrictinfo clause known for the join to see + * if it is mergejoinable and involves vars from the two sub-relations + * currently of interest. + */ +static List * +select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype, + bool *mergejoin_allowed) +{ + List *result_list = NIL; + bool isouterjoin = IS_OUTER_JOIN(jointype); + bool have_nonmergeable_joinclause = false; + ListCell *l; + + foreach(l, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + + /* + * If processing an outer join, only use its own join clauses in the + * merge. For inner joins we can use pushed-down clauses too. (Note: + * we don't set have_nonmergeable_joinclause here because pushed-down + * clauses will become otherquals not joinquals.) + */ + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) + continue; + + /* Check that clause is a mergeable operator clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + { + /* + * The executor can handle extra joinquals that are constants, but + * not anything else, when doing right/full merge join. (The + * reason to support constants is so we can do FULL JOIN ON + * FALSE.) + */ + if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const)) + have_nonmergeable_joinclause = true; + continue; /* not mergejoinable */ + } + + /* + * Check if clause has the form "outer op inner" or "inner op outer". + */ + if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) + { + have_nonmergeable_joinclause = true; + continue; /* no good for these input relations */ + } + + /* + * Insist that each side have a non-redundant eclass. This + * restriction is needed because various bits of the planner expect + * that each clause in a merge be associable with some pathkey in a + * canonical pathkey list, but redundant eclasses can't appear in + * canonical sort orderings. (XXX it might be worth relaxing this, + * but not enough time to address it for 8.3.) + * + * Note: it would be bad if this condition failed for an otherwise + * mergejoinable FULL JOIN clause, since that would result in + * undesirable planner failure. I believe that is not possible + * however; a variable involved in a full join could only appear in + * below_outer_join eclasses, which aren't considered redundant. + * + * This case *can* happen for left/right join clauses: the outer-side + * variable could be equated to a constant. Because we will propagate + * that constant across the join clause, the loss of ability to do a + * mergejoin is not really all that big a deal, and so it's not clear + * that improving this is important. + */ + update_mergeclause_eclasses(root, restrictinfo); + + if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || + EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) + { + have_nonmergeable_joinclause = true; + continue; /* can't handle redundant eclasses */ + } + + result_list = lappend(result_list, restrictinfo); + } + + /* + * Report whether mergejoin is allowed (see comment at top of function). 
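+ * Right and full joins are allowed only if every join clause turned out
+ * to be mergejoinable (or a constant); for other join types, leftover
+ * clauses can simply be evaluated as additional join quals, so mergejoin
+ * remains allowed.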
+ */ + switch (jointype) + { + case JOIN_RIGHT: + case JOIN_FULL: + *mergejoin_allowed = !have_nonmergeable_joinclause; + break; + default: + *mergejoin_allowed = true; + break; + } + + return result_list; +} diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c new file mode 100644 index 0000000..e8c180c --- /dev/null +++ b/src/backend/optimizer/path/joinrels.c @@ -0,0 +1,1782 @@ +/*------------------------------------------------------------------------- + * + * joinrels.c + * Routines to determine which relations should be joined + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/joinrels.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "miscadmin.h" +#include "optimizer/appendinfo.h" +#include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "partitioning/partbounds.h" +#include "utils/memutils.h" + + +static void make_rels_by_clause_joins(PlannerInfo *root, + RelOptInfo *old_rel, + List *other_rels_list, + ListCell *other_rels); +static void make_rels_by_clauseless_joins(PlannerInfo *root, + RelOptInfo *old_rel, + List *other_rels); +static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); +static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); +static bool restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, + bool only_pushed_down); +static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist); +static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List *parent_restrictlist); +static SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root, + SpecialJoinInfo *parent_sjinfo, + Relids left_relids, Relids right_relids); +static void compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List **parts1, List **parts2); +static void get_matching_part_pairs(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, + List **parts1, List **parts2); + + +/* + * join_search_one_level + * Consider ways to produce join relations containing exactly 'level' + * jointree items. (This is one step of the dynamic-programming method + * embodied in standard_join_search.) Join rel nodes for each feasible + * combination of lower-level rels are created and returned in a list. + * Implementation paths are created for each such joinrel, too. + * + * level: level of rels we want to make this time + * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items + * + * The result is returned in root->join_rel_level[level]. + */ +void +join_search_one_level(PlannerInfo *root, int level) +{ + List **joinrels = root->join_rel_level; + ListCell *r; + int k; + + Assert(joinrels[level] == NIL); + + /* Set join_cur_level so that new joinrels are added to proper list */ + root->join_cur_level = level; + + /* + * First, consider left-sided and right-sided plans, in which rels of + * exactly level-1 member relations are joined against initial relations. 
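+ * (At level 3, for instance, each two-rel joinrel from the previous
+ * level is considered for joining with each base rel it does not
+ * already contain.)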
+ * We prefer to join using join clauses, but if we find a rel of level-1 + * members that has no join clauses, we will generate Cartesian-product + * joins against all initial rels not already contained in it. + */ + foreach(r, joinrels[level - 1]) + { + RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); + + if (old_rel->joininfo != NIL || old_rel->has_eclass_joins || + has_join_restriction(root, old_rel)) + { + /* + * There are join clauses or join order restrictions relevant to + * this rel, so consider joins between this rel and (only) those + * initial rels it is linked to by a clause or restriction. + * + * At level 2 this condition is symmetric, so there is no need to + * look at initial rels before this one in the list; we already + * considered such joins when we were at the earlier rel. (The + * mirror-image joins are handled automatically by make_join_rel.) + * In later passes (level > 2), we join rels of the previous level + * to each initial rel they don't already include but have a join + * clause or restriction with. + */ + List *other_rels_list; + ListCell *other_rels; + + if (level == 2) /* consider remaining initial rels */ + { + other_rels_list = joinrels[level - 1]; + other_rels = lnext(other_rels_list, r); + } + else /* consider all initial rels */ + { + other_rels_list = joinrels[1]; + other_rels = list_head(other_rels_list); + } + + make_rels_by_clause_joins(root, + old_rel, + other_rels_list, + other_rels); + } + else + { + /* + * Oops, we have a relation that is not joined to any other + * relation, either directly or by join-order restrictions. + * Cartesian product time. + * + * We consider a cartesian product with each not-already-included + * initial rel, whether it has other join clauses or not. At + * level 2, if there are two or more clauseless initial rels, we + * will redundantly consider joining them in both directions; but + * such cases aren't common enough to justify adding complexity to + * avoid the duplicated effort. + */ + make_rels_by_clauseless_joins(root, + old_rel, + joinrels[1]); + } + } + + /* + * Now, consider "bushy plans" in which relations of k initial rels are + * joined to relations of level-k initial rels, for 2 <= k <= level-2. + * + * We only consider bushy-plan joins for pairs of rels where there is a + * suitable join clause (or join order restriction), in order to avoid + * unreasonable growth of planning time. + */ + for (k = 2;; k++) + { + int other_level = level - k; + + /* + * Since make_join_rel(x, y) handles both x,y and y,x cases, we only + * need to go as far as the halfway point. + */ + if (k > other_level) + break; + + foreach(r, joinrels[k]) + { + RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); + List *other_rels_list; + ListCell *other_rels; + ListCell *r2; + + /* + * We can ignore relations without join clauses here, unless they + * participate in join-order restrictions --- then we might have + * to force a bushy join plan. 
+ */ + if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins && + !has_join_restriction(root, old_rel)) + continue; + + if (k == other_level) + { + /* only consider remaining rels */ + other_rels_list = joinrels[k]; + other_rels = lnext(other_rels_list, r); + } + else + { + other_rels_list = joinrels[other_level]; + other_rels = list_head(other_rels_list); + } + + for_each_cell(r2, other_rels_list, other_rels) + { + RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2); + + if (!bms_overlap(old_rel->relids, new_rel->relids)) + { + /* + * OK, we can build a rel of the right level from this + * pair of rels. Do so if there is at least one relevant + * join clause or join order restriction. + */ + if (have_relevant_joinclause(root, old_rel, new_rel) || + have_join_order_restriction(root, old_rel, new_rel)) + { + (void) make_join_rel(root, old_rel, new_rel); + } + } + } + } + } + + /*---------- + * Last-ditch effort: if we failed to find any usable joins so far, force + * a set of cartesian-product joins to be generated. This handles the + * special case where all the available rels have join clauses but we + * cannot use any of those clauses yet. This can only happen when we are + * considering a join sub-problem (a sub-joinlist) and all the rels in the + * sub-problem have only join clauses with rels outside the sub-problem. + * An example is + * + * SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ... + * WHERE a.w = c.x and b.y = d.z; + * + * If the "a INNER JOIN b" sub-problem does not get flattened into the + * upper level, we must be willing to make a cartesian join of a and b; + * but the code above will not have done so, because it thought that both + * a and b have joinclauses. We consider only left-sided and right-sided + * cartesian joins in this case (no bushy). + *---------- + */ + if (joinrels[level] == NIL) + { + /* + * This loop is just like the first one, except we always call + * make_rels_by_clauseless_joins(). + */ + foreach(r, joinrels[level - 1]) + { + RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); + + make_rels_by_clauseless_joins(root, + old_rel, + joinrels[1]); + } + + /*---------- + * When special joins are involved, there may be no legal way + * to make an N-way join for some values of N. For example consider + * + * SELECT ... FROM t1 WHERE + * x IN (SELECT ... FROM t2,t3 WHERE ...) AND + * y IN (SELECT ... FROM t4,t5 WHERE ...) + * + * We will flatten this query to a 5-way join problem, but there are + * no 4-way joins that join_is_legal() will consider legal. We have + * to accept failure at level 4 and go on to discover a workable + * bushy plan at level 5. + * + * However, if there are no special joins and no lateral references + * then join_is_legal() should never fail, and so the following sanity + * check is useful. + *---------- + */ + if (joinrels[level] == NIL && + root->join_info_list == NIL && + !root->hasLateralRTEs) + elog(ERROR, "failed to build any %d-way joins", level); + } +} + +/* + * make_rels_by_clause_joins + * Build joins between the given relation 'old_rel' and other relations + * that participate in join clauses that 'old_rel' also participates in + * (or participate in join-order restrictions with it). + * The join rels are returned in root->join_rel_level[join_cur_level]. + * + * Note: at levels above 2 we will generate the same joined relation in + * multiple ways --- for example (a join b) join c is the same RelOptInfo as + * (b join c) join a, though the second case will add a different set of Paths + * to it. 
This is the reason for using the join_rel_level mechanism, which + * automatically ensures that each new joinrel is only added to the list once. + * + * 'old_rel' is the relation entry for the relation to be joined + * 'other_rels_list': a list containing the other + * rels to be considered for joining + * 'other_rels': the first cell to be considered + * + * Currently, this is only used with initial rels in other_rels, but it + * will work for joining to joinrels too. + */ +static void +make_rels_by_clause_joins(PlannerInfo *root, + RelOptInfo *old_rel, + List *other_rels_list, + ListCell *other_rels) +{ + ListCell *l; + + for_each_cell(l, other_rels_list, other_rels) + { + RelOptInfo *other_rel = (RelOptInfo *) lfirst(l); + + if (!bms_overlap(old_rel->relids, other_rel->relids) && + (have_relevant_joinclause(root, old_rel, other_rel) || + have_join_order_restriction(root, old_rel, other_rel))) + { + (void) make_join_rel(root, old_rel, other_rel); + } + } +} + +/* + * make_rels_by_clauseless_joins + * Given a relation 'old_rel' and a list of other relations + * 'other_rels', create a join relation between 'old_rel' and each + * member of 'other_rels' that isn't already included in 'old_rel'. + * The join rels are returned in root->join_rel_level[join_cur_level]. + * + * 'old_rel' is the relation entry for the relation to be joined + * 'other_rels': a list containing the other rels to be considered for joining + * + * Currently, this is only used with initial rels in other_rels, but it would + * work for joining to joinrels too. + */ +static void +make_rels_by_clauseless_joins(PlannerInfo *root, + RelOptInfo *old_rel, + List *other_rels) +{ + ListCell *l; + + foreach(l, other_rels) + { + RelOptInfo *other_rel = (RelOptInfo *) lfirst(l); + + if (!bms_overlap(other_rel->relids, old_rel->relids)) + { + (void) make_join_rel(root, old_rel, other_rel); + } + } +} + + +/* + * join_is_legal + * Determine whether a proposed join is legal given the query's + * join order constraints; and if it is, determine the join type. + * + * Caller must supply not only the two rels, but the union of their relids. + * (We could simplify the API by computing joinrelids locally, but this + * would be redundant work in the normal path through make_join_rel.) + * + * On success, *sjinfo_p is set to NULL if this is to be a plain inner join, + * else it's set to point to the associated SpecialJoinInfo node. Also, + * *reversed_p is set true if the given relations need to be swapped to + * match the SpecialJoinInfo node. + */ +static bool +join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, + Relids joinrelids, + SpecialJoinInfo **sjinfo_p, bool *reversed_p) +{ + SpecialJoinInfo *match_sjinfo; + bool reversed; + bool unique_ified; + bool must_be_leftjoin; + ListCell *l; + + /* + * Ensure output params are set on failure return. This is just to + * suppress uninitialized-variable warnings from overly anal compilers. + */ + *sjinfo_p = NULL; + *reversed_p = false; + + /* + * If we have any special joins, the proposed join might be illegal; and + * in any case we have to determine its join type. Scan the join info + * list for matches and conflicts. + */ + match_sjinfo = NULL; + reversed = false; + unique_ified = false; + must_be_leftjoin = false; + + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + /* + * This special join is not relevant unless its RHS overlaps the + * proposed join. 
(Check this first as a fast path for dismissing + * most irrelevant SJs quickly.) + */ + if (!bms_overlap(sjinfo->min_righthand, joinrelids)) + continue; + + /* + * Also, not relevant if proposed join is fully contained within RHS + * (ie, we're still building up the RHS). + */ + if (bms_is_subset(joinrelids, sjinfo->min_righthand)) + continue; + + /* + * Also, not relevant if SJ is already done within either input. + */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel1->relids)) + continue; + if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) + continue; + + /* + * If it's a semijoin and we already joined the RHS to any other rels + * within either input, then we must have unique-ified the RHS at that + * point (see below). Therefore the semijoin is no longer relevant in + * this join path. + */ + if (sjinfo->jointype == JOIN_SEMI) + { + if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) && + !bms_equal(sjinfo->syn_righthand, rel1->relids)) + continue; + if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) && + !bms_equal(sjinfo->syn_righthand, rel2->relids)) + continue; + } + + /* + * If one input contains min_lefthand and the other contains + * min_righthand, then we can perform the SJ at this join. + * + * Reject if we get matches to more than one SJ; that implies we're + * considering something that's not really valid. + */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) + { + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = false; + } + else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) && + bms_is_subset(sjinfo->min_righthand, rel1->relids)) + { + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = true; + } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel2->relids) && + create_unique_path(root, rel2, rel2->cheapest_total_path, + sjinfo) != NULL) + { + /*---------- + * For a semijoin, we can join the RHS to anything else by + * unique-ifying the RHS (if the RHS can be unique-ified). + * We will only get here if we have the full RHS but less + * than min_lefthand on the LHS. + * + * The reason to consider such a join path is exemplified by + * SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c) + * If we insist on doing this as a semijoin we will first have + * to form the cartesian product of A*B. But if we unique-ify + * C then the semijoin becomes a plain innerjoin and we can join + * in any order, eg C to A and then to B. When C is much smaller + * than A and B this can be a huge win. So we allow C to be + * joined to just A or just B here, and then make_join_rel has + * to handle the case properly. + * + * Note that actually we'll allow unique-ified C to be joined to + * some other relation D here, too. That is legal, if usually not + * very sane, and this routine is only concerned with legality not + * with whether the join is good strategy. 
+ *---------- + */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = false; + unique_ified = true; + } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel1->relids) && + create_unique_path(root, rel1, rel1->cheapest_total_path, + sjinfo) != NULL) + { + /* Reversed semijoin case */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = true; + unique_ified = true; + } + else + { + /* + * Otherwise, the proposed join overlaps the RHS but isn't a valid + * implementation of this SJ. But don't panic quite yet: the RHS + * violation might have occurred previously, in one or both input + * relations, in which case we must have previously decided that + * it was OK to commute some other SJ with this one. If we need + * to perform this join to finish building up the RHS, rejecting + * it could lead to not finding any plan at all. (This can occur + * because of the heuristics elsewhere in this file that postpone + * clauseless joins: we might not consider doing a clauseless join + * within the RHS until after we've performed other, validly + * commutable SJs with one or both sides of the clauseless join.) + * This consideration boils down to the rule that if both inputs + * overlap the RHS, we can allow the join --- they are either + * fully within the RHS, or represent previously-allowed joins to + * rels outside it. + */ + if (bms_overlap(rel1->relids, sjinfo->min_righthand) && + bms_overlap(rel2->relids, sjinfo->min_righthand)) + continue; /* assume valid previous violation of RHS */ + + /* + * The proposed join could still be legal, but only if we're + * allowed to associate it into the RHS of this SJ. That means + * this SJ must be a LEFT join (not SEMI or ANTI, and certainly + * not FULL) and the proposed join must not overlap the LHS. + */ + if (sjinfo->jointype != JOIN_LEFT || + bms_overlap(joinrelids, sjinfo->min_lefthand)) + return false; /* invalid join path */ + + /* + * To be valid, the proposed join must be a LEFT join; otherwise + * it can't associate into this SJ's RHS. But we may not yet have + * found the SpecialJoinInfo matching the proposed join, so we + * can't test that yet. Remember the requirement for later. + */ + must_be_leftjoin = true; + } + } + + /* + * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the + * proposed join can't associate into an SJ's RHS. + * + * Also, fail if the proposed join's predicate isn't strict; we're + * essentially checking to see if we can apply outer-join identity 3, and + * that's a requirement. (This check may be redundant with checks in + * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.) + */ + if (must_be_leftjoin && + (match_sjinfo == NULL || + match_sjinfo->jointype != JOIN_LEFT || + !match_sjinfo->lhs_strict)) + return false; /* invalid join path */ + + /* + * We also have to check for constraints imposed by LATERAL references. + */ + if (root->hasLateralRTEs) + { + bool lateral_fwd; + bool lateral_rev; + Relids join_lateral_rels; + + /* + * The proposed rels could each contain lateral references to the + * other, in which case the join is impossible. If there are lateral + * references in just one direction, then the join has to be done with + * a nestloop with the lateral referencer on the inside. If the join + * matches an SJ that cannot be implemented by such a nestloop, the + * join is impossible. 
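+ * (For example, given "FROM a, LATERAL (SELECT * FROM b
+ * WHERE b.y = a.x) ss", the subquery ss has to go on the inside of
+ * a nestloop whose outer side supplies a.)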
+ * + * Also, if the lateral reference is only indirect, we should reject + * the join; whatever rel(s) the reference chain goes through must be + * joined to first. + * + * Another case that might keep us from building a valid plan is the + * implementation restriction described by have_dangerous_phv(). + */ + lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids); + lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids); + if (lateral_fwd && lateral_rev) + return false; /* have lateral refs in both directions */ + if (lateral_fwd) + { + /* has to be implemented as nestloop with rel1 on left */ + if (match_sjinfo && + (reversed || + unique_ified || + match_sjinfo->jointype == JOIN_FULL)) + return false; /* not implementable as nestloop */ + /* check there is a direct reference from rel2 to rel1 */ + if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids)) + return false; /* only indirect refs, so reject */ + /* check we won't have a dangerous PHV */ + if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids)) + return false; /* might be unable to handle required PHV */ + } + else if (lateral_rev) + { + /* has to be implemented as nestloop with rel2 on left */ + if (match_sjinfo && + (!reversed || + unique_ified || + match_sjinfo->jointype == JOIN_FULL)) + return false; /* not implementable as nestloop */ + /* check there is a direct reference from rel1 to rel2 */ + if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids)) + return false; /* only indirect refs, so reject */ + /* check we won't have a dangerous PHV */ + if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids)) + return false; /* might be unable to handle required PHV */ + } + + /* + * LATERAL references could also cause problems later on if we accept + * this join: if the join's minimum parameterization includes any rels + * that would have to be on the inside of an outer join with this join + * rel, then it's never going to be possible to build the complete + * query using this join. We should reject this join not only because + * it'll save work, but because if we don't, the clauseless-join + * heuristics might think that legality of this join means that some + * other join rel need not be formed, and that could lead to failure + * to find any plan at all. We have to consider not only rels that + * are directly on the inner side of an OJ with the joinrel, but also + * ones that are indirectly so, so search to find all such rels. 
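+ * The loop below grows join_plus_rhs transitively: whenever some
+ * SJ's LHS overlaps the set accumulated so far, that SJ's RHS rels
+ * are added as well, repeating until nothing more can be added;
+ * only then do we check for overlap with the lateral rels.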
+ */ + join_lateral_rels = min_join_parameterization(root, joinrelids, + rel1, rel2); + if (join_lateral_rels) + { + Relids join_plus_rhs = bms_copy(joinrelids); + bool more; + + do + { + more = false; + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + /* ignore full joins --- their ordering is predetermined */ + if (sjinfo->jointype == JOIN_FULL) + continue; + + if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) && + !bms_is_subset(sjinfo->min_righthand, join_plus_rhs)) + { + join_plus_rhs = bms_add_members(join_plus_rhs, + sjinfo->min_righthand); + more = true; + } + } + } while (more); + if (bms_overlap(join_plus_rhs, join_lateral_rels)) + return false; /* will not be able to join to some RHS rel */ + } + } + + /* Otherwise, it's a valid join */ + *sjinfo_p = match_sjinfo; + *reversed_p = reversed; + return true; +} + + +/* + * make_join_rel + * Find or create a join RelOptInfo that represents the join of + * the two given rels, and add to it path information for paths + * created with the two rels as outer and inner rel. + * (The join rel may already contain paths generated from other + * pairs of rels that add up to the same set of base rels.) + * + * NB: will return NULL if attempted join is not valid. This can happen + * when working with outer joins, or with IN or EXISTS clauses that have been + * turned into joins. + */ +RelOptInfo * +make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) +{ + Relids joinrelids; + SpecialJoinInfo *sjinfo; + bool reversed; + SpecialJoinInfo sjinfo_data; + RelOptInfo *joinrel; + List *restrictlist; + + /* We should never try to join two overlapping sets of rels. */ + Assert(!bms_overlap(rel1->relids, rel2->relids)); + + /* Construct Relids set that identifies the joinrel. */ + joinrelids = bms_union(rel1->relids, rel2->relids); + + /* Check validity and determine join type. */ + if (!join_is_legal(root, rel1, rel2, joinrelids, + &sjinfo, &reversed)) + { + /* invalid join path */ + bms_free(joinrelids); + return NULL; + } + + /* Swap rels if needed to match the join info. */ + if (reversed) + { + RelOptInfo *trel = rel1; + + rel1 = rel2; + rel2 = trel; + } + + /* + * If it's a plain inner join, then we won't have found anything in + * join_info_list. Make up a SpecialJoinInfo so that selectivity + * estimation functions will know what's being joined. + */ + if (sjinfo == NULL) + { + sjinfo = &sjinfo_data; + sjinfo->type = T_SpecialJoinInfo; + sjinfo->min_lefthand = rel1->relids; + sjinfo->min_righthand = rel2->relids; + sjinfo->syn_lefthand = rel1->relids; + sjinfo->syn_righthand = rel2->relids; + sjinfo->jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo->lhs_strict = false; + sjinfo->delay_upper_joins = false; + sjinfo->semi_can_btree = false; + sjinfo->semi_can_hash = false; + sjinfo->semi_operators = NIL; + sjinfo->semi_rhs_exprs = NIL; + } + + /* + * Find or build the join RelOptInfo, and compute the restrictlist that + * goes with this particular joining. + */ + joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo, + &restrictlist); + + /* + * If we've already proven this join is empty, we needn't consider any + * more paths for it. + */ + if (is_dummy_rel(joinrel)) + { + bms_free(joinrelids); + return joinrel; + } + + /* Add paths to the join relation. 
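+ * (populate_joinrel_with_paths dispatches on the join type and, at the
+ * end, also tries partitionwise joining.)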
*/ + populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo, + restrictlist); + + bms_free(joinrelids); + + return joinrel; +} + +/* + * populate_joinrel_with_paths + * Add paths to the given joinrel for given pair of joining relations. The + * SpecialJoinInfo provides details about the join and the restrictlist + * contains the join clauses and the other clauses applicable for given pair + * of the joining relations. + */ +static void +populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist) +{ + /* + * Consider paths using each rel as both outer and inner. Depending on + * the join type, a provably empty outer or inner rel might mean the join + * is provably empty too; in which case throw away any previously computed + * paths and mark the join as dummy. (We do it this way since it's + * conceivable that dummy-ness of a multi-element join might only be + * noticeable for certain construction paths.) + * + * Also, a provably constant-false join restriction typically means that + * we can skip evaluating one or both sides of the join. We do this by + * marking the appropriate rel as dummy. For outer joins, a + * constant-false restriction that is pushed down still means the whole + * join is dummy, while a non-pushed-down one means that no inner rows + * will join so we can treat the inner rel as dummy. + * + * We need only consider the jointypes that appear in join_info_list, plus + * JOIN_INNER. + */ + switch (sjinfo->jointype) + { + case JOIN_INNER: + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist, joinrel, false)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_INNER, sjinfo, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, + JOIN_INNER, sjinfo, + restrictlist); + break; + case JOIN_LEFT: + if (is_dummy_rel(rel1) || + restriction_is_constant_false(restrictlist, joinrel, true)) + { + mark_dummy_rel(joinrel); + break; + } + if (restriction_is_constant_false(restrictlist, joinrel, false) && + bms_is_subset(rel2->relids, sjinfo->syn_righthand)) + mark_dummy_rel(rel2); + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_LEFT, sjinfo, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, + JOIN_RIGHT, sjinfo, + restrictlist); + break; + case JOIN_FULL: + if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) || + restriction_is_constant_false(restrictlist, joinrel, true)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_FULL, sjinfo, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, + JOIN_FULL, sjinfo, + restrictlist); + + /* + * If there are join quals that aren't mergeable or hashable, we + * may not be able to build any valid plan. Complain here so that + * we can give a somewhat-useful error message. (Since we have no + * flexibility of planning for a full join, there's no chance of + * succeeding later with another pair of input rels.) + */ + if (joinrel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions"))); + break; + case JOIN_SEMI: + + /* + * We might have a normal semijoin, or a case where we don't have + * enough rels to do the semijoin but can unique-ify the RHS and + * then do an innerjoin (see comments in join_is_legal). 
In the + * latter case we can't apply JOIN_SEMI joining. + */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) + { + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist, joinrel, false)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_SEMI, sjinfo, + restrictlist); + } + + /* + * If we know how to unique-ify the RHS and one input rel is + * exactly the RHS (not a superset) we can consider unique-ifying + * it and then doing a regular join. (The create_unique_path + * check here is probably redundant with what join_is_legal did, + * but if so the check is cheap because it's cached. So test + * anyway to be sure.) + */ + if (bms_equal(sjinfo->syn_righthand, rel2->relids) && + create_unique_path(root, rel2, rel2->cheapest_total_path, + sjinfo) != NULL) + { + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist, joinrel, false)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_UNIQUE_INNER, sjinfo, + restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, + JOIN_UNIQUE_OUTER, sjinfo, + restrictlist); + } + break; + case JOIN_ANTI: + if (is_dummy_rel(rel1) || + restriction_is_constant_false(restrictlist, joinrel, true)) + { + mark_dummy_rel(joinrel); + break; + } + if (restriction_is_constant_false(restrictlist, joinrel, false) && + bms_is_subset(rel2->relids, sjinfo->syn_righthand)) + mark_dummy_rel(rel2); + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_ANTI, sjinfo, + restrictlist); + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype); + break; + } + + /* Apply partitionwise join technique, if possible. */ + try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); +} + + +/* + * have_join_order_restriction + * Detect whether the two relations should be joined to satisfy + * a join-order restriction arising from special or lateral joins. + * + * In practice this is always used with have_relevant_joinclause(), and so + * could be merged with that function, but it seems clearer to separate the + * two concerns. We need this test because there are degenerate cases where + * a clauseless join must be performed to satisfy join-order restrictions. + * Also, if one rel has a lateral reference to the other, or both are needed + * to compute some PHV, we should consider joining them even if the join would + * be clauseless. + * + * Note: this is only a problem if one side of a degenerate outer join + * contains multiple rels, or a clauseless join is required within an + * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in + * join_search_one_level(). We could dispense with this test if we were + * willing to try bushy plans in the "last ditch" case, but that seems much + * less efficient. + */ +bool +have_join_order_restriction(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2) +{ + bool result = false; + ListCell *l; + + /* + * If either side has a direct lateral reference to the other, attempt the + * join regardless of outer-join considerations. 
+ */ + if (bms_overlap(rel1->relids, rel2->direct_lateral_relids) || + bms_overlap(rel2->relids, rel1->direct_lateral_relids)) + return true; + + /* + * Likewise, if both rels are needed to compute some PlaceHolderVar, + * attempt the join regardless of outer-join considerations. (This is not + * very desirable, because a PHV with a large eval_at set will cause a lot + * of probably-useless joins to be considered, but failing to do this can + * cause us to fail to construct a plan at all.) + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) && + bms_is_subset(rel2->relids, phinfo->ph_eval_at)) + return true; + } + + /* + * It's possible that the rels correspond to the left and right sides of a + * degenerate outer join, that is, one with no joinclause mentioning the + * non-nullable side; in which case we should force the join to occur. + * + * Also, the two rels could represent a clauseless join that has to be + * completed to build up the LHS or RHS of an outer join. + */ + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + /* ignore full joins --- other mechanisms handle them */ + if (sjinfo->jointype == JOIN_FULL) + continue; + + /* Can we perform the SJ with these rels? */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) + { + result = true; + break; + } + if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) && + bms_is_subset(sjinfo->min_righthand, rel1->relids)) + { + result = true; + break; + } + + /* + * Might we need to join these rels to complete the RHS? We have to + * use "overlap" tests since either rel might include a lower SJ that + * has been proven to commute with this one. + */ + if (bms_overlap(sjinfo->min_righthand, rel1->relids) && + bms_overlap(sjinfo->min_righthand, rel2->relids)) + { + result = true; + break; + } + + /* Likewise for the LHS. */ + if (bms_overlap(sjinfo->min_lefthand, rel1->relids) && + bms_overlap(sjinfo->min_lefthand, rel2->relids)) + { + result = true; + break; + } + } + + /* + * We do not force the join to occur if either input rel can legally be + * joined to anything else using joinclauses. This essentially means that + * clauseless bushy joins are put off as long as possible. The reason is + * that when there is a join order restriction high up in the join tree + * (that is, with many rels inside the LHS or RHS), we would otherwise + * expend lots of effort considering very stupid join combinations within + * its LHS or RHS. + */ + if (result) + { + if (has_legal_joinclause(root, rel1) || + has_legal_joinclause(root, rel2)) + result = false; + } + + return result; +} + + +/* + * has_join_restriction + * Detect whether the specified relation has join-order restrictions, + * due to being inside an outer join or an IN (sub-SELECT), + * or participating in any LATERAL references or multi-rel PHVs. + * + * Essentially, this tests whether have_join_order_restriction() could + * succeed with this rel and some other one. It's OK if we sometimes + * say "true" incorrectly. (Therefore, we don't bother with the relatively + * expensive has_legal_joinclause test.) 
+ */ +static bool +has_join_restriction(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *l; + + if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL) + return true; + + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(rel->relids, phinfo->ph_eval_at) && + !bms_equal(rel->relids, phinfo->ph_eval_at)) + return true; + } + + foreach(l, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l); + + /* ignore full joins --- other mechanisms preserve their ordering */ + if (sjinfo->jointype == JOIN_FULL) + continue; + + /* ignore if SJ is already contained in rel */ + if (bms_is_subset(sjinfo->min_lefthand, rel->relids) && + bms_is_subset(sjinfo->min_righthand, rel->relids)) + continue; + + /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */ + if (bms_overlap(sjinfo->min_lefthand, rel->relids) || + bms_overlap(sjinfo->min_righthand, rel->relids)) + return true; + } + + return false; +} + + +/* + * has_legal_joinclause + * Detect whether the specified relation can legally be joined + * to any other rels using join clauses. + * + * We consider only joins to single other relations in the current + * initial_rels list. This is sufficient to get a "true" result in most real + * queries, and an occasional erroneous "false" will only cost a bit more + * planning time. The reason for this limitation is that considering joins to + * other joins would require proving that the other join rel can legally be + * formed, which seems like too much trouble for something that's only a + * heuristic to save planning time. (Note: we must look at initial_rels + * and not all of the query, since when we are planning a sub-joinlist we + * may be forced to make clauseless joins within initial_rels even though + * there are join clauses linking to other parts of the query.) + */ +static bool +has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + + foreach(lc, root->initial_rels) + { + RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc); + + /* ignore rels that are already in "rel" */ + if (bms_overlap(rel->relids, rel2->relids)) + continue; + + if (have_relevant_joinclause(root, rel, rel2)) + { + Relids joinrelids; + SpecialJoinInfo *sjinfo; + bool reversed; + + /* join_is_legal needs relids of the union */ + joinrelids = bms_union(rel->relids, rel2->relids); + + if (join_is_legal(root, rel, rel2, joinrelids, + &sjinfo, &reversed)) + { + /* Yes, this will work */ + bms_free(joinrelids); + return true; + } + + bms_free(joinrelids); + } + } + + return false; +} + + +/* + * There's a pitfall for creating parameterized nestloops: suppose the inner + * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's + * minimum eval_at set includes the outer rel (B) and some third rel (C). + * We might think we could create a B/A nestloop join that's parameterized by + * C. But we would end up with a plan in which the PHV's expression has to be + * evaluated as a nestloop parameter at the B/A join; and the executor is only + * set up to handle simple Vars as NestLoopParams. Rather than add complexity + * and overhead to the executor for such corner cases, it seems better to + * forbid the join. (Note that we can still make use of A's parameterized + * path with pre-joined B+C as the outer rel. have_join_order_restriction() + * ensures that we will consider making such a join even if there are not + * other reasons to do so.) 
+ * + * So we check whether any PHVs used in the query could pose such a hazard. + * We don't have any simple way of checking whether a risky PHV would actually + * be used in the inner plan, and the case is so unusual that it doesn't seem + * worth working very hard on it. + * + * This needs to be checked in two places. If the inner rel's minimum + * parameterization would trigger the restriction, then join_is_legal() should + * reject the join altogether, because there will be no workable paths for it. + * But joinpath.c has to check again for every proposed nestloop path, because + * the inner path might have more than the minimum parameterization, causing + * some PHV to be dangerous for it that otherwise wouldn't be. + */ +bool +have_dangerous_phv(PlannerInfo *root, + Relids outer_relids, Relids inner_params) +{ + ListCell *lc; + + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + + if (!bms_is_subset(phinfo->ph_eval_at, inner_params)) + continue; /* ignore, could not be a nestloop param */ + if (!bms_overlap(phinfo->ph_eval_at, outer_relids)) + continue; /* ignore, not relevant to this join */ + if (bms_is_subset(phinfo->ph_eval_at, outer_relids)) + continue; /* safe, it can be eval'd within outerrel */ + /* Otherwise, it's potentially unsafe, so reject the join */ + return true; + } + + /* OK to perform the join */ + return false; +} + + +/* + * is_dummy_rel --- has relation been proven empty? + */ +bool +is_dummy_rel(RelOptInfo *rel) +{ + Path *path; + + /* + * A rel that is known dummy will have just one path that is a childless + * Append. (Even if somehow it has more paths, a childless Append will + * have cost zero and hence should be at the front of the pathlist.) + */ + if (rel->pathlist == NIL) + return false; + path = (Path *) linitial(rel->pathlist); + + /* + * Initially, a dummy path will just be a childless Append. But in later + * planning stages we might stick a ProjectSetPath and/or ProjectionPath + * on top, since Append can't project. Rather than make assumptions about + * which combinations can occur, just descend through whatever we find. + */ + for (;;) + { + if (IsA(path, ProjectionPath)) + path = ((ProjectionPath *) path)->subpath; + else if (IsA(path, ProjectSetPath)) + path = ((ProjectSetPath *) path)->subpath; + else + break; + } + if (IS_DUMMY_APPEND(path)) + return true; + return false; +} + +/* + * Mark a relation as proven empty. + * + * During GEQO planning, this can get invoked more than once on the same + * baserel struct, so it's worth checking to see if the rel is already marked + * dummy. + * + * Also, when called during GEQO join planning, we are in a short-lived + * memory context. We must make sure that the dummy path attached to a + * baserel survives the GEQO cycle, else the baserel is trashed for future + * GEQO cycles. On the other hand, when we are marking a joinrel during GEQO, + * we don't want the dummy path to clutter the main planning context. Upshot + * is that the best solution is to explicitly make the dummy path in the same + * context the given RelOptInfo is in. + */ +void +mark_dummy_rel(RelOptInfo *rel) +{ + MemoryContext oldcontext; + + /* Already marked? 
*/ + if (is_dummy_rel(rel)) + return; + + /* No, so choose correct context to make the dummy path in */ + oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + + /* Set dummy size estimate */ + rel->rows = 0; + + /* Evict any previously chosen paths */ + rel->pathlist = NIL; + rel->partial_pathlist = NIL; + + /* Set up the dummy path */ + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, + NIL, rel->lateral_relids, + 0, false, -1)); + + /* Set or update cheapest_total_path and related fields */ + set_cheapest(rel); + + MemoryContextSwitchTo(oldcontext); +} + + +/* + * restriction_is_constant_false --- is a restrictlist just FALSE? + * + * In cases where a qual is provably constant FALSE, eval_const_expressions + * will generally have thrown away anything that's ANDed with it. In outer + * join situations this will leave us computing cartesian products only to + * decide there's no match for an outer row, which is pretty stupid. So, + * we need to detect the case. + * + * If only_pushed_down is true, then consider only quals that are pushed-down + * from the point of view of the joinrel. + */ +static bool +restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, + bool only_pushed_down) +{ + ListCell *lc; + + /* + * Despite the above comment, the restriction list we see here might + * possibly have other members besides the FALSE constant, since other + * quals could get "pushed down" to the outer join level. So we check + * each member of the list. + */ + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) + continue; + + if (rinfo->clause && IsA(rinfo->clause, Const)) + { + Const *con = (Const *) rinfo->clause; + + /* constant NULL is as good as constant FALSE for our purposes */ + if (con->constisnull) + return true; + if (!DatumGetBool(con->constvalue)) + return true; + } + } + return false; +} + +/* + * Assess whether join between given two partitioned relations can be broken + * down into joins between matching partitions; a technique called + * "partitionwise join" + * + * Partitionwise join is possible when a. Joining relations have same + * partitioning scheme b. There exists an equi-join between the partition keys + * of the two relations. + * + * Partitionwise join is planned as follows (details: optimizer/README.) + * + * 1. Create the RelOptInfos for joins between matching partitions i.e + * child-joins and add paths to them. + * + * 2. Construct Append or MergeAppend paths across the set of child joins. + * This second phase is implemented by generate_partitionwise_join_paths(). + * + * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are + * obtained by translating the respective parent join structures. + */ +static void +try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, + RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo, + List *parent_restrictlist) +{ + bool rel1_is_simple = IS_SIMPLE_REL(rel1); + bool rel2_is_simple = IS_SIMPLE_REL(rel2); + List *parts1 = NIL; + List *parts2 = NIL; + ListCell *lcr1 = NULL; + ListCell *lcr2 = NULL; + int cnt_parts; + + /* Guard against stack overflow due to overly deep partition hierarchy. */ + check_stack_depth(); + + /* Nothing to do, if the join relation is not partitioned. */ + if (joinrel->part_scheme == NULL || joinrel->nparts == 0) + return; + + /* The join relation should have consider_partitionwise_join set. 
*/ + Assert(joinrel->consider_partitionwise_join); + + /* + * We can not perform partitionwise join if either of the joining + * relations is not partitioned. + */ + if (!IS_PARTITIONED_REL(rel1) || !IS_PARTITIONED_REL(rel2)) + return; + + Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2)); + + /* The joining relations should have consider_partitionwise_join set. */ + Assert(rel1->consider_partitionwise_join && + rel2->consider_partitionwise_join); + + /* + * The partition scheme of the join relation should match that of the + * joining relations. + */ + Assert(joinrel->part_scheme == rel1->part_scheme && + joinrel->part_scheme == rel2->part_scheme); + + Assert(!(joinrel->partbounds_merged && (joinrel->nparts <= 0))); + + compute_partition_bounds(root, rel1, rel2, joinrel, parent_sjinfo, + &parts1, &parts2); + + if (joinrel->partbounds_merged) + { + lcr1 = list_head(parts1); + lcr2 = list_head(parts2); + } + + /* + * Create child-join relations for this partitioned join, if those don't + * exist. Add paths to child-joins for a pair of child relations + * corresponding to the given pair of parent relations. + */ + for (cnt_parts = 0; cnt_parts < joinrel->nparts; cnt_parts++) + { + RelOptInfo *child_rel1; + RelOptInfo *child_rel2; + bool rel1_empty; + bool rel2_empty; + SpecialJoinInfo *child_sjinfo; + List *child_restrictlist; + RelOptInfo *child_joinrel; + Relids child_joinrelids; + AppendRelInfo **appinfos; + int nappinfos; + + if (joinrel->partbounds_merged) + { + child_rel1 = lfirst_node(RelOptInfo, lcr1); + child_rel2 = lfirst_node(RelOptInfo, lcr2); + lcr1 = lnext(parts1, lcr1); + lcr2 = lnext(parts2, lcr2); + } + else + { + child_rel1 = rel1->part_rels[cnt_parts]; + child_rel2 = rel2->part_rels[cnt_parts]; + } + + rel1_empty = (child_rel1 == NULL || IS_DUMMY_REL(child_rel1)); + rel2_empty = (child_rel2 == NULL || IS_DUMMY_REL(child_rel2)); + + /* + * Check for cases where we can prove that this segment of the join + * returns no rows, due to one or both inputs being empty (including + * inputs that have been pruned away entirely). If so just ignore it. + * These rules are equivalent to populate_joinrel_with_paths's rules + * for dummy input relations. + */ + switch (parent_sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_SEMI: + if (rel1_empty || rel2_empty) + continue; /* ignore this join segment */ + break; + case JOIN_LEFT: + case JOIN_ANTI: + if (rel1_empty) + continue; /* ignore this join segment */ + break; + case JOIN_FULL: + if (rel1_empty && rel2_empty) + continue; /* ignore this join segment */ + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) parent_sjinfo->jointype); + break; + } + + /* + * If a child has been pruned entirely then we can't generate paths + * for it, so we have to reject partitionwise joining unless we were + * able to eliminate this partition above. + */ + if (child_rel1 == NULL || child_rel2 == NULL) + { + /* + * Mark the joinrel as unpartitioned so that later functions treat + * it correctly. + */ + joinrel->nparts = 0; + return; + } + + /* + * If a leaf relation has consider_partitionwise_join=false, it means + * that it's a dummy relation for which we skipped setting up tlist + * expressions and adding EC members in set_append_rel_size(), so + * again we have to fail here. 
+ */ + if (rel1_is_simple && !child_rel1->consider_partitionwise_join) + { + Assert(child_rel1->reloptkind == RELOPT_OTHER_MEMBER_REL); + Assert(IS_DUMMY_REL(child_rel1)); + joinrel->nparts = 0; + return; + } + if (rel2_is_simple && !child_rel2->consider_partitionwise_join) + { + Assert(child_rel2->reloptkind == RELOPT_OTHER_MEMBER_REL); + Assert(IS_DUMMY_REL(child_rel2)); + joinrel->nparts = 0; + return; + } + + /* We should never try to join two overlapping sets of rels. */ + Assert(!bms_overlap(child_rel1->relids, child_rel2->relids)); + child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids); + appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos); + + /* + * Construct SpecialJoinInfo from parent join relations's + * SpecialJoinInfo. + */ + child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo, + child_rel1->relids, + child_rel2->relids); + + /* + * Construct restrictions applicable to the child join from those + * applicable to the parent join. + */ + child_restrictlist = + (List *) adjust_appendrel_attrs(root, + (Node *) parent_restrictlist, + nappinfos, appinfos); + pfree(appinfos); + + child_joinrel = joinrel->part_rels[cnt_parts]; + if (!child_joinrel) + { + child_joinrel = build_child_join_rel(root, child_rel1, child_rel2, + joinrel, child_restrictlist, + child_sjinfo, + child_sjinfo->jointype); + joinrel->part_rels[cnt_parts] = child_joinrel; + joinrel->all_partrels = bms_add_members(joinrel->all_partrels, + child_joinrel->relids); + } + + Assert(bms_equal(child_joinrel->relids, child_joinrelids)); + + populate_joinrel_with_paths(root, child_rel1, child_rel2, + child_joinrel, child_sjinfo, + child_restrictlist); + } +} + +/* + * Construct the SpecialJoinInfo for a child-join by translating + * SpecialJoinInfo for the join between parents. left_relids and right_relids + * are the relids of left and right side of the join respectively. + */ +static SpecialJoinInfo * +build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo, + Relids left_relids, Relids right_relids) +{ + SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo); + AppendRelInfo **left_appinfos; + int left_nappinfos; + AppendRelInfo **right_appinfos; + int right_nappinfos; + + memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo)); + left_appinfos = find_appinfos_by_relids(root, left_relids, + &left_nappinfos); + right_appinfos = find_appinfos_by_relids(root, right_relids, + &right_nappinfos); + + sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand, + left_nappinfos, left_appinfos); + sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand, + right_nappinfos, + right_appinfos); + sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand, + left_nappinfos, left_appinfos); + sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand, + right_nappinfos, + right_appinfos); + sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root, + (Node *) sjinfo->semi_rhs_exprs, + right_nappinfos, + right_appinfos); + + pfree(left_appinfos); + pfree(right_appinfos); + + return sjinfo; +} + +/* + * compute_partition_bounds + * Compute the partition bounds for a join rel from those for inputs + */ +static void +compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List **parts1, List **parts2) +{ + /* + * If we don't have the partition bounds for the join rel yet, try to + * compute those along with pairs of partitions to be joined. 
+ */ + if (joinrel->nparts == -1) + { + PartitionScheme part_scheme = joinrel->part_scheme; + PartitionBoundInfo boundinfo = NULL; + int nparts = 0; + + Assert(joinrel->boundinfo == NULL); + Assert(joinrel->part_rels == NULL); + + /* + * See if the partition bounds for inputs are exactly the same, in + * which case we don't need to work hard: the join rel will have the + * same partition bounds as inputs, and the partitions with the same + * cardinal positions will form the pairs. + * + * Note: even in cases where one or both inputs have merged bounds, it + * would be possible for both the bounds to be exactly the same, but + * it seems unlikely to be worth the cycles to check. + */ + if (!rel1->partbounds_merged && + !rel2->partbounds_merged && + rel1->nparts == rel2->nparts && + partition_bounds_equal(part_scheme->partnatts, + part_scheme->parttyplen, + part_scheme->parttypbyval, + rel1->boundinfo, rel2->boundinfo)) + { + boundinfo = rel1->boundinfo; + nparts = rel1->nparts; + } + else + { + /* Try merging the partition bounds for inputs. */ + boundinfo = partition_bounds_merge(part_scheme->partnatts, + part_scheme->partsupfunc, + part_scheme->partcollation, + rel1, rel2, + parent_sjinfo->jointype, + parts1, parts2); + if (boundinfo == NULL) + { + joinrel->nparts = 0; + return; + } + nparts = list_length(*parts1); + joinrel->partbounds_merged = true; + } + + Assert(nparts > 0); + joinrel->boundinfo = boundinfo; + joinrel->nparts = nparts; + joinrel->part_rels = + (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts); + } + else + { + Assert(joinrel->nparts > 0); + Assert(joinrel->boundinfo); + Assert(joinrel->part_rels); + + /* + * If the join rel's partbounds_merged flag is true, it means inputs + * are not guaranteed to have the same partition bounds, therefore we + * can't assume that the partitions at the same cardinal positions + * form the pairs; let get_matching_part_pairs() generate the pairs. + * Otherwise, nothing to do since we can assume that. + */ + if (joinrel->partbounds_merged) + { + get_matching_part_pairs(root, joinrel, rel1, rel2, + parts1, parts2); + Assert(list_length(*parts1) == joinrel->nparts); + Assert(list_length(*parts2) == joinrel->nparts); + } + } +} + +/* + * get_matching_part_pairs + * Generate pairs of partitions to be joined from inputs + */ +static void +get_matching_part_pairs(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, + List **parts1, List **parts2) +{ + bool rel1_is_simple = IS_SIMPLE_REL(rel1); + bool rel2_is_simple = IS_SIMPLE_REL(rel2); + int cnt_parts; + + *parts1 = NIL; + *parts2 = NIL; + + for (cnt_parts = 0; cnt_parts < joinrel->nparts; cnt_parts++) + { + RelOptInfo *child_joinrel = joinrel->part_rels[cnt_parts]; + RelOptInfo *child_rel1; + RelOptInfo *child_rel2; + Relids child_relids1; + Relids child_relids2; + + /* + * If this segment of the join is empty, it means that this segment + * was ignored when previously creating child-join paths for it in + * try_partitionwise_join() as it would not contribute to the join + * result, due to one or both inputs being empty; add NULL to each of + * the given lists so that this segment will be ignored again in that + * function. + */ + if (!child_joinrel) + { + *parts1 = lappend(*parts1, NULL); + *parts2 = lappend(*parts2, NULL); + continue; + } + + /* + * Get a relids set of partition(s) involved in this join segment that + * are from the rel1 side. 
+ */ + child_relids1 = bms_intersect(child_joinrel->relids, + rel1->all_partrels); + Assert(bms_num_members(child_relids1) == bms_num_members(rel1->relids)); + + /* + * Get a child rel for rel1 with the relids. Note that we should have + * the child rel even if rel1 is a join rel, because in that case the + * partitions specified in the relids would have matching/overlapping + * boundaries, so the specified partitions should be considered as + * ones to be joined when planning partitionwise joins of rel1, + * meaning that the child rel would have been built by the time we get + * here. + */ + if (rel1_is_simple) + { + int varno = bms_singleton_member(child_relids1); + + child_rel1 = find_base_rel(root, varno); + } + else + child_rel1 = find_join_rel(root, child_relids1); + Assert(child_rel1); + + /* + * Get a relids set of partition(s) involved in this join segment that + * are from the rel2 side. + */ + child_relids2 = bms_intersect(child_joinrel->relids, + rel2->all_partrels); + Assert(bms_num_members(child_relids2) == bms_num_members(rel2->relids)); + + /* + * Get a child rel for rel2 with the relids. See above comments. + */ + if (rel2_is_simple) + { + int varno = bms_singleton_member(child_relids2); + + child_rel2 = find_base_rel(root, varno); + } + else + child_rel2 = find_join_rel(root, child_relids2); + Assert(child_rel2); + + /* + * The join of rel1 and rel2 is legal, so is the join of the child + * rels obtained above; add them to the given lists as a join pair + * producing this join segment. + */ + *parts1 = lappend(*parts1, child_rel1); + *parts2 = lappend(*parts2, child_rel2); + } +} diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c new file mode 100644 index 0000000..bd9a176 --- /dev/null +++ b/src/backend/optimizer/path/pathkeys.c @@ -0,0 +1,1917 @@ +/*------------------------------------------------------------------------- + * + * pathkeys.c + * Utilities for matching and building path keys + * + * See src/backend/optimizer/README for a great deal of information about + * the nature and use of path keys. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/pathkeys.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/stratnum.h" +#include "catalog/pg_opfamily.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/plannodes.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "partitioning/partbounds.h" +#include "utils/lsyscache.h" + + +static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); +static bool matches_boolean_partition_clause(RestrictInfo *rinfo, + RelOptInfo *partrel, + int partkeycol); +static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); +static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); + + +/**************************************************************************** + * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING + ****************************************************************************/ + +/* + * make_canonical_pathkey + * Given the parameters for a PathKey, find any pre-existing matching + * pathkey in the query's list of "canonical" pathkeys. Make a new + * entry if there's not one already. 
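+ *
+ * For example, make_pathkey_from_sortinfo() below ends by calling
+ *		make_canonical_pathkey(root, eclass, opfamily, strategy, nulls_first)
+ * so repeated requests for the same (eclass, opfamily, strategy,
+ * nulls_first) combination always hand back the same PathKey pointer,
+ * which is what allows later pathkey comparisons by simple pointer
+ * equality.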
+ * + * Note that this function must not be used until after we have completed + * merging EquivalenceClasses. + */ +PathKey * +make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) +{ + PathKey *pk; + ListCell *lc; + MemoryContext oldcontext; + + /* Can't make canonical pathkeys if the set of ECs might still change */ + if (!root->ec_merging_done) + elog(ERROR, "too soon to build canonical pathkeys"); + + /* The passed eclass might be non-canonical, so chase up to the top */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; + + foreach(lc, root->canon_pathkeys) + { + pk = (PathKey *) lfirst(lc); + if (eclass == pk->pk_eclass && + opfamily == pk->pk_opfamily && + strategy == pk->pk_strategy && + nulls_first == pk->pk_nulls_first) + return pk; + } + + /* + * Be sure canonical pathkeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + pk = makeNode(PathKey); + pk->pk_eclass = eclass; + pk->pk_opfamily = opfamily; + pk->pk_strategy = strategy; + pk->pk_nulls_first = nulls_first; + + root->canon_pathkeys = lappend(root->canon_pathkeys, pk); + + MemoryContextSwitchTo(oldcontext); + + return pk; +} + +/* + * pathkey_is_redundant + * Is a pathkey redundant with one already in the given list? + * + * We detect two cases: + * + * 1. If the new pathkey's equivalence class contains a constant, and isn't + * below an outer join, then we can disregard it as a sort key. An example: + * SELECT ... WHERE x = 42 ORDER BY x, y; + * We may as well just sort by y. Note that because of opfamily matching, + * this is semantically correct: we know that the equality constraint is one + * that actually binds the variable to a single value in the terms of any + * ordering operator that might go with the eclass. This rule not only lets + * us simplify (or even skip) explicit sorts, but also allows matching index + * sort orders to a query when there are don't-care index columns. + * + * 2. If the new pathkey's equivalence class is the same as that of any + * existing member of the pathkey list, then it is redundant. Some examples: + * SELECT ... ORDER BY x, x; + * SELECT ... ORDER BY x, x DESC; + * SELECT ... WHERE x = y ORDER BY x, y; + * In all these cases the second sort key cannot distinguish values that are + * considered equal by the first, and so there's no point in using it. + * Note in particular that we need not compare opfamily (all the opfamilies + * of the EC have the same notion of equality) nor sort direction. + * + * Both the given pathkey and the list members must be canonical for this + * to work properly, but that's okay since we no longer ever construct any + * non-canonical pathkeys. (Note: the notion of a pathkey *list* being + * canonical includes the additional requirement of no redundant entries, + * which is exactly what we are checking for here.) + * + * Because the equivclass.c machinery forms only one copy of any EC per query, + * pointer comparison is enough to decide whether canonical ECs are the same. 
+ */ +static bool +pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) +{ + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; + + /* Check for EC containing a constant --- unconditionally redundant */ + if (EC_MUST_BE_REDUNDANT(new_ec)) + return true; + + /* If same EC already used in list, then redundant */ + foreach(lc, pathkeys) + { + PathKey *old_pathkey = (PathKey *) lfirst(lc); + + if (new_ec == old_pathkey->pk_eclass) + return true; + } + + return false; +} + +/* + * make_pathkey_from_sortinfo + * Given an expression and sort-order information, create a PathKey. + * The result is always a "canonical" PathKey, but it might be redundant. + * + * expr is the expression, and nullable_relids is the set of base relids + * that are potentially nullable below it. + * + * If the PathKey is being generated from a SortGroupClause, sortref should be + * the SortGroupClause's SortGroupRef; otherwise zero. + * + * If rel is not NULL, it identifies a specific relation we're considering + * a path for, and indicates that child EC members for that relation can be + * considered. Otherwise child members are ignored. (See the comments for + * get_eclass_for_sort_expr.) + * + * create_it is true if we should create any missing EquivalenceClass + * needed to represent the sort key. If it's false, we return NULL if the + * sort key isn't already present in any EquivalenceClass. + */ +static PathKey * +make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, + Relids nullable_relids, + Oid opfamily, + Oid opcintype, + Oid collation, + bool reverse_sort, + bool nulls_first, + Index sortref, + Relids rel, + bool create_it) +{ + int16 strategy; + Oid equality_op; + List *opfamilies; + EquivalenceClass *eclass; + + strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber; + + /* + * EquivalenceClasses need to contain opfamily lists based on the family + * membership of mergejoinable equality operators, which could belong to + * more than one opfamily. So we have to look up the opfamily's equality + * operator and get its membership. + */ + equality_op = get_opfamily_member(opfamily, + opcintype, + opcintype, + BTEqualStrategyNumber); + if (!OidIsValid(equality_op)) /* shouldn't happen */ + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, opcintype, opcintype, opfamily); + opfamilies = get_mergejoin_opfamilies(equality_op); + if (!opfamilies) /* certainly should find some */ + elog(ERROR, "could not find opfamilies for equality operator %u", + equality_op); + + /* Now find or (optionally) create a matching EquivalenceClass */ + eclass = get_eclass_for_sort_expr(root, expr, nullable_relids, + opfamilies, opcintype, collation, + sortref, rel, create_it); + + /* Fail if no EC and !create_it */ + if (!eclass) + return NULL; + + /* And finally we can find or create a PathKey node */ + return make_canonical_pathkey(root, eclass, opfamily, + strategy, nulls_first); +} + +/* + * make_pathkey_from_sortop + * Like make_pathkey_from_sortinfo, but work from a sort operator. + * + * This should eventually go away, but we need to restructure SortGroupClause + * first. 
+ */ +static PathKey * +make_pathkey_from_sortop(PlannerInfo *root, + Expr *expr, + Relids nullable_relids, + Oid ordering_op, + bool nulls_first, + Index sortref, + bool create_it) +{ + Oid opfamily, + opcintype, + collation; + int16 strategy; + + /* Find the operator in pg_amop --- failure shouldn't happen */ + if (!get_ordering_op_properties(ordering_op, + &opfamily, &opcintype, &strategy)) + elog(ERROR, "operator %u is not a valid ordering operator", + ordering_op); + + /* Because SortGroupClause doesn't carry collation, consult the expr */ + collation = exprCollation((Node *) expr); + + return make_pathkey_from_sortinfo(root, + expr, + nullable_relids, + opfamily, + opcintype, + collation, + (strategy == BTGreaterStrategyNumber), + nulls_first, + sortref, + NULL, + create_it); +} + + +/**************************************************************************** + * PATHKEY COMPARISONS + ****************************************************************************/ + +/* + * compare_pathkeys + * Compare two pathkeys to see if they are equivalent, and if not whether + * one is "better" than the other. + * + * We assume the pathkeys are canonical, and so they can be checked for + * equality by simple pointer comparison. + */ +PathKeysComparison +compare_pathkeys(List *keys1, List *keys2) +{ + ListCell *key1, + *key2; + + /* + * Fall out quickly if we are passed two identical lists. This mostly + * catches the case where both are NIL, but that's common enough to + * warrant the test. + */ + if (keys1 == keys2) + return PATHKEYS_EQUAL; + + forboth(key1, keys1, key2, keys2) + { + PathKey *pathkey1 = (PathKey *) lfirst(key1); + PathKey *pathkey2 = (PathKey *) lfirst(key2); + + if (pathkey1 != pathkey2) + return PATHKEYS_DIFFERENT; /* no need to keep looking */ + } + + /* + * If we reached the end of only one list, the other is longer and + * therefore not a subset. + */ + if (key1 != NULL) + return PATHKEYS_BETTER1; /* key1 is longer */ + if (key2 != NULL) + return PATHKEYS_BETTER2; /* key2 is longer */ + return PATHKEYS_EQUAL; +} + +/* + * pathkeys_contained_in + * Common special case of compare_pathkeys: we just want to know + * if keys2 are at least as well sorted as keys1. + */ +bool +pathkeys_contained_in(List *keys1, List *keys2) +{ + switch (compare_pathkeys(keys1, keys2)) + { + case PATHKEYS_EQUAL: + case PATHKEYS_BETTER2: + return true; + default: + break; + } + return false; +} + +/* + * pathkeys_count_contained_in + * Same as pathkeys_contained_in, but also sets length of longest + * common prefix of keys1 and keys2. + */ +bool +pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common) +{ + int n = 0; + ListCell *key1, + *key2; + + /* + * See if we can avoiding looping through both lists. This optimization + * gains us several percent in planning time in a worst-case test. + */ + if (keys1 == keys2) + { + *n_common = list_length(keys1); + return true; + } + else if (keys1 == NIL) + { + *n_common = 0; + return true; + } + else if (keys2 == NIL) + { + *n_common = 0; + return false; + } + + /* + * If both lists are non-empty, iterate through both to find out how many + * items are shared. + */ + forboth(key1, keys1, key2, keys2) + { + PathKey *pathkey1 = (PathKey *) lfirst(key1); + PathKey *pathkey2 = (PathKey *) lfirst(key2); + + if (pathkey1 != pathkey2) + { + *n_common = n; + return false; + } + n++; + } + + /* If we ended with a null value, then we've processed the whole list. 
*/ + *n_common = n; + return (key1 == NULL); +} + +/* + * get_cheapest_path_for_pathkeys + * Find the cheapest path (according to the specified criterion) that + * satisfies the given pathkeys and parameterization. + * Return NULL if no such path. + * + * 'paths' is a list of possible paths that all generate the same relation + * 'pathkeys' represents a required ordering (in canonical form!) + * 'required_outer' denotes allowable outer relations for parameterized paths + * 'cost_criterion' is STARTUP_COST or TOTAL_COST + * 'require_parallel_safe' causes us to consider only parallel-safe paths + */ +Path * +get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, + Relids required_outer, + CostSelector cost_criterion, + bool require_parallel_safe) +{ + Path *matched_path = NULL; + ListCell *l; + + foreach(l, paths) + { + Path *path = (Path *) lfirst(l); + + /* + * Since cost comparison is a lot cheaper than pathkey comparison, do + * that first. (XXX is that still true?) + */ + if (matched_path != NULL && + compare_path_costs(matched_path, path, cost_criterion) <= 0) + continue; + + if (require_parallel_safe && !path->parallel_safe) + continue; + + if (pathkeys_contained_in(pathkeys, path->pathkeys) && + bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + matched_path = path; + } + return matched_path; +} + +/* + * get_cheapest_fractional_path_for_pathkeys + * Find the cheapest path (for retrieving a specified fraction of all + * the tuples) that satisfies the given pathkeys and parameterization. + * Return NULL if no such path. + * + * See compare_fractional_path_costs() for the interpretation of the fraction + * parameter. + * + * 'paths' is a list of possible paths that all generate the same relation + * 'pathkeys' represents a required ordering (in canonical form!) + * 'required_outer' denotes allowable outer relations for parameterized paths + * 'fraction' is the fraction of the total tuples expected to be retrieved + */ +Path * +get_cheapest_fractional_path_for_pathkeys(List *paths, + List *pathkeys, + Relids required_outer, + double fraction) +{ + Path *matched_path = NULL; + ListCell *l; + + foreach(l, paths) + { + Path *path = (Path *) lfirst(l); + + /* + * Since cost comparison is a lot cheaper than pathkey comparison, do + * that first. (XXX is that still true?) + */ + if (matched_path != NULL && + compare_fractional_path_costs(matched_path, path, fraction) <= 0) + continue; + + if (pathkeys_contained_in(pathkeys, path->pathkeys) && + bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + matched_path = path; + } + return matched_path; +} + + +/* + * get_cheapest_parallel_safe_total_inner + * Find the unparameterized parallel-safe path with the least total cost. + */ +Path * +get_cheapest_parallel_safe_total_inner(List *paths) +{ + ListCell *l; + + foreach(l, paths) + { + Path *innerpath = (Path *) lfirst(l); + + if (innerpath->parallel_safe && + bms_is_empty(PATH_REQ_OUTER(innerpath))) + return innerpath; + } + + return NULL; +} + +/**************************************************************************** + * NEW PATHKEY FORMATION + ****************************************************************************/ + +/* + * build_index_pathkeys + * Build a pathkeys list that describes the ordering induced by an index + * scan using the given index. (Note that an unordered index doesn't + * induce any ordering, so we return NIL.) + * + * If 'scandir' is BackwardScanDirection, build pathkeys representing a + * backwards scan of the index. 
+ * + * We iterate only key columns of covering indexes, since non-key columns + * don't influence index ordering. The result is canonical, meaning that + * redundant pathkeys are removed; it may therefore have fewer entries than + * there are key columns in the index. + * + * Another reason for stopping early is that we may be able to tell that + * an index column's sort order is uninteresting for this query. However, + * that test is just based on the existence of an EquivalenceClass and not + * on position in pathkey lists, so it's not complete. Caller should call + * truncate_useless_pathkeys() to possibly remove more pathkeys. + */ +List * +build_index_pathkeys(PlannerInfo *root, + IndexOptInfo *index, + ScanDirection scandir) +{ + List *retval = NIL; + ListCell *lc; + int i; + + if (index->sortopfamily == NULL) + return NIL; /* non-orderable index */ + + i = 0; + foreach(lc, index->indextlist) + { + TargetEntry *indextle = (TargetEntry *) lfirst(lc); + Expr *indexkey; + bool reverse_sort; + bool nulls_first; + PathKey *cpathkey; + + /* + * INCLUDE columns are stored in index unordered, so they don't + * support ordered index scan. + */ + if (i >= index->nkeycolumns) + break; + + /* We assume we don't need to make a copy of the tlist item */ + indexkey = indextle->expr; + + if (ScanDirectionIsBackward(scandir)) + { + reverse_sort = !index->reverse_sort[i]; + nulls_first = !index->nulls_first[i]; + } + else + { + reverse_sort = index->reverse_sort[i]; + nulls_first = index->nulls_first[i]; + } + + /* + * OK, try to make a canonical pathkey for this sort key. Note we're + * underneath any outer joins, so nullable_relids should be NULL. + */ + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + NULL, + index->sortopfamily[i], + index->opcintype[i], + index->indexcollations[i], + reverse_sort, + nulls_first, + 0, + index->rel->relids, + false); + + if (cpathkey) + { + /* + * We found the sort key in an EquivalenceClass, so it's relevant + * for this query. Add it to list, unless it's redundant. + */ + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + else + { + /* + * Boolean index keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special treatment + * of boolean equality conditions --- see the comment for + * indexcol_is_bool_constant_for_query(). If that applies, we can + * continue to examine lower-order index columns. Otherwise, the + * sort key is not an interesting sort order for this query, so we + * should stop considering index columns; any lower-order sort + * keys won't be useful either. + */ + if (!indexcol_is_bool_constant_for_query(root, index, i)) + break; + } + + i++; + } + + return retval; +} + +/* + * partkey_is_bool_constant_for_query + * + * If a partition key column is constrained to have a constant value by the + * query's WHERE conditions, then it's irrelevant for sort-order + * considerations. Usually that means we have a restriction clause + * WHERE partkeycol = constant, which gets turned into an EquivalenceClass + * containing a constant, which is recognized as redundant by + * build_partition_pathkeys(). But if the partition key column is a + * boolean variable (or expression), then we are not going to see such a + * WHERE clause, because expression preprocessing will have simplified it + * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going + * to have a matching EquivalenceClass (unless the query also contains + * "ORDER BY partkeycol"). 
To allow such cases to work the same as they would + * for non-boolean values, this function is provided to detect whether the + * specified partition key column matches a boolean restriction clause. + */ +static bool +partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol) +{ + PartitionScheme partscheme = partrel->part_scheme; + ListCell *lc; + + /* If the partkey isn't boolean, we can't possibly get a match */ + if (!IsBooleanOpfamily(partscheme->partopfamily[partkeycol])) + return false; + + /* Check each restriction clause for the partitioned rel */ + foreach(lc, partrel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Ignore pseudoconstant quals, they won't match */ + if (rinfo->pseudoconstant) + continue; + + /* See if we can match the clause's expression to the partkey column */ + if (matches_boolean_partition_clause(rinfo, partrel, partkeycol)) + return true; + } + + return false; +} + +/* + * matches_boolean_partition_clause + * Determine if the boolean clause described by rinfo matches + * partrel's partkeycol-th partition key column. + * + * "Matches" can be either an exact match (equivalent to partkey = true), + * or a NOT above an exact match (equivalent to partkey = false). + */ +static bool +matches_boolean_partition_clause(RestrictInfo *rinfo, + RelOptInfo *partrel, int partkeycol) +{ + Node *clause = (Node *) rinfo->clause; + Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]); + + /* Direct match? */ + if (equal(partexpr, clause)) + return true; + /* NOT clause? */ + else if (is_notclause(clause)) + { + Node *arg = (Node *) get_notclausearg((Expr *) clause); + + if (equal(partexpr, arg)) + return true; + } + + return false; +} + +/* + * build_partition_pathkeys + * Build a pathkeys list that describes the ordering induced by the + * partitions of partrel, under either forward or backward scan + * as per scandir. + * + * Caller must have checked that the partitions are properly ordered, + * as detected by partitions_are_ordered(). + * + * Sets *partialkeys to true if pathkeys were only built for a prefix of the + * partition key, or false if the pathkeys include all columns of the + * partition key. + */ +List * +build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir, bool *partialkeys) +{ + List *retval = NIL; + PartitionScheme partscheme = partrel->part_scheme; + int i; + + Assert(partscheme != NULL); + Assert(partitions_are_ordered(partrel->boundinfo, partrel->nparts)); + /* For now, we can only cope with baserels */ + Assert(IS_SIMPLE_REL(partrel)); + + for (i = 0; i < partscheme->partnatts; i++) + { + PathKey *cpathkey; + Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]); + + /* + * Try to make a canonical pathkey for this partkey. + * + * We're considering a baserel scan, so nullable_relids should be + * NULL. Also, we assume the PartitionDesc lists any NULL partition + * last, so we treat the scan like a NULLS LAST index: we have + * nulls_first for backwards scan only. + */ + cpathkey = make_pathkey_from_sortinfo(root, + keyCol, + NULL, + partscheme->partopfamily[i], + partscheme->partopcintype[i], + partscheme->partcollation[i], + ScanDirectionIsBackward(scandir), + ScanDirectionIsBackward(scandir), + 0, + partrel->relids, + false); + + + if (cpathkey) + { + /* + * We found the sort key in an EquivalenceClass, so it's relevant + * for this query. Add it to list, unless it's redundant. 
+ */ + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + else + { + /* + * Boolean partition keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special treatment + * of boolean equality conditions --- see the comment for + * partkey_is_bool_constant_for_query(). If that applies, we can + * continue to examine lower-order partition keys. Otherwise, the + * sort key is not an interesting sort order for this query, so we + * should stop considering partition columns; any lower-order sort + * keys won't be useful either. + */ + if (!partkey_is_bool_constant_for_query(partrel, i)) + { + *partialkeys = true; + return retval; + } + } + } + + *partialkeys = false; + return retval; +} + +/* + * build_expression_pathkey + * Build a pathkeys list that describes an ordering by a single expression + * using the given sort operator. + * + * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo. + * We induce the other arguments assuming default sort order for the operator. + * + * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it + * is false and the expression isn't already in some EquivalenceClass. + */ +List * +build_expression_pathkey(PlannerInfo *root, + Expr *expr, + Relids nullable_relids, + Oid opno, + Relids rel, + bool create_it) +{ + List *pathkeys; + Oid opfamily, + opcintype; + int16 strategy; + PathKey *cpathkey; + + /* Find the operator in pg_amop --- failure shouldn't happen */ + if (!get_ordering_op_properties(opno, + &opfamily, &opcintype, &strategy)) + elog(ERROR, "operator %u is not a valid ordering operator", + opno); + + cpathkey = make_pathkey_from_sortinfo(root, + expr, + nullable_relids, + opfamily, + opcintype, + exprCollation((Node *) expr), + (strategy == BTGreaterStrategyNumber), + (strategy == BTGreaterStrategyNumber), + 0, + rel, + create_it); + + if (cpathkey) + pathkeys = list_make1(cpathkey); + else + pathkeys = NIL; + + return pathkeys; +} + +/* + * convert_subquery_pathkeys + * Build a pathkeys list that describes the ordering of a subquery's + * result, in the terms of the outer query. This is essentially a + * task of conversion. + * + * 'rel': outer query's RelOptInfo for the subquery relation. + * 'subquery_pathkeys': the subquery's output pathkeys, in its terms. + * 'subquery_tlist': the subquery's output targetlist, in its terms. + * + * We intentionally don't do truncate_useless_pathkeys() here, because there + * are situations where seeing the raw ordering of the subquery is helpful. + * For example, if it returns ORDER BY x DESC, that may prompt us to + * construct a mergejoin using DESC order rather than ASC order; but the + * right_merge_direction heuristic would have us throw the knowledge away. + */ +List * +convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, + List *subquery_pathkeys, + List *subquery_tlist) +{ + List *retval = NIL; + int retvallen = 0; + int outer_query_keys = list_length(root->query_pathkeys); + ListCell *i; + + foreach(i, subquery_pathkeys) + { + PathKey *sub_pathkey = (PathKey *) lfirst(i); + EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass; + PathKey *best_pathkey = NULL; + + if (sub_eclass->ec_has_volatile) + { + /* + * If the sub_pathkey's EquivalenceClass is volatile, then it must + * have come from an ORDER BY clause, and we have to match it to + * that same targetlist entry. 
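+ * (For example, a subquery sorted with ORDER BY random() produces a
+ * single-member volatile EquivalenceClass whose ec_sortref identifies
+ * that ORDER BY item, and that is the tlist entry we must find.)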
+ */ + TargetEntry *tle; + Var *outer_var; + + if (sub_eclass->ec_sortref == 0) /* can't happen */ + elog(ERROR, "volatile EquivalenceClass has no sortref"); + tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist); + Assert(tle); + /* Is TLE actually available to the outer query? */ + outer_var = find_var_for_subquery_tle(rel, tle); + if (outer_var) + { + /* We can represent this sub_pathkey */ + EquivalenceMember *sub_member; + EquivalenceClass *outer_ec; + + Assert(list_length(sub_eclass->ec_members) == 1); + sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members); + + /* + * Note: it might look funny to be setting sortref = 0 for a + * reference to a volatile sub_eclass. However, the + * expression is *not* volatile in the outer query: it's just + * a Var referencing whatever the subquery emitted. (IOW, the + * outer query isn't going to re-execute the volatile + * expression itself.) So this is okay. Likewise, it's + * correct to pass nullable_relids = NULL, because we're + * underneath any outer joins appearing in the outer query. + */ + outer_ec = + get_eclass_for_sort_expr(root, + (Expr *) outer_var, + NULL, + sub_eclass->ec_opfamilies, + sub_member->em_datatype, + sub_eclass->ec_collation, + 0, + rel->relids, + false); + + /* + * If we don't find a matching EC, sub-pathkey isn't + * interesting to the outer query + */ + if (outer_ec) + best_pathkey = + make_canonical_pathkey(root, + outer_ec, + sub_pathkey->pk_opfamily, + sub_pathkey->pk_strategy, + sub_pathkey->pk_nulls_first); + } + } + else + { + /* + * Otherwise, the sub_pathkey's EquivalenceClass could contain + * multiple elements (representing knowledge that multiple items + * are effectively equal). Each element might match none, one, or + * more of the output columns that are visible to the outer query. + * This means we may have multiple possible representations of the + * sub_pathkey in the context of the outer query. Ideally we + * would generate them all and put them all into an EC of the + * outer query, thereby propagating equality knowledge up to the + * outer query. Right now we cannot do so, because the outer + * query's EquivalenceClasses are already frozen when this is + * called. Instead we prefer the one that has the highest "score" + * (number of EC peers, plus one if it matches the outer + * query_pathkeys). This is the most likely to be useful in the + * outer query. + */ + int best_score = -1; + ListCell *j; + + foreach(j, sub_eclass->ec_members) + { + EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j); + Expr *sub_expr = sub_member->em_expr; + Oid sub_expr_type = sub_member->em_datatype; + Oid sub_expr_coll = sub_eclass->ec_collation; + ListCell *k; + + if (sub_member->em_is_child) + continue; /* ignore children here */ + + foreach(k, subquery_tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(k); + Var *outer_var; + Expr *tle_expr; + EquivalenceClass *outer_ec; + PathKey *outer_pk; + int score; + + /* Is TLE actually available to the outer query? */ + outer_var = find_var_for_subquery_tle(rel, tle); + if (!outer_var) + continue; + + /* + * The targetlist entry is considered to match if it + * matches after sort-key canonicalization. That is + * needed since the sub_expr has been through the same + * process. 
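+ * (In practice this mostly means that any binary-compatible
+ * RelabelType decoration gets adjusted the same way on both
+ * sides before the equal() check below; see
+ * canonicalize_ec_expression().)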
+ */ + tle_expr = canonicalize_ec_expression(tle->expr, + sub_expr_type, + sub_expr_coll); + if (!equal(tle_expr, sub_expr)) + continue; + + /* See if we have a matching EC for the TLE */ + outer_ec = get_eclass_for_sort_expr(root, + (Expr *) outer_var, + NULL, + sub_eclass->ec_opfamilies, + sub_expr_type, + sub_expr_coll, + 0, + rel->relids, + false); + + /* + * If we don't find a matching EC, this sub-pathkey isn't + * interesting to the outer query + */ + if (!outer_ec) + continue; + + outer_pk = make_canonical_pathkey(root, + outer_ec, + sub_pathkey->pk_opfamily, + sub_pathkey->pk_strategy, + sub_pathkey->pk_nulls_first); + /* score = # of equivalence peers */ + score = list_length(outer_ec->ec_members) - 1; + /* +1 if it matches the proper query_pathkeys item */ + if (retvallen < outer_query_keys && + list_nth(root->query_pathkeys, retvallen) == outer_pk) + score++; + if (score > best_score) + { + best_pathkey = outer_pk; + best_score = score; + } + } + } + } + + /* + * If we couldn't find a representation of this sub_pathkey, we're + * done (we can't use the ones to its right, either). + */ + if (!best_pathkey) + break; + + /* + * Eliminate redundant ordering info; could happen if outer query + * equivalences subquery keys... + */ + if (!pathkey_is_redundant(best_pathkey, retval)) + { + retval = lappend(retval, best_pathkey); + retvallen++; + } + } + + return retval; +} + +/* + * find_var_for_subquery_tle + * + * If the given subquery tlist entry is due to be emitted by the subquery's + * scan node, return a Var for it, else return NULL. + * + * We need this to ensure that we don't return pathkeys describing values + * that are unavailable above the level of the subquery scan. + */ +static Var * +find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle) +{ + ListCell *lc; + + /* If the TLE is resjunk, it's certainly not visible to the outer query */ + if (tle->resjunk) + return NULL; + + /* Search the rel's targetlist to see what it will return */ + foreach(lc, rel->reltarget->exprs) + { + Var *var = (Var *) lfirst(lc); + + /* Ignore placeholders */ + if (!IsA(var, Var)) + continue; + Assert(var->varno == rel->relid); + + /* If we find a Var referencing this TLE, we're good */ + if (var->varattno == tle->resno) + return copyObject(var); /* Make a copy for safety */ + } + return NULL; +} + +/* + * build_join_pathkeys + * Build the path keys for a join relation constructed by mergejoin or + * nestloop join. This is normally the same as the outer path's keys. + * + * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as + * having the outer path's path keys, because null lefthand rows may be + * inserted at random points. It must be treated as unsorted. + * + * We truncate away any pathkeys that are uninteresting for higher joins. + * + * 'joinrel' is the join relation that paths are being formed for + * 'jointype' is the join type (inner, left, full, etc) + * 'outer_pathkeys' is the list of the current outer path's path keys + * + * Returns the list of new path keys. + */ +List * +build_join_pathkeys(PlannerInfo *root, + RelOptInfo *joinrel, + JoinType jointype, + List *outer_pathkeys) +{ + if (jointype == JOIN_FULL || jointype == JOIN_RIGHT) + return NIL; + + /* + * This used to be quite a complex bit of code, but now that all pathkey + * sublists start out life canonicalized, we don't have to do a darn thing + * here! 
+ * + * We do, however, need to truncate the pathkeys list, since it may + * contain pathkeys that were useful for forming this joinrel but are + * uninteresting to higher levels. + */ + return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); +} + +/**************************************************************************** + * PATHKEYS AND SORT CLAUSES + ****************************************************************************/ + +/* + * make_pathkeys_for_sortclauses + * Generate a pathkeys list that represents the sort order specified + * by a list of SortGroupClauses + * + * The resulting PathKeys are always in canonical form. (Actually, there + * is no longer any code anywhere that creates non-canonical PathKeys.) + * + * We assume that root->nullable_baserels is the set of base relids that could + * have gone to NULL below the SortGroupClause expressions. This is okay if + * the expressions came from the query's top level (ORDER BY, DISTINCT, etc) + * and if this function is only invoked after deconstruct_jointree. In the + * future we might have to make callers pass in the appropriate + * nullable-relids set, but for now it seems unnecessary. + * + * 'sortclauses' is a list of SortGroupClause nodes + * 'tlist' is the targetlist to find the referenced tlist entries in + */ +List * +make_pathkeys_for_sortclauses(PlannerInfo *root, + List *sortclauses, + List *tlist) +{ + List *pathkeys = NIL; + ListCell *l; + + foreach(l, sortclauses) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + Assert(OidIsValid(sortcl->sortop)); + pathkey = make_pathkey_from_sortop(root, + sortkey, + root->nullable_baserels, + sortcl->sortop, + sortcl->nulls_first, + sortcl->tleSortGroupRef, + true); + + /* Canonical form eliminates redundant ordering keys */ + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + return pathkeys; +} + +/**************************************************************************** + * PATHKEYS AND MERGECLAUSES + ****************************************************************************/ + +/* + * initialize_mergeclause_eclasses + * Set the EquivalenceClass links in a mergeclause restrictinfo. + * + * RestrictInfo contains fields in which we may cache pointers to + * EquivalenceClasses for the left and right inputs of the mergeclause. + * (If the mergeclause is a true equivalence clause these will be the + * same EquivalenceClass, otherwise not.) If the mergeclause is either + * used to generate an EquivalenceClass, or derived from an EquivalenceClass, + * then it's easy to set up the left_ec and right_ec members --- otherwise, + * this function should be called to set them up. We will generate new + * EquivalenceClauses if necessary to represent the mergeclause's left and + * right sides. + * + * Note this is called before EC merging is complete, so the links won't + * necessarily point to canonical ECs. Before they are actually used for + * anything, update_mergeclause_eclasses must be called to ensure that + * they've been updated to point to canonical ECs. + */ +void +initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) +{ + Expr *clause = restrictinfo->clause; + Oid lefttype, + righttype; + + /* Should be a mergeclause ... */ + Assert(restrictinfo->mergeopfamilies != NIL); + /* ... 
with links not yet set */ + Assert(restrictinfo->left_ec == NULL); + Assert(restrictinfo->right_ec == NULL); + + /* Need the declared input types of the operator */ + op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); + + /* Find or create a matching EquivalenceClass for each side */ + restrictinfo->left_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_leftop(clause), + restrictinfo->nullable_relids, + restrictinfo->mergeopfamilies, + lefttype, + ((OpExpr *) clause)->inputcollid, + 0, + NULL, + true); + restrictinfo->right_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_rightop(clause), + restrictinfo->nullable_relids, + restrictinfo->mergeopfamilies, + righttype, + ((OpExpr *) clause)->inputcollid, + 0, + NULL, + true); +} + +/* + * update_mergeclause_eclasses + * Make the cached EquivalenceClass links valid in a mergeclause + * restrictinfo. + * + * These pointers should have been set by process_equivalence or + * initialize_mergeclause_eclasses, but they might have been set to + * non-canonical ECs that got merged later. Chase up to the canonical + * merged parent if so. + */ +void +update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) +{ + /* Should be a merge clause ... */ + Assert(restrictinfo->mergeopfamilies != NIL); + /* ... with pointers already set */ + Assert(restrictinfo->left_ec != NULL); + Assert(restrictinfo->right_ec != NULL); + + /* Chase up to the top as needed */ + while (restrictinfo->left_ec->ec_merged) + restrictinfo->left_ec = restrictinfo->left_ec->ec_merged; + while (restrictinfo->right_ec->ec_merged) + restrictinfo->right_ec = restrictinfo->right_ec->ec_merged; +} + +/* + * find_mergeclauses_for_outer_pathkeys + * This routine attempts to find a list of mergeclauses that can be + * used with a specified ordering for the join's outer relation. + * If successful, it returns a list of mergeclauses. + * + * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path. + * 'restrictinfos' is a list of mergejoinable restriction clauses for the + * join relation being formed, in no particular order. + * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * The result is NIL if no merge can be done, else a maximal list of + * usable mergeclauses (represented as a list of their restrictinfo nodes). + * The list is ordered to match the pathkeys, as required for execution. + */ +List * +find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, + List *pathkeys, + List *restrictinfos) +{ + List *mergeclauses = NIL; + ListCell *i; + + /* make sure we have eclasses cached in the clauses */ + foreach(i, restrictinfos) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); + + update_mergeclause_eclasses(root, rinfo); + } + + foreach(i, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(i); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + List *matched_restrictinfos = NIL; + ListCell *j; + + /*---------- + * A mergejoin clause matches a pathkey if it has the same EC. + * If there are multiple matching clauses, take them all. In plain + * inner-join scenarios we expect only one match, because + * equivalence-class processing will have removed any redundant + * mergeclauses. However, in outer-join scenarios there might be + * multiple matches. 
An example is + * + * select * from a full join b + * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2; + * + * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three + * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed + * we *must* do so or we will be unable to form a valid plan. + * + * We expect that the given pathkeys list is canonical, which means + * no two members have the same EC, so it's not possible for this + * code to enter the same mergeclause into the result list twice. + * + * It's possible that multiple matching clauses might have different + * ECs on the other side, in which case the order we put them into our + * result makes a difference in the pathkeys required for the inner + * input rel. However this routine hasn't got any info about which + * order would be best, so we don't worry about that. + * + * It's also possible that the selected mergejoin clauses produce + * a noncanonical ordering of pathkeys for the inner side, ie, we + * might select clauses that reference b.v1, b.v2, b.v1 in that + * order. This is not harmful in itself, though it suggests that + * the clauses are partially redundant. Since the alternative is + * to omit mergejoin clauses and thereby possibly fail to generate a + * plan altogether, we live with it. make_inner_pathkeys_for_merge() + * has to delete duplicates when it constructs the inner pathkeys + * list, and we also have to deal with such cases specially in + * create_mergejoin_plan(). + *---------- + */ + foreach(j, restrictinfos) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); + EquivalenceClass *clause_ec; + + clause_ec = rinfo->outer_is_left ? + rinfo->left_ec : rinfo->right_ec; + if (clause_ec == pathkey_ec) + matched_restrictinfos = lappend(matched_restrictinfos, rinfo); + } + + /* + * If we didn't find a mergeclause, we're done --- any additional + * sort-key positions in the pathkeys are useless. (But we can still + * mergejoin if we found at least one mergeclause.) + */ + if (matched_restrictinfos == NIL) + break; + + /* + * If we did find usable mergeclause(s) for this sort-key position, + * add them to result list. + */ + mergeclauses = list_concat(mergeclauses, matched_restrictinfos); + } + + return mergeclauses; +} + +/* + * select_outer_pathkeys_for_merge + * Builds a pathkey list representing a possible sort ordering + * that can be used with the given mergeclauses. + * + * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses + * that will be used in a merge join. + * 'joinrel' is the join relation we are trying to construct. + * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the outer relation. + * + * Since we assume here that a sort is required, there is no particular use + * in matching any available ordering of the outerrel. (joinpath.c has an + * entirely separate code path for considering sort-free mergejoins.) Rather, + * it's interesting to try to match the requested query_pathkeys so that a + * second output sort may be avoided; and failing that, we try to list "more + * popular" keys (those with the most unmatched EquivalenceClass peers) + * earlier, in hopes of making the resulting ordering useful for as many + * higher-level mergejoins as possible. 
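+ *
+ * For instance (hypothetical tables a and b), given the mergeclauses
+ * a.x = b.x and a.y = b.y for a query that ends with ORDER BY a.x, a.y,
+ * we can emit the query_pathkeys (a.x, a.y) directly as the outer sort
+ * order, so the final output sort may be avoided; when query_pathkeys
+ * cannot be covered that way, we fall back to pure popularity ordering.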
+ */ +List * +select_outer_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + RelOptInfo *joinrel) +{ + List *pathkeys = NIL; + int nClauses = list_length(mergeclauses); + EquivalenceClass **ecs; + int *scores; + int necs; + ListCell *lc; + int j; + + /* Might have no mergeclauses */ + if (nClauses == 0) + return NIL; + + /* + * Make arrays of the ECs used by the mergeclauses (dropping any + * duplicates) and their "popularity" scores. + */ + ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *)); + scores = (int *) palloc(nClauses * sizeof(int)); + necs = 0; + + foreach(lc, mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + int score; + ListCell *lc2; + + /* get the outer eclass */ + update_mergeclause_eclasses(root, rinfo); + + if (rinfo->outer_is_left) + oeclass = rinfo->left_ec; + else + oeclass = rinfo->right_ec; + + /* reject duplicates */ + for (j = 0; j < necs; j++) + { + if (ecs[j] == oeclass) + break; + } + if (j < necs) + continue; + + /* compute score */ + score = 0; + foreach(lc2, oeclass->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + /* Potential future join partner? */ + if (!em->em_is_const && !em->em_is_child && + !bms_overlap(em->em_relids, joinrel->relids)) + score++; + } + + ecs[necs] = oeclass; + scores[necs] = score; + necs++; + } + + /* + * Find out if we have all the ECs mentioned in query_pathkeys; if so we + * can generate a sort order that's also useful for final output. There is + * no percentage in a partial match, though, so we have to have 'em all. + */ + if (root->query_pathkeys) + { + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + break; /* found match */ + } + if (j >= necs) + break; /* didn't find match */ + } + /* if we got to the end of the list, we have them all */ + if (lc == NULL) + { + /* copy query_pathkeys as starting point for our output */ + pathkeys = list_copy(root->query_pathkeys); + /* mark their ECs as already-emitted */ + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + { + scores[j] = -1; + break; + } + } + } + } + } + + /* + * Add remaining ECs to the list in popularity order, using a default sort + * ordering. (We could use qsort() here, but the list length is usually + * so small it's not worth it.) + */ + for (;;) + { + int best_j; + int best_score; + EquivalenceClass *ec; + PathKey *pathkey; + + best_j = 0; + best_score = scores[0]; + for (j = 1; j < necs; j++) + { + if (scores[j] > best_score) + { + best_j = j; + best_score = scores[j]; + } + } + if (best_score < 0) + break; /* all done */ + ec = ecs[best_j]; + scores[best_j] = -1; + pathkey = make_canonical_pathkey(root, + ec, + linitial_oid(ec->ec_opfamilies), + BTLessStrategyNumber, + false); + /* can't be redundant because no duplicate ECs */ + Assert(!pathkey_is_redundant(pathkey, pathkeys)); + pathkeys = lappend(pathkeys, pathkey); + } + + pfree(ecs); + pfree(scores); + + return pathkeys; +} + +/* + * make_inner_pathkeys_for_merge + * Builds a pathkey list representing the explicit sort order that + * must be applied to an inner path to make it usable with the + * given mergeclauses. 
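+ *
+ * For example, if the only mergeclause is a.x = b.y and the outer path is
+ * sorted by a.x, the inner relation must be sorted by b.y, using the same
+ * opfamily, sort direction, and nulls ordering as the outer pathkey.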
+ * + * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses + * that will be used in a merge join, in order. + * 'outer_pathkeys' are the already-known canonical pathkeys for the outer + * side of the join. + * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the inner relation. + * + * Note that it is not this routine's job to decide whether sorting is + * actually needed for a particular input path. Assume a sort is necessary; + * just make the keys, eh? + */ +List * +make_inner_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + List *outer_pathkeys) +{ + List *pathkeys = NIL; + EquivalenceClass *lastoeclass; + PathKey *opathkey; + ListCell *lc; + ListCell *lop; + + lastoeclass = NULL; + opathkey = NULL; + lop = list_head(outer_pathkeys); + + foreach(lc, mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + PathKey *pathkey; + + update_mergeclause_eclasses(root, rinfo); + + if (rinfo->outer_is_left) + { + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; + } + else + { + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; + } + + /* outer eclass should match current or next pathkeys */ + /* we check this carefully for debugging reasons */ + if (oeclass != lastoeclass) + { + if (!lop) + elog(ERROR, "too few pathkeys for mergeclauses"); + opathkey = (PathKey *) lfirst(lop); + lop = lnext(outer_pathkeys, lop); + lastoeclass = opathkey->pk_eclass; + if (oeclass != lastoeclass) + elog(ERROR, "outer pathkeys do not match mergeclause"); + } + + /* + * Often, we'll have same EC on both sides, in which case the outer + * pathkey is also canonical for the inner side, and we can skip a + * useless search. + */ + if (ieclass == oeclass) + pathkey = opathkey; + else + pathkey = make_canonical_pathkey(root, + ieclass, + opathkey->pk_opfamily, + opathkey->pk_strategy, + opathkey->pk_nulls_first); + + /* + * Don't generate redundant pathkeys (which can happen if multiple + * mergeclauses refer to the same EC). Because we do this, the output + * pathkey list isn't necessarily ordered like the mergeclauses, which + * complicates life for create_mergejoin_plan(). But if we didn't, + * we'd have a noncanonical sort key list, which would be bad; for one + * reason, it certainly wouldn't match any available sort order for + * the input relation. + */ + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + + return pathkeys; +} + +/* + * trim_mergeclauses_for_inner_pathkeys + * This routine trims a list of mergeclauses to include just those that + * work with a specified ordering for the join's inner relation. + * + * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the + * join relation being formed, in an order known to work for the + * currently-considered sort ordering of the join's outer rel. + * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path; + * it should be equal to, or a truncation of, the result of + * make_inner_pathkeys_for_merge for these mergeclauses. + * + * What we return will be a prefix of the given mergeclauses list. + * + * We need this logic because make_inner_pathkeys_for_merge's result isn't + * necessarily in the same order as the mergeclauses. 
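+ * (For example, mergeclauses referencing b.v1, b.v2, b.v1 in that order
+ * produce the inner pathkey list ({b.v1}, {b.v2}); the third clause maps
+ * back to the first pathkey rather than getting one of its own.)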
That means that if we + * consider an inner-rel pathkey list that is a truncation of that result, + * we might need to drop mergeclauses even though they match a surviving inner + * pathkey. This happens when they are to the right of a mergeclause that + * matches a removed inner pathkey. + * + * The mergeclauses must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + */ +List * +trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, + List *mergeclauses, + List *pathkeys) +{ + List *new_mergeclauses = NIL; + PathKey *pathkey; + EquivalenceClass *pathkey_ec; + bool matched_pathkey; + ListCell *lip; + ListCell *i; + + /* No pathkeys => no mergeclauses (though we don't expect this case) */ + if (pathkeys == NIL) + return NIL; + /* Initialize to consider first pathkey */ + lip = list_head(pathkeys); + pathkey = (PathKey *) lfirst(lip); + pathkey_ec = pathkey->pk_eclass; + lip = lnext(pathkeys, lip); + matched_pathkey = false; + + /* Scan mergeclauses to see how many we can use */ + foreach(i, mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); + EquivalenceClass *clause_ec; + + /* Assume we needn't do update_mergeclause_eclasses again here */ + + /* Check clause's inner-rel EC against current pathkey */ + clause_ec = rinfo->outer_is_left ? + rinfo->right_ec : rinfo->left_ec; + + /* If we don't have a match, attempt to advance to next pathkey */ + if (clause_ec != pathkey_ec) + { + /* If we had no clauses matching this inner pathkey, must stop */ + if (!matched_pathkey) + break; + + /* Advance to next inner pathkey, if any */ + if (lip == NULL) + break; + pathkey = (PathKey *) lfirst(lip); + pathkey_ec = pathkey->pk_eclass; + lip = lnext(pathkeys, lip); + matched_pathkey = false; + } + + /* If mergeclause matches current inner pathkey, we can use it */ + if (clause_ec == pathkey_ec) + { + new_mergeclauses = lappend(new_mergeclauses, rinfo); + matched_pathkey = true; + } + else + { + /* Else, no hope of adding any more mergeclauses */ + break; + } + } + + return new_mergeclauses; +} + + +/**************************************************************************** + * PATHKEY USEFULNESS CHECKS + * + * We only want to remember as many of the pathkeys of a path as have some + * potential use, either for subsequent mergejoins or for meeting the query's + * requested output ordering. This ensures that add_path() won't consider + * a path to have a usefully different ordering unless it really is useful. + * These routines check for usefulness of given pathkeys. + ****************************************************************************/ + +/* + * pathkeys_useful_for_merging + * Count the number of pathkeys that may be useful for mergejoins + * above the given relation. + * + * We consider a pathkey potentially useful if it corresponds to the merge + * ordering of either side of any joinclause for the rel. This might be + * overoptimistic, since joinclauses that require different other relations + * might never be usable at the same time, but trying to be exact is likely + * to be more trouble than it's worth. + * + * To avoid doubling the number of mergejoin paths considered, we would like + * to consider only one of the two scan directions (ASC or DESC) as useful + * for merging for any given target column. 
The choice is arbitrary unless
+ * one of the directions happens to match an ORDER BY key, in which case
+ * that direction should be preferred, in hopes of avoiding a final sort step.
+ * right_merge_direction() implements this heuristic.
+ */
+static int
+pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
+{
+    int         useful = 0;
+    ListCell   *i;
+
+    foreach(i, pathkeys)
+    {
+        PathKey    *pathkey = (PathKey *) lfirst(i);
+        bool        matched = false;
+        ListCell   *j;
+
+        /* If "wrong" direction, not useful for merging */
+        if (!right_merge_direction(root, pathkey))
+            break;
+
+        /*
+         * First look into the EquivalenceClass of the pathkey, to see if
+         * there are any members not yet joined to the rel. If so, it's
+         * surely possible to generate a mergejoin clause using them.
+         */
+        if (rel->has_eclass_joins &&
+            eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
+            matched = true;
+        else
+        {
+            /*
+             * Otherwise search the rel's joininfo list, which contains
+             * non-EquivalenceClass-derivable join clauses that might
+             * nonetheless be mergejoinable.
+             */
+            foreach(j, rel->joininfo)
+            {
+                RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+                if (restrictinfo->mergeopfamilies == NIL)
+                    continue;
+                update_mergeclause_eclasses(root, restrictinfo);
+
+                if (pathkey->pk_eclass == restrictinfo->left_ec ||
+                    pathkey->pk_eclass == restrictinfo->right_ec)
+                {
+                    matched = true;
+                    break;
+                }
+            }
+        }
+
+        /*
+         * If we didn't find a mergeclause, we're done --- any additional
+         * sort-key positions in the pathkeys are useless. (But we can still
+         * mergejoin if we found at least one mergeclause.)
+         */
+        if (matched)
+            useful++;
+        else
+            break;
+    }
+
+    return useful;
+}
+
+/*
+ * right_merge_direction
+ *      Check whether the pathkey embodies the preferred sort direction
+ *      for merging its target column.
+ */
+static bool
+right_merge_direction(PlannerInfo *root, PathKey *pathkey)
+{
+    ListCell   *l;
+
+    foreach(l, root->query_pathkeys)
+    {
+        PathKey    *query_pathkey = (PathKey *) lfirst(l);
+
+        if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
+            pathkey->pk_opfamily == query_pathkey->pk_opfamily)
+        {
+            /*
+             * Found a matching query sort column. Prefer this pathkey's
+             * direction iff it matches. Note that we ignore pk_nulls_first,
+             * which means that a sort might be needed anyway ... but we still
+             * want to prefer only one of the two possible directions, and we
+             * might as well use this one.
+             */
+            return (pathkey->pk_strategy == query_pathkey->pk_strategy);
+        }
+    }
+
+    /* If no matching ORDER BY request, prefer the ASC direction */
+    return (pathkey->pk_strategy == BTLessStrategyNumber);
+}
+
+/*
+ * pathkeys_useful_for_ordering
+ *      Count the number of pathkeys that are useful for meeting the
+ *      query's requested output ordering.
+ *
+ * Because we have the possibility of incremental sort, a prefix list of
+ * keys is potentially useful for improving the performance of the requested
+ * ordering. Thus we return 0 if no valuable keys are found, or the number
+ * of leading keys shared by the list and the requested ordering.
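+ *
+ * For instance (a hypothetical case), if the query requests ORDER BY a, b,
+ * a path already sorted by (a) is still worth keeping: an incremental sort
+ * need only sort rows within each group of equal a values to produce the
+ * full (a, b) ordering.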
+ */ +static int +pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) +{ + int n_common_pathkeys; + + if (root->query_pathkeys == NIL) + return 0; /* no special ordering requested */ + + if (pathkeys == NIL) + return 0; /* unordered path */ + + (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys, + &n_common_pathkeys); + + return n_common_pathkeys; +} + +/* + * truncate_useless_pathkeys + * Shorten the given pathkey list to just the useful pathkeys. + */ +List * +truncate_useless_pathkeys(PlannerInfo *root, + RelOptInfo *rel, + List *pathkeys) +{ + int nuseful; + int nuseful2; + + nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); + nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + + /* + * Note: not safe to modify input list destructively, but we can avoid + * copying the list if we're not actually going to change it + */ + if (nuseful == 0) + return NIL; + else if (nuseful == list_length(pathkeys)) + return pathkeys; + else + return list_truncate(list_copy(pathkeys), nuseful); +} + +/* + * has_useful_pathkeys + * Detect whether the specified rel could have any pathkeys that are + * useful according to truncate_useless_pathkeys(). + * + * This is a cheap test that lets us skip building pathkeys at all in very + * simple queries. It's OK to err in the direction of returning "true" when + * there really aren't any usable pathkeys, but erring in the other direction + * is bad --- so keep this in sync with the routines above! + * + * We could make the test more complex, for example checking to see if any of + * the joinclauses are really mergejoinable, but that likely wouldn't win + * often enough to repay the extra cycles. Queries with neither a join nor + * a sort are reasonably common, though, so this much work seems worthwhile. + */ +bool +has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) +{ + if (rel->joininfo != NIL || rel->has_eclass_joins) + return true; /* might be able to use pathkeys for merging */ + if (root->query_pathkeys != NIL) + return true; /* might be able to use them for ordering */ + return false; /* definitely useless */ +} diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c new file mode 100644 index 0000000..0725d95 --- /dev/null +++ b/src/backend/optimizer/path/tidpath.c @@ -0,0 +1,528 @@ +/*------------------------------------------------------------------------- + * + * tidpath.c + * Routines to determine which TID conditions are usable for scanning + * a given relation, and create TidPaths and TidRangePaths accordingly. + * + * For TidPaths, we look for WHERE conditions of the form + * "CTID = pseudoconstant", which can be implemented by just fetching + * the tuple directly via heap_fetch(). We can also handle OR'd conditions + * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr + * conditions of the form CTID = ANY(pseudoconstant_array). In particular + * this allows + * WHERE ctid IN (tid1, tid2, ...) + * + * As with indexscans, our definition of "pseudoconstant" is pretty liberal: + * we allow anything that doesn't involve a volatile function or a Var of + * the relation under consideration. Vars belonging to other relations of + * the query are allowed, giving rise to parameterized TID scans. + * + * We also support "WHERE CURRENT OF cursor" conditions (CurrentOfExpr), + * which amount to "CTID = run-time-determined-TID". 
These could in + * theory be translated to a simple comparison of CTID to the result of + * a function, but in practice it works better to keep the special node + * representation all the way through to execution. + * + * Additionally, TidRangePaths may be created for conditions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and + * AND-clauses composed of such conditions. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/path/tidpath.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/sysattr.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/restrictinfo.h" + + +/* + * Does this Var represent the CTID column of the specified baserel? + */ +static inline bool +IsCTIDVar(Var *var, RelOptInfo *rel) +{ + /* The vartype check is strictly paranoia */ + if (var->varattno == SelfItemPointerAttributeNumber && + var->vartype == TIDOID && + var->varno == rel->relid && + var->varlevelsup == 0) + return true; + return false; +} + +/* + * Check to see if a RestrictInfo is of the form + * CTID OP pseudoconstant + * or + * pseudoconstant OP CTID + * where OP is a binary operation, the CTID Var belongs to relation "rel", + * and nothing on the other side of the clause does. + */ +static bool +IsBinaryTidClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + OpExpr *node; + Node *arg1, + *arg2, + *other; + Relids other_relids; + + /* Must be an OpExpr */ + if (!is_opclause(rinfo->clause)) + return false; + node = (OpExpr *) rinfo->clause; + + /* OpExpr must have two arguments */ + if (list_length(node->args) != 2) + return false; + arg1 = linitial(node->args); + arg2 = lsecond(node->args); + + /* Look for CTID as either argument */ + other = NULL; + other_relids = NULL; + if (arg1 && IsA(arg1, Var) && + IsCTIDVar((Var *) arg1, rel)) + { + other = arg2; + other_relids = rinfo->right_relids; + } + if (!other && arg2 && IsA(arg2, Var) && + IsCTIDVar((Var *) arg2, rel)) + { + other = arg1; + other_relids = rinfo->left_relids; + } + if (!other) + return false; + + /* The other argument must be a pseudoconstant */ + if (bms_is_member(rel->relid, other_relids) || + contain_volatile_functions(other)) + return false; + + return true; /* success */ +} + +/* + * Check to see if a RestrictInfo is of the form + * CTID = pseudoconstant + * or + * pseudoconstant = CTID + * where the CTID Var belongs to relation "rel", and nothing on the + * other side of the clause does. + */ +static bool +IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + if (!IsBinaryTidClause(rinfo, rel)) + return false; + + if (((OpExpr *) rinfo->clause)->opno == TIDEqualOperator) + return true; + + return false; +} + +/* + * Check to see if a RestrictInfo is of the form + * CTID OP pseudoconstant + * or + * pseudoconstant OP CTID + * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs + * to relation "rel", and nothing on the other side of the clause does. 
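+ *
+ * A hypothetical query producing such a clause:
+ *
+ *		SELECT * FROM t WHERE ctid >= '(1000,1)' AND ctid < '(2000,1)';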
+ */ +static bool +IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + Oid opno; + + if (!IsBinaryTidClause(rinfo, rel)) + return false; + opno = ((OpExpr *) rinfo->clause)->opno; + + if (opno == TIDLessOperator || opno == TIDLessEqOperator || + opno == TIDGreaterOperator || opno == TIDGreaterEqOperator) + return true; + + return false; +} + +/* + * Check to see if a RestrictInfo is of the form + * CTID = ANY (pseudoconstant_array) + * where the CTID Var belongs to relation "rel", and nothing on the + * other side of the clause does. + */ +static bool +IsTidEqualAnyClause(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel) +{ + ScalarArrayOpExpr *node; + Node *arg1, + *arg2; + + /* Must be a ScalarArrayOpExpr */ + if (!(rinfo->clause && IsA(rinfo->clause, ScalarArrayOpExpr))) + return false; + node = (ScalarArrayOpExpr *) rinfo->clause; + + /* Operator must be tideq */ + if (node->opno != TIDEqualOperator) + return false; + if (!node->useOr) + return false; + Assert(list_length(node->args) == 2); + arg1 = linitial(node->args); + arg2 = lsecond(node->args); + + /* CTID must be first argument */ + if (arg1 && IsA(arg1, Var) && + IsCTIDVar((Var *) arg1, rel)) + { + /* The other argument must be a pseudoconstant */ + if (bms_is_member(rel->relid, pull_varnos(root, arg2)) || + contain_volatile_functions(arg2)) + return false; + + return true; /* success */ + } + + return false; +} + +/* + * Check to see if a RestrictInfo is a CurrentOfExpr referencing "rel". + */ +static bool +IsCurrentOfClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + CurrentOfExpr *node; + + /* Must be a CurrentOfExpr */ + if (!(rinfo->clause && IsA(rinfo->clause, CurrentOfExpr))) + return false; + node = (CurrentOfExpr *) rinfo->clause; + + /* If it references this rel, we're good */ + if (node->cvarno == rel->relid) + return true; + + return false; +} + +/* + * Extract a set of CTID conditions from the given RestrictInfo + * + * Returns a List of CTID qual RestrictInfos for the specified rel (with + * implicit OR semantics across the list), or NIL if there are no usable + * conditions. + * + * This function considers only base cases; AND/OR combination is handled + * below. Therefore the returned List never has more than one element. + * (Using a List may seem a bit weird, but it simplifies the caller.) + */ +static List * +TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel) +{ + /* + * We may ignore pseudoconstant clauses (they can't contain Vars, so could + * not match anyway). + */ + if (rinfo->pseudoconstant) + return NIL; + + /* + * If clause must wait till after some lower-security-level restriction + * clause, reject it. + */ + if (!restriction_is_securely_promotable(rinfo, rel)) + return NIL; + + /* + * Check all base cases. If we get a match, return the clause. + */ + if (IsTidEqualClause(rinfo, rel) || + IsTidEqualAnyClause(root, rinfo, rel) || + IsCurrentOfClause(rinfo, rel)) + return list_make1(rinfo); + + return NIL; +} + +/* + * Extract a set of CTID conditions from implicit-AND List of RestrictInfos + * + * Returns a List of CTID qual RestrictInfos for the specified rel (with + * implicit OR semantics across the list), or NIL if there are no usable + * equality conditions. + * + * This function is just concerned with handling AND/OR recursion. 
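+ *
+ * As a hypothetical illustration, for
+ *
+ *		WHERE ctid = '(0,1)' OR (ctid = '(0,2)' AND x = 42)
+ *
+ * we return the implicitly-OR'ed list (ctid = '(0,1)', ctid = '(0,2)');
+ * the x = 42 condition is not extracted here, and the original OR clause
+ * is still applied as an ordinary filter to the fetched rows.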
+ */ +static List * +TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel) +{ + List *rlst = NIL; + ListCell *l; + + foreach(l, rlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (restriction_is_or_clause(rinfo)) + { + ListCell *j; + + /* + * We must be able to extract a CTID condition from every + * sub-clause of an OR, or we can't use it. + */ + foreach(j, ((BoolExpr *) rinfo->orclause)->args) + { + Node *orarg = (Node *) lfirst(j); + List *sublist; + + /* OR arguments should be ANDs or sub-RestrictInfos */ + if (is_andclause(orarg)) + { + List *andargs = ((BoolExpr *) orarg)->args; + + /* Recurse in case there are sub-ORs */ + sublist = TidQualFromRestrictInfoList(root, andargs, rel); + } + else + { + RestrictInfo *rinfo = castNode(RestrictInfo, orarg); + + Assert(!restriction_is_or_clause(rinfo)); + sublist = TidQualFromRestrictInfo(root, rinfo, rel); + } + + /* + * If nothing found in this arm, we can't do anything with + * this OR clause. + */ + if (sublist == NIL) + { + rlst = NIL; /* forget anything we had */ + break; /* out of loop over OR args */ + } + + /* + * OK, continue constructing implicitly-OR'ed result list. + */ + rlst = list_concat(rlst, sublist); + } + } + else + { + /* Not an OR clause, so handle base cases */ + rlst = TidQualFromRestrictInfo(root, rinfo, rel); + } + + /* + * Stop as soon as we find any usable CTID condition. In theory we + * could get CTID equality conditions from different AND'ed clauses, + * in which case we could try to pick the most efficient one. In + * practice, such usage seems very unlikely, so we don't bother; we + * just exit as soon as we find the first candidate. + */ + if (rlst) + break; + } + + return rlst; +} + +/* + * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos + * + * Returns a List of CTID range qual RestrictInfos for the specified rel + * (with implicit AND semantics across the list), or NIL if there are no + * usable range conditions or if the rel's table AM does not support TID range + * scans. + */ +static List * +TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) +{ + List *rlst = NIL; + ListCell *l; + + if ((rel->amflags & AMFLAG_HAS_TID_RANGE) == 0) + return NIL; + + foreach(l, rlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (IsTidRangeClause(rinfo, rel)) + rlst = lappend(rlst, rinfo); + } + + return rlst; +} + +/* + * Given a list of join clauses involving our rel, create a parameterized + * TidPath for each one that is a suitable TidEqual clause. + * + * In principle we could combine clauses that reference the same outer rels, + * but it doesn't seem like such cases would arise often enough to be worth + * troubling over. + */ +static void +BuildParameterizedTidPaths(PlannerInfo *root, RelOptInfo *rel, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + List *tidquals; + Relids required_outer; + + /* + * Validate whether each clause is actually usable; we must check this + * even when examining clauses generated from an EquivalenceClass, + * since they might not satisfy the restriction on not having Vars of + * our rel on the other side, or somebody might've built an operator + * class that accepts type "tid" but has other operators in it. + * + * We currently consider only TidEqual join clauses. 
In principle we + * might find a suitable ScalarArrayOpExpr in the rel's joininfo list, + * but it seems unlikely to be worth expending the cycles to check. + * And we definitely won't find a CurrentOfExpr here. Hence, we don't + * use TidQualFromRestrictInfo; but this must match that function + * otherwise. + */ + if (rinfo->pseudoconstant || + !restriction_is_securely_promotable(rinfo, rel) || + !IsTidEqualClause(rinfo, rel)) + continue; + + /* + * Check if clause can be moved to this rel; this is probably + * redundant when considering EC-derived clauses, but we must check it + * for "loose" join clauses. + */ + if (!join_clause_is_movable_to(rinfo, rel)) + continue; + + /* OK, make list of clauses for this path */ + tidquals = list_make1(rinfo); + + /* Compute required outer rels for this path */ + required_outer = bms_union(rinfo->required_relids, rel->lateral_relids); + required_outer = bms_del_member(required_outer, rel->relid); + + add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals, + required_outer)); + } +} + +/* + * Test whether an EquivalenceClass member matches our rel's CTID Var. + * + * This is a callback for use by generate_implied_equalities_for_column. + */ +static bool +ec_member_matches_ctid(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg) +{ + if (em->em_expr && IsA(em->em_expr, Var) && + IsCTIDVar((Var *) em->em_expr, rel)) + return true; + return false; +} + +/* + * create_tidscan_paths + * Create paths corresponding to direct TID scans of the given rel. + * + * Candidate paths are added to the rel's pathlist (using add_path). + */ +void +create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) +{ + List *tidquals; + List *tidrangequals; + + /* + * If any suitable quals exist in the rel's baserestrict list, generate a + * plain (unparameterized) TidPath with them. + */ + tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel); + + if (tidquals != NIL) + { + /* + * This path uses no join clauses, but it could still have required + * parameterization due to LATERAL refs in its tlist. + */ + Relids required_outer = rel->lateral_relids; + + add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals, + required_outer)); + } + + /* + * If there are range quals in the baserestrict list, generate a + * TidRangePath. + */ + tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo, + rel); + + if (tidrangequals != NIL) + { + /* + * This path uses no join clauses, but it could still have required + * parameterization due to LATERAL refs in its tlist. + */ + Relids required_outer = rel->lateral_relids; + + add_path(rel, (Path *) create_tidrangescan_path(root, rel, + tidrangequals, + required_outer)); + } + + /* + * Try to generate parameterized TidPaths using equality clauses extracted + * from EquivalenceClasses. (This is important since simple "t1.ctid = + * t2.ctid" clauses will turn into ECs.) + */ + if (rel->has_eclass_joins) + { + List *clauses; + + /* Generate clauses, skipping any that join to lateral_referencers */ + clauses = generate_implied_equalities_for_column(root, + rel, + ec_member_matches_ctid, + NULL, + rel->lateral_referencers); + + /* Generate a path for each usable join clause */ + BuildParameterizedTidPaths(root, rel, clauses); + } + + /* + * Also consider parameterized TidPaths using "loose" join quals. Quals + * of the form "t1.ctid = t2.ctid" would turn into these if they are outer + * join quals, for example. 
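+ *
+ * A hypothetical query of that shape:
+ *
+ *		SELECT * FROM t1 LEFT JOIN t2 ON t1.ctid = t2.ctid;
+ *
+ * which can use a nestloop with a parameterized TID scan on t2.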
+ */
+    BuildParameterizedTidPaths(root, rel, rel->joininfo);
+}