author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:17:33 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:17:33 +0000
commit     5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch)
tree       739caf8c461053357daa9f162bef34516c7bf452 /src/backend/optimizer/path
parent     Initial commit. (diff)
Adding upstream version 15.5. (upstream/15.5)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--  src/backend/optimizer/path/Makefile     |   26
-rw-r--r--  src/backend/optimizer/path/allpaths.c   | 4668
-rw-r--r--  src/backend/optimizer/path/clausesel.c  | 1000
-rw-r--r--  src/backend/optimizer/path/costsize.c   | 6221
-rw-r--r--  src/backend/optimizer/path/equivclass.c | 3226
-rw-r--r--  src/backend/optimizer/path/indxpath.c   | 3817
-rw-r--r--  src/backend/optimizer/path/joinpath.c   | 2367
-rw-r--r--  src/backend/optimizer/path/joinrels.c   | 1783
-rw-r--r--  src/backend/optimizer/path/pathkeys.c   | 1917
-rw-r--r--  src/backend/optimizer/path/tidpath.c    |  528
10 files changed, 25553 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
new file mode 100644
index 0000000..1e199ff
--- /dev/null
+++ b/src/backend/optimizer/path/Makefile
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for optimizer/path
+#
+# IDENTIFICATION
+# src/backend/optimizer/path/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/optimizer/path
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = \
+ allpaths.o \
+ clausesel.o \
+ costsize.o \
+ equivclass.o \
+ indxpath.o \
+ joinpath.o \
+ joinrels.o \
+ pathkeys.o \
+ tidpath.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
new file mode 100644
index 0000000..4e02439
--- /dev/null
+++ b/src/backend/optimizer/path/allpaths.c
@@ -0,0 +1,4668 @@
+/*-------------------------------------------------------------------------
+ *
+ * allpaths.c
+ * Routines to find possible search paths for processing a query
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/allpaths.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+#include "access/sysattr.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_proc.h"
+#include "foreign/fdwapi.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#ifdef OPTIMIZER_DEBUG
+#include "nodes/print.h"
+#endif
+#include "optimizer/appendinfo.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/geqo.h"
+#include "optimizer/inherit.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planner.h"
+#include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_clause.h"
+#include "parser/parsetree.h"
+#include "partitioning/partbounds.h"
+#include "partitioning/partprune.h"
+#include "rewrite/rewriteManip.h"
+#include "utils/lsyscache.h"
+
+
+/* Bitmask flags for pushdown_safety_info.unsafeFlags */
+#define UNSAFE_HAS_VOLATILE_FUNC (1 << 0)
+#define UNSAFE_HAS_SET_FUNC (1 << 1)
+#define UNSAFE_NOTIN_DISTINCTON_CLAUSE (1 << 2)
+#define UNSAFE_NOTIN_PARTITIONBY_CLAUSE (1 << 3)
+#define UNSAFE_TYPE_MISMATCH (1 << 4)
+
+/* results of subquery_is_pushdown_safe */
+typedef struct pushdown_safety_info
+{
+ unsigned char *unsafeFlags; /* bitmask of reasons why this target list
+ * column is unsafe for qual pushdown, or 0 if
+ * no reason. */
+ bool unsafeVolatile; /* don't push down volatile quals */
+ bool unsafeLeaky; /* don't push down leaky quals */
+} pushdown_safety_info;
+
+/* Return type for qual_is_pushdown_safe */
+typedef enum pushdown_safe_type
+{
+ PUSHDOWN_UNSAFE, /* unsafe to push qual into subquery */
+ PUSHDOWN_SAFE, /* safe to push qual into subquery */
+ PUSHDOWN_WINDOWCLAUSE_RUNCOND /* unsafe, but may work as WindowClause
+ * run condition */
+} pushdown_safe_type;
+
+/* These parameters are set by GUC */
+bool enable_geqo = false; /* just in case GUC doesn't set it */
+int geqo_threshold;
+int min_parallel_table_scan_size;
+int min_parallel_index_scan_size;
+
+/* Hook for plugins to get control in set_rel_pathlist() */
+set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
+
+/* Hook for plugins to replace standard_join_search() */
+join_search_hook_type join_search_hook = NULL;
+
+
+static void set_base_rel_consider_startup(PlannerInfo *root);
+static void set_base_rel_sizes(PlannerInfo *root);
+static void set_base_rel_pathlists(PlannerInfo *root);
+static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
+static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
+static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
+static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
+static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
+static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys);
+static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Relids required_outer);
+static void accumulate_append_subpath(Path *path,
+ List **subpaths,
+ List **special_subpaths);
+static Path *get_singleton_append_subpath(Path *path);
+static void set_dummy_rel_pathlist(RelOptInfo *rel);
+static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
+static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
+static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
+ pushdown_safety_info *safetyInfo);
+static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
+ pushdown_safety_info *safetyInfo);
+static void check_output_expressions(Query *subquery,
+ pushdown_safety_info *safetyInfo);
+static void compare_tlist_datatypes(List *tlist, List *colTypes,
+ pushdown_safety_info *safetyInfo);
+static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
+static pushdown_safe_type qual_is_pushdown_safe(Query *subquery, Index rti,
+ RestrictInfo *rinfo,
+ pushdown_safety_info *safetyInfo);
+static void subquery_push_qual(Query *subquery,
+ RangeTblEntry *rte, Index rti, Node *qual);
+static void recurse_push_qual(Node *setOp, Query *topquery,
+ RangeTblEntry *rte, Index rti, Node *qual);
+static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
+ Bitmapset *extra_used_attrs);
+
+
+/*
+ * make_one_rel
+ * Finds all possible access paths for executing a query, returning a
+ * single rel that represents the join of all base rels in the query.
+ */
+RelOptInfo *
+make_one_rel(PlannerInfo *root, List *joinlist)
+{
+ RelOptInfo *rel;
+ Index rti;
+ double total_pages;
+
+ /*
+ * Construct the all_baserels Relids set.
+ */
+ root->all_baserels = NULL;
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (brel == NULL)
+ continue;
+
+ Assert(brel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (brel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
+ }
+
+ /* Mark base rels as to whether we care about fast-start plans */
+ set_base_rel_consider_startup(root);
+
+ /*
+ * Compute size estimates and consider_parallel flags for each base rel.
+ */
+ set_base_rel_sizes(root);
+
+ /*
+ * We should now have size estimates for every actual table involved in
+ * the query, and we also know which if any have been deleted from the
+ * query by join removal, pruned by partition pruning, or eliminated by
+ * constraint exclusion. So we can now compute total_table_pages.
+ *
+ * Note that appendrels are not double-counted here, even though we don't
+ * bother to distinguish RelOptInfos for appendrel parents, because the
+ * parents will have pages = 0.
+ *
+ * XXX if a table is self-joined, we will count it once per appearance,
+ * which perhaps is the wrong thing ... but that's not completely clear,
+ * and detecting self-joins here is difficult, so ignore it for now.
+ */
+ total_pages = 0;
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ if (brel == NULL)
+ continue;
+
+ Assert(brel->relid == rti); /* sanity check on array */
+
+ if (IS_DUMMY_REL(brel))
+ continue;
+
+ if (IS_SIMPLE_REL(brel))
+ total_pages += (double) brel->pages;
+ }
+ root->total_table_pages = total_pages;
+
+ /*
+ * Generate access paths for each base rel.
+ */
+ set_base_rel_pathlists(root);
+
+ /*
+ * Generate access paths for the entire join tree.
+ */
+ rel = make_rel_from_joinlist(root, joinlist);
+
+ /*
+ * The result should join all and only the query's base rels.
+ */
+ Assert(bms_equal(rel->relids, root->all_baserels));
+
+ return rel;
+}
+
+/*
+ * set_base_rel_consider_startup
+ * Set the consider_[param_]startup flags for each base-relation entry.
+ *
+ * For the moment, we only deal with consider_param_startup here; because the
+ * logic for consider_startup is pretty trivial and is the same for every base
+ * relation, we just let build_simple_rel() initialize that flag correctly to
+ * start with. If that logic ever gets more complicated it would probably
+ * be better to move it here.
+ */
+static void
+set_base_rel_consider_startup(PlannerInfo *root)
+{
+ /*
+ * Since parameterized paths can only be used on the inside of a nestloop
+ * join plan, there is usually little value in considering fast-start
+ * plans for them. However, for relations that are on the RHS of a SEMI
+ * or ANTI join, a fast-start plan can be useful because we're only going
+ * to care about fetching one tuple anyway.
+ *
+ * To minimize growth of planning time, we currently restrict this to
+ * cases where the RHS is a single base relation, not a join; there is no
+ * provision for consider_param_startup to get set at all on joinrels.
+ * Also we don't worry about appendrels. costsize.c's costing rules for
+ * nestloop semi/antijoins don't consider such cases either.
+ */
+ ListCell *lc;
+
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+ int varno;
+
+ if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
+ bms_get_singleton_member(sjinfo->syn_righthand, &varno))
+ {
+ RelOptInfo *rel = find_base_rel(root, varno);
+
+ rel->consider_param_startup = true;
+ }
+ }
+}
+
+/*
+ * set_base_rel_sizes
+ * Set the size estimates (rows and widths) for each base-relation entry.
+ * Also determine whether to consider parallel paths for base relations.
+ *
+ * We do this in a separate pass over the base rels so that rowcount
+ * estimates are available for parameterized path generation, and also so
+ * that each rel's consider_parallel flag is set correctly before we begin to
+ * generate paths.
+ */
+static void
+set_base_rel_sizes(PlannerInfo *root)
+{
+ Index rti;
+
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *rel = root->simple_rel_array[rti];
+ RangeTblEntry *rte;
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (rel == NULL)
+ continue;
+
+ Assert(rel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (rel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ rte = root->simple_rte_array[rti];
+
+ /*
+ * If parallelism is allowable for this query in general, see whether
+ * it's allowable for this rel in particular. We have to do this
+ * before set_rel_size(), because (a) if this rel is an inheritance
+ * parent, set_append_rel_size() will use and perhaps change the rel's
+ * consider_parallel flag, and (b) for some RTE types, set_rel_size()
+ * goes ahead and makes paths immediately.
+ */
+ if (root->glob->parallelModeOK)
+ set_rel_consider_parallel(root, rel, rte);
+
+ set_rel_size(root, rel, rti, rte);
+ }
+}
+
+/*
+ * set_base_rel_pathlists
+ * Finds all paths available for scanning each base-relation entry.
+ * Sequential scan and any available indices are considered.
+ * Each useful path is attached to its relation's 'pathlist' field.
+ */
+static void
+set_base_rel_pathlists(PlannerInfo *root)
+{
+ Index rti;
+
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *rel = root->simple_rel_array[rti];
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (rel == NULL)
+ continue;
+
+ Assert(rel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (rel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
+ }
+}
+
+/*
+ * set_rel_size
+ * Set size estimates for a base relation
+ */
+static void
+set_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ if (rel->reloptkind == RELOPT_BASEREL &&
+ relation_excluded_by_constraints(root, rel, rte))
+ {
+ /*
+ * We proved we don't need to scan the rel via constraint exclusion,
+ * so set up a single dummy path for it. Here we only check this for
+ * regular baserels; if it's an otherrel, CE was already checked in
+ * set_append_rel_size().
+ *
+ * In this case, we go ahead and set up the relation's path right away
+ * instead of leaving it for set_rel_pathlist to do. This is because
+ * we don't have a convention for marking a rel as dummy except by
+ * assigning a dummy path to it.
+ */
+ set_dummy_rel_pathlist(rel);
+ }
+ else if (rte->inh)
+ {
+ /* It's an "append relation", process accordingly */
+ set_append_rel_size(root, rel, rti, rte);
+ }
+ else
+ {
+ switch (rel->rtekind)
+ {
+ case RTE_RELATION:
+ if (rte->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ /* Foreign table */
+ set_foreign_size(root, rel, rte);
+ }
+ else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ /*
+ * We could get here if asked to scan a partitioned table
+ * with ONLY. In that case we shouldn't scan any of the
+ * partitions, so mark it as a dummy rel.
+ */
+ set_dummy_rel_pathlist(rel);
+ }
+ else if (rte->tablesample != NULL)
+ {
+ /* Sampled relation */
+ set_tablesample_rel_size(root, rel, rte);
+ }
+ else
+ {
+ /* Plain relation */
+ set_plain_rel_size(root, rel, rte);
+ }
+ break;
+ case RTE_SUBQUERY:
+
+ /*
+ * Subqueries don't support making a choice between
+ * parameterized and unparameterized paths, so just go ahead
+ * and build their paths immediately.
+ */
+ set_subquery_pathlist(root, rel, rti, rte);
+ break;
+ case RTE_FUNCTION:
+ set_function_size_estimates(root, rel);
+ break;
+ case RTE_TABLEFUNC:
+ set_tablefunc_size_estimates(root, rel);
+ break;
+ case RTE_VALUES:
+ set_values_size_estimates(root, rel);
+ break;
+ case RTE_CTE:
+
+ /*
+ * CTEs don't support making a choice between parameterized
+ * and unparameterized paths, so just go ahead and build their
+ * paths immediately.
+ */
+ if (rte->self_reference)
+ set_worktable_pathlist(root, rel, rte);
+ else
+ set_cte_pathlist(root, rel, rte);
+ break;
+ case RTE_NAMEDTUPLESTORE:
+ /* Might as well just build the path immediately */
+ set_namedtuplestore_pathlist(root, rel, rte);
+ break;
+ case RTE_RESULT:
+ /* Might as well just build the path immediately */
+ set_result_pathlist(root, rel, rte);
+ break;
+ default:
+ elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
+ break;
+ }
+ }
+
+ /*
+ * We insist that all non-dummy rels have a nonzero rowcount estimate.
+ */
+ Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
+}
+
+/*
+ * set_rel_pathlist
+ * Build access paths for a base relation
+ */
+static void
+set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ if (IS_DUMMY_REL(rel))
+ {
+ /* We already proved the relation empty, so nothing more to do */
+ }
+ else if (rte->inh)
+ {
+ /* It's an "append relation", process accordingly */
+ set_append_rel_pathlist(root, rel, rti, rte);
+ }
+ else
+ {
+ switch (rel->rtekind)
+ {
+ case RTE_RELATION:
+ if (rte->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ /* Foreign table */
+ set_foreign_pathlist(root, rel, rte);
+ }
+ else if (rte->tablesample != NULL)
+ {
+ /* Sampled relation */
+ set_tablesample_rel_pathlist(root, rel, rte);
+ }
+ else
+ {
+ /* Plain relation */
+ set_plain_rel_pathlist(root, rel, rte);
+ }
+ break;
+ case RTE_SUBQUERY:
+ /* Subquery --- fully handled during set_rel_size */
+ break;
+ case RTE_FUNCTION:
+ /* RangeFunction */
+ set_function_pathlist(root, rel, rte);
+ break;
+ case RTE_TABLEFUNC:
+ /* Table Function */
+ set_tablefunc_pathlist(root, rel, rte);
+ break;
+ case RTE_VALUES:
+ /* Values list */
+ set_values_pathlist(root, rel, rte);
+ break;
+ case RTE_CTE:
+ /* CTE reference --- fully handled during set_rel_size */
+ break;
+ case RTE_NAMEDTUPLESTORE:
+ /* tuplestore reference --- fully handled during set_rel_size */
+ break;
+ case RTE_RESULT:
+ /* simple Result --- fully handled during set_rel_size */
+ break;
+ default:
+ elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
+ break;
+ }
+ }
+
+ /*
+ * Allow a plugin to editorialize on the set of Paths for this base
+ * relation. It could add new paths (such as CustomPaths) by calling
+ * add_path(), or add_partial_path() if parallel aware. It could also
+ * delete or modify paths added by the core code.
+ */
+ if (set_rel_pathlist_hook)
+ (*set_rel_pathlist_hook) (root, rel, rti, rte);
+
+ /*
+ * If this is a baserel, we should normally consider gathering any partial
+ * paths we may have created for it. We have to do this after calling the
+ * set_rel_pathlist_hook, else it cannot add partial paths to be included
+ * here.
+ *
+ * However, if this is an inheritance child, skip it. Otherwise, we could
+ * end up with a very large number of gather nodes, each trying to grab
+ * its own pool of workers. Instead, we'll consider gathering partial
+ * paths for the parent appendrel.
+ *
+ * Also, if this is the topmost scan/join rel, we postpone gathering until
+ * the final scan/join targetlist is available (see grouping_planner).
+ */
+ if (rel->reloptkind == RELOPT_BASEREL &&
+ !bms_equal(rel->relids, root->all_baserels))
+ generate_useful_gather_paths(root, rel, false);
+
+ /* Now find the cheapest of the paths for this rel */
+ set_cheapest(rel);
+
+#ifdef OPTIMIZER_DEBUG
+ debug_print_rel(root, rel);
+#endif
+}
+
+/*
+ * set_plain_rel_size
+ * Set size estimates for a plain relation (no subquery, no inheritance)
+ */
+static void
+set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /*
+ * Test any partial indexes of rel for applicability. We must do this
+ * first since partial unique indexes can affect size estimates.
+ */
+ check_index_predicates(root, rel);
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * If this relation could possibly be scanned from within a worker, then set
+ * its consider_parallel flag.
+ */
+static void
+set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte)
+{
+ /*
+ * The flag has previously been initialized to false, so we can just
+ * return if it becomes clear that we can't safely set it.
+ */
+ Assert(!rel->consider_parallel);
+
+ /* Don't call this if parallelism is disallowed for the entire query. */
+ Assert(root->glob->parallelModeOK);
+
+ /* This should only be called for baserels and appendrel children. */
+ Assert(IS_SIMPLE_REL(rel));
+
+ /* Assorted checks based on rtekind. */
+ switch (rte->rtekind)
+ {
+ case RTE_RELATION:
+
+ /*
+ * Currently, parallel workers can't access the leader's temporary
+ * tables. We could possibly relax this if we wrote all of its
+ * local buffers at the start of the query and made no changes
+ * thereafter (maybe we could allow hint bit changes), and if we
+ * taught the workers to read them. Writing a large number of
+ * temporary buffers could be expensive, though, and we don't have
+ * the rest of the necessary infrastructure right now anyway. So
+ * for now, bail out if we see a temporary table.
+ */
+ if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
+ return;
+
+ /*
+ * Table sampling can be pushed down to workers if the sample
+ * function and its arguments are safe.
+ */
+ if (rte->tablesample != NULL)
+ {
+ char proparallel = func_parallel(rte->tablesample->tsmhandler);
+
+ if (proparallel != PROPARALLEL_SAFE)
+ return;
+ if (!is_parallel_safe(root, (Node *) rte->tablesample->args))
+ return;
+ }
+
+ /*
+ * Ask FDWs whether they can support performing a ForeignScan
+ * within a worker. Most often, the answer will be no. For
+ * example, if the nature of the FDW is such that it opens a TCP
+ * connection with a remote server, each parallel worker would end
+ * up with a separate connection, and these connections might not
+ * be appropriately coordinated between workers and the leader.
+ */
+ if (rte->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ Assert(rel->fdwroutine);
+ if (!rel->fdwroutine->IsForeignScanParallelSafe)
+ return;
+ if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
+ return;
+ }
+
+ /*
+ * There are additional considerations for appendrels, which we'll
+ * deal with in set_append_rel_size and set_append_rel_pathlist.
+ * For now, just set consider_parallel based on the rel's own
+ * quals and targetlist.
+ */
+ break;
+
+ case RTE_SUBQUERY:
+
+ /*
+ * There's no intrinsic problem with scanning a subquery-in-FROM
+ * (as distinct from a SubPlan or InitPlan) in a parallel worker.
+ * If the subquery doesn't happen to have any parallel-safe paths,
+ * then flagging it as consider_parallel won't change anything,
+ * but that's true for plain tables, too. We must set
+ * consider_parallel based on the rel's own quals and targetlist,
+ * so that if a subquery path is parallel-safe but the quals and
+ * projection we're sticking onto it are not, we correctly mark
+ * the SubqueryScanPath as not parallel-safe. (Note that
+ * set_subquery_pathlist() might push some of these quals down
+ * into the subquery itself, but that doesn't change anything.)
+ *
+ * We can't push sub-select containing LIMIT/OFFSET to workers as
+ * there is no guarantee that the row order will be fully
+ * deterministic, and applying LIMIT/OFFSET will lead to
+ * inconsistent results at the top-level. (In some cases, where
+ * the result is ordered, we could relax this restriction. But it
+ * doesn't currently seem worth expending extra effort to do so.)
+ */
+ {
+ Query *subquery = castNode(Query, rte->subquery);
+
+ if (limit_needed(subquery))
+ return;
+ }
+ break;
+
+ case RTE_JOIN:
+ /* Shouldn't happen; we're only considering baserels here. */
+ Assert(false);
+ return;
+
+ case RTE_FUNCTION:
+ /* Check for parallel-restricted functions. */
+ if (!is_parallel_safe(root, (Node *) rte->functions))
+ return;
+ break;
+
+ case RTE_TABLEFUNC:
+ /* not parallel safe */
+ return;
+
+ case RTE_VALUES:
+ /* Check for parallel-restricted functions. */
+ if (!is_parallel_safe(root, (Node *) rte->values_lists))
+ return;
+ break;
+
+ case RTE_CTE:
+
+ /*
+ * CTE tuplestores aren't shared among parallel workers, so we
+ * force all CTE scans to happen in the leader. Also, populating
+ * the CTE would require executing a subplan that's not available
+ * in the worker, might be parallel-restricted, and must get
+ * executed only once.
+ */
+ return;
+
+ case RTE_NAMEDTUPLESTORE:
+
+ /*
+ * tuplestore cannot be shared, at least without more
+ * infrastructure to support that.
+ */
+ return;
+
+ case RTE_RESULT:
+ /* RESULT RTEs, in themselves, are no problem. */
+ break;
+ }
+
+ /*
+ * If there's anything in baserestrictinfo that's parallel-restricted, we
+ * give up on parallelizing access to this relation. We could consider
+ * instead postponing application of the restricted quals until we're
+ * above all the parallelism in the plan tree, but it's not clear that
+ * that would be a win in very many cases, and it might be tricky to make
+ * outer join clauses work correctly. It would likely break equivalence
+ * classes, too.
+ */
+ if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo))
+ return;
+
+ /*
+ * Likewise, if the relation's outputs are not parallel-safe, give up.
+ * (Usually, they're just Vars, but sometimes they're not.)
+ */
+ if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs))
+ return;
+
+ /* We have a winner. */
+ rel->consider_parallel = true;
+}
+
+/*
+ * set_plain_rel_pathlist
+ * Build access paths for a plain relation (no subquery, no inheritance)
+ */
+static void
+set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+
+ /*
+ * We don't support pushing join clauses into the quals of a seqscan, but
+ * it could still have required parameterization due to LATERAL refs in
+ * its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Consider sequential scan */
+ add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
+
+ /* If appropriate, consider parallel sequential scan */
+ if (rel->consider_parallel && required_outer == NULL)
+ create_plain_partial_paths(root, rel);
+
+ /* Consider index scans */
+ create_index_paths(root, rel);
+
+ /* Consider TID scans */
+ create_tidscan_paths(root, rel);
+}
+
+/*
+ * create_plain_partial_paths
+ * Build partial access paths for parallel scan of a plain relation
+ */
+static void
+create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ int parallel_workers;
+
+ parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
+ max_parallel_workers_per_gather);
+
+ /* If any limit was set to zero, the user doesn't want a parallel scan. */
+ if (parallel_workers <= 0)
+ return;
+
+ /* Add an unordered partial path based on a parallel sequential scan. */
+ add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
+}
+
+/*
+ * set_tablesample_rel_size
+ * Set size estimates for a sampled relation
+ */
+static void
+set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ TableSampleClause *tsc = rte->tablesample;
+ TsmRoutine *tsm;
+ BlockNumber pages;
+ double tuples;
+
+ /*
+ * Test any partial indexes of rel for applicability. We must do this
+ * first since partial unique indexes can affect size estimates.
+ */
+ check_index_predicates(root, rel);
+
+ /*
+ * Call the sampling method's estimation function to estimate the number
+ * of pages it will read and the number of tuples it will return. (Note:
+ * we assume the function returns sane values.)
+ */
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+ tsm->SampleScanGetSampleSize(root, rel, tsc->args,
+ &pages, &tuples);
+
+ /*
+ * For the moment, because we will only consider a SampleScan path for the
+ * rel, it's okay to just overwrite the pages and tuples estimates for the
+ * whole relation. If we ever consider multiple path types for sampled
+ * rels, we'll need more complication.
+ */
+ rel->pages = pages;
+ rel->tuples = tuples;
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_tablesample_rel_pathlist
+ * Build access paths for a sampled relation
+ */
+static void
+set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+ Path *path;
+
+ /*
+ * We don't support pushing join clauses into the quals of a samplescan,
+ * but it could still have required parameterization due to LATERAL refs
+ * in its tlist or TABLESAMPLE arguments.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Consider sampled scan */
+ path = create_samplescan_path(root, rel, required_outer);
+
+ /*
+ * If the sampling method does not support repeatable scans, we must avoid
+ * plans that would scan the rel multiple times. Ideally, we'd simply
+ * avoid putting the rel on the inside of a nestloop join; but adding such
+ * a consideration to the planner seems like a great deal of complication
+ * to support an uncommon usage of second-rate sampling methods. Instead,
+ * if there is a risk that the query might perform an unsafe join, just
+ * wrap the SampleScan in a Materialize node. We can check for joins by
+ * counting the membership of all_baserels (note that this correctly
+ * counts inheritance trees as single rels). If we're inside a subquery,
+ * we can't easily check whether a join might occur in the outer query, so
+ * just assume one is possible.
+ *
+ * GetTsmRoutine is relatively expensive compared to the other tests here,
+ * so check repeatable_across_scans last, even though that's a bit odd.
+ */
+ if ((root->query_level > 1 ||
+ bms_membership(root->all_baserels) != BMS_SINGLETON) &&
+ !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
+ {
+ path = (Path *) create_material_path(rel, path);
+ }
+
+ add_path(rel, path);
+
+ /* For the moment, at least, there are no other paths to consider */
+}
+
+/*
+ * set_foreign_size
+ * Set size estimates for a foreign table RTE
+ */
+static void
+set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Mark rel with estimated output rows, width, etc */
+ set_foreign_size_estimates(root, rel);
+
+ /* Let FDW adjust the size estimates, if it can */
+ rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
+
+ /* ... but do not let it set the rows estimate to zero */
+ rel->rows = clamp_row_est(rel->rows);
+
+ /*
+ * Also, make sure rel->tuples is not insane relative to rel->rows.
+ * Notably, this ensures sanity if pg_class.reltuples contains -1 and the
+ * FDW doesn't do anything to replace that.
+ */
+ rel->tuples = Max(rel->tuples, rel->rows);
+}
+
+/*
+ * set_foreign_pathlist
+ * Build access paths for a foreign table RTE
+ */
+static void
+set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Call the FDW's GetForeignPaths function to generate path(s) */
+ rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
+}
+
+/*
+ * set_append_rel_size
+ * Set size estimates for a simple "append relation"
+ *
+ * The passed-in rel and RTE represent the entire append relation. The
+ * relation's contents are computed by appending together the output of the
+ * individual member relations. Note that in the non-partitioned inheritance
+ * case, the first member relation is actually the same table as is mentioned
+ * in the parent RTE ... but it has a different RTE and RelOptInfo. This is
+ * a good thing because their outputs are not the same size.
+ */
+static void
+set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ int parentRTindex = rti;
+ bool has_live_children;
+ double parent_rows;
+ double parent_size;
+ double *parent_attrsizes;
+ int nattrs;
+ ListCell *l;
+
+ /* Guard against stack overflow due to overly deep inheritance tree. */
+ check_stack_depth();
+
+ Assert(IS_SIMPLE_REL(rel));
+
+ /*
+ * If this is a partitioned baserel, set the consider_partitionwise_join
+ * flag; currently, we only consider partitionwise joins with the baserel
+ * if its targetlist doesn't contain a whole-row Var.
+ */
+ if (enable_partitionwise_join &&
+ rel->reloptkind == RELOPT_BASEREL &&
+ rte->relkind == RELKIND_PARTITIONED_TABLE &&
+ rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
+ rel->consider_partitionwise_join = true;
+
+ /*
+ * Initialize to compute size estimates for whole append relation.
+ *
+ * We handle width estimates by weighting the widths of different child
+ * rels proportionally to their number of rows. This is sensible because
+ * the use of width estimates is mainly to compute the total relation
+ * "footprint" if we have to sort or hash it. To do this, we sum the
+ * total equivalent size (in "double" arithmetic) and then divide by the
+ * total rowcount estimate. This is done separately for the total rel
+ * width and each attribute.
+ *
+ * Note: if you consider changing this logic, beware that child rels could
+ * have zero rows and/or width, if they were excluded by constraints.
+ */
+ has_live_children = false;
+ parent_rows = 0;
+ parent_size = 0;
+ nattrs = rel->max_attr - rel->min_attr + 1;
+ parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
+
+ foreach(l, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+ int childRTindex;
+ RangeTblEntry *childRTE;
+ RelOptInfo *childrel;
+ ListCell *parentvars;
+ ListCell *childvars;
+
+ /* append_rel_list contains all append rels; ignore others */
+ if (appinfo->parent_relid != parentRTindex)
+ continue;
+
+ childRTindex = appinfo->child_relid;
+ childRTE = root->simple_rte_array[childRTindex];
+
+ /*
+ * The child rel's RelOptInfo was already created during
+ * add_other_rels_to_query.
+ */
+ childrel = find_base_rel(root, childRTindex);
+ Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+
+ /* We may have already proven the child to be dummy. */
+ if (IS_DUMMY_REL(childrel))
+ continue;
+
+ /*
+ * We have to copy the parent's targetlist and quals to the child,
+ * with appropriate substitution of variables. However, the
+ * baserestrictinfo quals were already copied/substituted when the
+ * child RelOptInfo was built. So we don't need any additional setup
+ * before applying constraint exclusion.
+ */
+ if (relation_excluded_by_constraints(root, childrel, childRTE))
+ {
+ /*
+ * This child need not be scanned, so we can omit it from the
+ * appendrel.
+ */
+ set_dummy_rel_pathlist(childrel);
+ continue;
+ }
+
+ /*
+ * Constraint exclusion failed, so copy the parent's join quals and
+ * targetlist to the child, with appropriate variable substitutions.
+ *
+ * NB: the resulting childrel->reltarget->exprs may contain arbitrary
+ * expressions, which otherwise would not occur in a rel's targetlist.
+ * Code that might be looking at an appendrel child must cope with
+ * such. (Normally, a rel's targetlist would only include Vars and
+ * PlaceHolderVars.) XXX we do not bother to update the cost or width
+ * fields of childrel->reltarget; not clear if that would be useful.
+ */
+ childrel->joininfo = (List *)
+ adjust_appendrel_attrs(root,
+ (Node *) rel->joininfo,
+ 1, &appinfo);
+ childrel->reltarget->exprs = (List *)
+ adjust_appendrel_attrs(root,
+ (Node *) rel->reltarget->exprs,
+ 1, &appinfo);
+
+ /*
+ * We have to make child entries in the EquivalenceClass data
+ * structures as well. This is needed either if the parent
+ * participates in some eclass joins (because we will want to consider
+ * inner-indexscan joins on the individual children) or if the parent
+ * has useful pathkeys (because we should try to build MergeAppend
+ * paths that produce those sort orderings).
+ */
+ if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
+ add_child_rel_equivalences(root, appinfo, rel, childrel);
+ childrel->has_eclass_joins = rel->has_eclass_joins;
+
+ /*
+ * Note: we could compute appropriate attr_needed data for the child's
+ * variables, by transforming the parent's attr_needed through the
+ * translated_vars mapping. However, currently there's no need
+ * because attr_needed is only examined for base relations not
+ * otherrels. So we just leave the child's attr_needed empty.
+ */
+
+ /*
+ * If we consider partitionwise joins with the parent rel, do the same
+ * for partitioned child rels.
+ *
+ * Note: here we abuse the consider_partitionwise_join flag by setting
+ * it for child rels that are not themselves partitioned. We do so to
+ * tell try_partitionwise_join() that the child rel is sufficiently
+ * valid to be used as a per-partition input, even if it later gets
+ * proven to be dummy. (It's not usable until we've set up the
+ * reltarget and EC entries, which we just did.)
+ */
+ if (rel->consider_partitionwise_join)
+ childrel->consider_partitionwise_join = true;
+
+ /*
+ * If parallelism is allowable for this query in general, see whether
+ * it's allowable for this childrel in particular. But if we've
+ * already decided the appendrel is not parallel-safe as a whole,
+ * there's no point in considering parallelism for this child. For
+ * consistency, do this before calling set_rel_size() for the child.
+ */
+ if (root->glob->parallelModeOK && rel->consider_parallel)
+ set_rel_consider_parallel(root, childrel, childRTE);
+
+ /*
+ * Compute the child's size.
+ */
+ set_rel_size(root, childrel, childRTindex, childRTE);
+
+ /*
+ * It is possible that constraint exclusion detected a contradiction
+ * within a child subquery, even though we didn't prove one above. If
+ * so, we can skip this child.
+ */
+ if (IS_DUMMY_REL(childrel))
+ continue;
+
+ /* We have at least one live child. */
+ has_live_children = true;
+
+ /*
+ * If any live child is not parallel-safe, treat the whole appendrel
+ * as not parallel-safe. In future we might be able to generate plans
+ * in which some children are farmed out to workers while others are
+ * not; but we don't have that today, so it's a waste to consider
+ * partial paths anywhere in the appendrel unless it's all safe.
+ * (Child rels visited before this one will be unmarked in
+ * set_append_rel_pathlist().)
+ */
+ if (!childrel->consider_parallel)
+ rel->consider_parallel = false;
+
+ /*
+ * Accumulate size information from each live child.
+ */
+ Assert(childrel->rows > 0);
+
+ parent_rows += childrel->rows;
+ parent_size += childrel->reltarget->width * childrel->rows;
+
+ /*
+ * Accumulate per-column estimates too. We need not do anything for
+ * PlaceHolderVars in the parent list. If child expression isn't a
+ * Var, or we didn't record a width estimate for it, we have to fall
+ * back on a datatype-based estimate.
+ *
+ * By construction, child's targetlist is 1-to-1 with parent's.
+ */
+ forboth(parentvars, rel->reltarget->exprs,
+ childvars, childrel->reltarget->exprs)
+ {
+ Var *parentvar = (Var *) lfirst(parentvars);
+ Node *childvar = (Node *) lfirst(childvars);
+
+ if (IsA(parentvar, Var) && parentvar->varno == parentRTindex)
+ {
+ int pndx = parentvar->varattno - rel->min_attr;
+ int32 child_width = 0;
+
+ if (IsA(childvar, Var) &&
+ ((Var *) childvar)->varno == childrel->relid)
+ {
+ int cndx = ((Var *) childvar)->varattno - childrel->min_attr;
+
+ child_width = childrel->attr_widths[cndx];
+ }
+ if (child_width <= 0)
+ child_width = get_typavgwidth(exprType(childvar),
+ exprTypmod(childvar));
+ Assert(child_width > 0);
+ parent_attrsizes[pndx] += child_width * childrel->rows;
+ }
+ }
+ }
+
+ if (has_live_children)
+ {
+ /*
+ * Save the finished size estimates.
+ */
+ int i;
+
+ Assert(parent_rows > 0);
+ rel->rows = parent_rows;
+ rel->reltarget->width = rint(parent_size / parent_rows);
+ for (i = 0; i < nattrs; i++)
+ rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
+
+ /*
+ * Set "raw tuples" count equal to "rows" for the appendrel; needed
+ * because some places assume rel->tuples is valid for any baserel.
+ */
+ rel->tuples = parent_rows;
+
+ /*
+ * Note that we leave rel->pages as zero; this is important to avoid
+ * double-counting the appendrel tree in total_table_pages.
+ */
+ }
+ else
+ {
+ /*
+ * All children were excluded by constraints, so mark the whole
+ * appendrel dummy. We must do this in this phase so that the rel's
+ * dummy-ness is visible when we generate paths for other rels.
+ */
+ set_dummy_rel_pathlist(rel);
+ }
+
+ pfree(parent_attrsizes);
+}
+
+/*
+ * set_append_rel_pathlist
+ * Build access paths for an "append relation"
+ */
+static void
+set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ int parentRTindex = rti;
+ List *live_childrels = NIL;
+ ListCell *l;
+
+ /*
+ * Generate access paths for each member relation, and remember the
+ * non-dummy children.
+ */
+ foreach(l, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+ int childRTindex;
+ RangeTblEntry *childRTE;
+ RelOptInfo *childrel;
+
+ /* append_rel_list contains all append rels; ignore others */
+ if (appinfo->parent_relid != parentRTindex)
+ continue;
+
+ /* Re-locate the child RTE and RelOptInfo */
+ childRTindex = appinfo->child_relid;
+ childRTE = root->simple_rte_array[childRTindex];
+ childrel = root->simple_rel_array[childRTindex];
+
+ /*
+ * If set_append_rel_size() decided the parent appendrel was
+ * parallel-unsafe at some point after visiting this child rel, we
+ * need to propagate the unsafety marking down to the child, so that
+ * we don't generate useless partial paths for it.
+ */
+ if (!rel->consider_parallel)
+ childrel->consider_parallel = false;
+
+ /*
+ * Compute the child's access paths.
+ */
+ set_rel_pathlist(root, childrel, childRTindex, childRTE);
+
+ /*
+ * If child is dummy, ignore it.
+ */
+ if (IS_DUMMY_REL(childrel))
+ continue;
+
+ /*
+ * Child is live, so add it to the live_childrels list for use below.
+ */
+ live_childrels = lappend(live_childrels, childrel);
+ }
+
+ /* Add paths to the append relation. */
+ add_paths_to_append_rel(root, rel, live_childrels);
+}
+
+
+/*
+ * add_paths_to_append_rel
+ * Generate paths for the given append relation given the set of non-dummy
+ * child rels.
+ *
+ * The function collects all parameterizations and orderings supported by the
+ * non-dummy children. For every such parameterization or ordering, it creates
+ * an append path collecting one path from each non-dummy child with given
+ * parameterization or ordering. Similarly it collects partial paths from
+ * non-dummy children to create partial append paths.
+ */
+void
+add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels)
+{
+ List *subpaths = NIL;
+ bool subpaths_valid = true;
+ List *partial_subpaths = NIL;
+ List *pa_partial_subpaths = NIL;
+ List *pa_nonpartial_subpaths = NIL;
+ bool partial_subpaths_valid = true;
+ bool pa_subpaths_valid;
+ List *all_child_pathkeys = NIL;
+ List *all_child_outers = NIL;
+ ListCell *l;
+ double partial_rows = -1;
+
+ /* If appropriate, consider parallel append */
+ pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
+
+ /*
+ * For every non-dummy child, remember the cheapest path. Also, identify
+ * all pathkeys (orderings) and parameterizations (required_outer sets)
+ * available for the non-dummy member relations.
+ */
+ foreach(l, live_childrels)
+ {
+ RelOptInfo *childrel = lfirst(l);
+ ListCell *lcp;
+ Path *cheapest_partial_path = NULL;
+
+ /*
+ * If child has an unparameterized cheapest-total path, add that to
+ * the unparameterized Append path we are constructing for the parent.
+ * If not, there's no workable unparameterized path.
+ *
+ * With partitionwise aggregates, the child rel's pathlist may be
+ * empty, so don't assume that a path exists here.
+ */
+ if (childrel->pathlist != NIL &&
+ childrel->cheapest_total_path->param_info == NULL)
+ accumulate_append_subpath(childrel->cheapest_total_path,
+ &subpaths, NULL);
+ else
+ subpaths_valid = false;
+
+ /* Same idea, but for a partial plan. */
+ if (childrel->partial_pathlist != NIL)
+ {
+ cheapest_partial_path = linitial(childrel->partial_pathlist);
+ accumulate_append_subpath(cheapest_partial_path,
+ &partial_subpaths, NULL);
+ }
+ else
+ partial_subpaths_valid = false;
+
+ /*
+ * Same idea, but for a parallel append mixing partial and non-partial
+ * paths.
+ */
+ if (pa_subpaths_valid)
+ {
+ Path *nppath = NULL;
+
+ nppath =
+ get_cheapest_parallel_safe_total_inner(childrel->pathlist);
+
+ if (cheapest_partial_path == NULL && nppath == NULL)
+ {
+ /* Neither a partial nor a parallel-safe path? Forget it. */
+ pa_subpaths_valid = false;
+ }
+ else if (nppath == NULL ||
+ (cheapest_partial_path != NULL &&
+ cheapest_partial_path->total_cost < nppath->total_cost))
+ {
+ /* Partial path is cheaper or the only option. */
+ Assert(cheapest_partial_path != NULL);
+ accumulate_append_subpath(cheapest_partial_path,
+ &pa_partial_subpaths,
+ &pa_nonpartial_subpaths);
+ }
+ else
+ {
+ /*
+ * Either we've got only a non-partial path, or we think that
+ * a single backend can execute the best non-partial path
+ * faster than all the parallel backends working together can
+ * execute the best partial path.
+ *
+ * It might make sense to be more aggressive here. Even if
+ * the best non-partial path is more expensive than the best
+ * partial path, it could still be better to choose the
+ * non-partial path if there are several such paths that can
+ * be given to different workers. For now, we don't try to
+ * figure that out.
+ */
+ accumulate_append_subpath(nppath,
+ &pa_nonpartial_subpaths,
+ NULL);
+ }
+ }
+
+ /*
+ * Collect lists of all the available path orderings and
+ * parameterizations for all the children. We use these as a
+ * heuristic to indicate which sort orderings and parameterizations we
+ * should build Append and MergeAppend paths for.
+ */
+ foreach(lcp, childrel->pathlist)
+ {
+ Path *childpath = (Path *) lfirst(lcp);
+ List *childkeys = childpath->pathkeys;
+ Relids childouter = PATH_REQ_OUTER(childpath);
+
+ /* Unsorted paths don't contribute to pathkey list */
+ if (childkeys != NIL)
+ {
+ ListCell *lpk;
+ bool found = false;
+
+ /* Have we already seen this ordering? */
+ foreach(lpk, all_child_pathkeys)
+ {
+ List *existing_pathkeys = (List *) lfirst(lpk);
+
+ if (compare_pathkeys(existing_pathkeys,
+ childkeys) == PATHKEYS_EQUAL)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_pathkeys */
+ all_child_pathkeys = lappend(all_child_pathkeys,
+ childkeys);
+ }
+ }
+
+ /* Unparameterized paths don't contribute to param-set list */
+ if (childouter)
+ {
+ ListCell *lco;
+ bool found = false;
+
+ /* Have we already seen this param set? */
+ foreach(lco, all_child_outers)
+ {
+ Relids existing_outers = (Relids) lfirst(lco);
+
+ if (bms_equal(existing_outers, childouter))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_outers */
+ all_child_outers = lappend(all_child_outers,
+ childouter);
+ }
+ }
+ }
+ }
+
+ /*
+ * If we found unparameterized paths for all children, build an unordered,
+ * unparameterized Append path for the rel. (Note: this is correct even
+ * if we have zero or one live subpath due to constraint exclusion.)
+ */
+ if (subpaths_valid)
+ add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
+ NIL, NULL, 0, false,
+ -1));
+
+ /*
+ * Consider an append of unordered, unparameterized partial paths. Make
+ * it parallel-aware if possible.
+ */
+ if (partial_subpaths_valid && partial_subpaths != NIL)
+ {
+ AppendPath *appendpath;
+ ListCell *lc;
+ int parallel_workers = 0;
+
+ /* Find the highest number of workers requested for any subpath. */
+ foreach(lc, partial_subpaths)
+ {
+ Path *path = lfirst(lc);
+
+ parallel_workers = Max(parallel_workers, path->parallel_workers);
+ }
+ Assert(parallel_workers > 0);
+
+ /*
+ * If the use of parallel append is permitted, always request at least
+ * log2(# of children) workers. We assume it can be useful to have
+ * extra workers in this case because they will be spread out across
+ * the children. The precise formula is just a guess, but we don't
+ * want to end up with a radically different answer for a table with N
+ * partitions vs. an unpartitioned table with the same data, so the
+ * use of some kind of log-scaling here seems to make some sense.
+ */
+ if (enable_parallel_append)
+ {
+ parallel_workers = Max(parallel_workers,
+ fls(list_length(live_childrels)));
+ parallel_workers = Min(parallel_workers,
+ max_parallel_workers_per_gather);
+ }
+ Assert(parallel_workers > 0);
+
+ /* Generate a partial append path. */
+ appendpath = create_append_path(root, rel, NIL, partial_subpaths,
+ NIL, NULL, parallel_workers,
+ enable_parallel_append,
+ -1);
+
+ /*
+ * Make sure any subsequent partial paths use the same row count
+ * estimate.
+ */
+ partial_rows = appendpath->path.rows;
+
+ /* Add the path. */
+ add_partial_path(rel, (Path *) appendpath);
+ }
+
+ /*
+ * Consider a parallel-aware append using a mix of partial and non-partial
+ * paths. (This only makes sense if there's at least one child which has
+ * a non-partial path that is substantially cheaper than any partial path;
+ * otherwise, we should use the append path added in the previous step.)
+ */
+ if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
+ {
+ AppendPath *appendpath;
+ ListCell *lc;
+ int parallel_workers = 0;
+
+ /*
+ * Find the highest number of workers requested for any partial
+ * subpath.
+ */
+ foreach(lc, pa_partial_subpaths)
+ {
+ Path *path = lfirst(lc);
+
+ parallel_workers = Max(parallel_workers, path->parallel_workers);
+ }
+
+ /*
+ * Same formula here as above. It's even more important in this
+ * instance because the non-partial paths won't contribute anything to
+ * the planned number of parallel workers.
+ */
+ parallel_workers = Max(parallel_workers,
+ fls(list_length(live_childrels)));
+ parallel_workers = Min(parallel_workers,
+ max_parallel_workers_per_gather);
+ Assert(parallel_workers > 0);
+
+ appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
+ pa_partial_subpaths,
+ NIL, NULL, parallel_workers, true,
+ partial_rows);
+ add_partial_path(rel, (Path *) appendpath);
+ }
+
+ /*
+ * Also build unparameterized ordered append paths based on the collected
+ * list of child pathkeys.
+ */
+ if (subpaths_valid)
+ generate_orderedappend_paths(root, rel, live_childrels,
+ all_child_pathkeys);
+
+ /*
+ * Build Append paths for each parameterization seen among the child rels.
+ * (This may look pretty expensive, but in most cases of practical
+ * interest, the child rels will expose mostly the same parameterizations,
+ * so that not that many cases actually get considered here.)
+ *
+ * The Append node itself cannot enforce quals, so all qual checking must
+ * be done in the child paths. This means that to have a parameterized
+ * Append path, we must have the exact same parameterization for each
+ * child path; otherwise some children might be failing to check the
+ * moved-down quals. To make them match up, we can try to increase the
+ * parameterization of lesser-parameterized paths.
+ */
+ foreach(l, all_child_outers)
+ {
+ Relids required_outer = (Relids) lfirst(l);
+ ListCell *lcr;
+
+ /* Select the child paths for an Append with this parameterization */
+ subpaths = NIL;
+ subpaths_valid = true;
+ foreach(lcr, live_childrels)
+ {
+ RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+ Path *subpath;
+
+ if (childrel->pathlist == NIL)
+ {
+ /* failed to make a suitable path for this child */
+ subpaths_valid = false;
+ break;
+ }
+
+ subpath = get_cheapest_parameterized_child_path(root,
+ childrel,
+ required_outer);
+ if (subpath == NULL)
+ {
+ /* failed to make a suitable path for this child */
+ subpaths_valid = false;
+ break;
+ }
+ accumulate_append_subpath(subpath, &subpaths, NULL);
+ }
+
+ if (subpaths_valid)
+ add_path(rel, (Path *)
+ create_append_path(root, rel, subpaths, NIL,
+ NIL, required_outer, 0, false,
+ -1));
+ }
+
+ /*
+ * When there is only a single child relation, the Append path can inherit
+ * any ordering available for the child rel's path, so that it's useful to
+ * consider ordered partial paths. Above we only considered the cheapest
+ * partial path for each child, but let's also make paths using any
+ * partial paths that have pathkeys.
+ */
+ if (list_length(live_childrels) == 1)
+ {
+ RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels);
+
+ /* skip the cheapest partial path, since we already used that above */
+ for_each_from(l, childrel->partial_pathlist, 1)
+ {
+ Path *path = (Path *) lfirst(l);
+ AppendPath *appendpath;
+
+ /* skip paths with no pathkeys. */
+ if (path->pathkeys == NIL)
+ continue;
+
+ appendpath = create_append_path(root, rel, NIL, list_make1(path),
+ NIL, NULL,
+ path->parallel_workers, true,
+ partial_rows);
+ add_partial_path(rel, (Path *) appendpath);
+ }
+ }
+}
+
+/*
+ * generate_orderedappend_paths
+ * Generate ordered append paths for an append relation
+ *
+ * Usually we generate MergeAppend paths here, but there are some special
+ * cases where we can generate simple Append paths, because the subpaths
+ * can provide tuples in the required order already.
+ *
+ * We generate a path for each ordering (pathkey list) appearing in
+ * all_child_pathkeys.
+ *
+ * We consider both cheapest-startup and cheapest-total cases, ie, for each
+ * interesting ordering, collect all the cheapest startup subpaths and all the
+ * cheapest total paths, and build a suitable path for each case.
+ *
+ * We don't currently generate any parameterized ordered paths here. While
+ * it would not take much more code here to do so, it's very unclear that it
+ * is worth the planning cycles to investigate such paths: there's little
+ * use for an ordered path on the inside of a nestloop. In fact, it's likely
+ * that the current coding of add_path would reject such paths out of hand,
+ * because add_path gives no credit for sort ordering of parameterized paths,
+ * and a parameterized MergeAppend is going to be more expensive than the
+ * corresponding parameterized Append path. If we ever try harder to support
+ * parameterized mergejoin plans, it might be worth adding support for
+ * parameterized paths here to feed such joins. (See notes in
+ * optimizer/README for why that might not ever happen, though.)
+ */
+static void
+generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys)
+{
+ ListCell *lcp;
+ List *partition_pathkeys = NIL;
+ List *partition_pathkeys_desc = NIL;
+ bool partition_pathkeys_partial = true;
+ bool partition_pathkeys_desc_partial = true;
+
+ /*
+ * Some partitioned table setups may allow us to use an Append node
+ * instead of a MergeAppend. This is possible in cases such as RANGE
+ * partitioned tables where it's guaranteed that an earlier partition must
+ * contain rows which come earlier in the sort order. To detect whether
+ * this is relevant, build pathkey descriptions of the partition ordering,
+ * for both forward and reverse scans.
+ */
+ if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
+ partitions_are_ordered(rel->boundinfo, rel->live_parts))
+ {
+ partition_pathkeys = build_partition_pathkeys(root, rel,
+ ForwardScanDirection,
+ &partition_pathkeys_partial);
+
+ partition_pathkeys_desc = build_partition_pathkeys(root, rel,
+ BackwardScanDirection,
+ &partition_pathkeys_desc_partial);
+
+ /*
+ * You might think we should truncate_useless_pathkeys here, but
+ * allowing partition keys which are a subset of the query's pathkeys
+ * can often be useful. For example, consider a table partitioned by
+ * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
+ * paths that can produce the a, b, c ordering (perhaps via indexes on
+ * (a, b, c)) then it works to consider the appendrel output as
+ * ordered by a, b, c.
+ */
+ }
+
+ /* Now consider each interesting sort ordering */
+ foreach(lcp, all_child_pathkeys)
+ {
+ List *pathkeys = (List *) lfirst(lcp);
+ List *startup_subpaths = NIL;
+ List *total_subpaths = NIL;
+ List *fractional_subpaths = NIL;
+ bool startup_neq_total = false;
+ ListCell *lcr;
+ bool match_partition_order;
+ bool match_partition_order_desc;
+
+ /*
+ * Determine if this sort ordering matches any partition pathkeys we
+ * have, for both ascending and descending partition order. If the
+ * partition pathkeys happen to be contained in pathkeys then it still
+ * works, as described above, providing that the partition pathkeys
+ * are complete and not just a prefix of the partition keys. (In such
+ * cases we'll be relying on the child paths to have sorted the
+ * lower-order columns of the required pathkeys.)
+ */
+ match_partition_order =
+ pathkeys_contained_in(pathkeys, partition_pathkeys) ||
+ (!partition_pathkeys_partial &&
+ pathkeys_contained_in(partition_pathkeys, pathkeys));
+
+ match_partition_order_desc = !match_partition_order &&
+ (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
+ (!partition_pathkeys_desc_partial &&
+ pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
+
+ /* Select the child paths for this ordering... */
+ foreach(lcr, live_childrels)
+ {
+ RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+ Path *cheapest_startup,
+ *cheapest_total,
+ *cheapest_fractional = NULL;
+
+ /* Locate the right paths, if they are available. */
+ cheapest_startup =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ pathkeys,
+ NULL,
+ STARTUP_COST,
+ false);
+ cheapest_total =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ pathkeys,
+ NULL,
+ TOTAL_COST,
+ false);
+
+ /*
+ * If we can't find any paths with the right order just use the
+ * cheapest-total path; we'll have to sort it later.
+ */
+ if (cheapest_startup == NULL || cheapest_total == NULL)
+ {
+ cheapest_startup = cheapest_total =
+ childrel->cheapest_total_path;
+ /* Assert we do have an unparameterized path for this child */
+ Assert(cheapest_total->param_info == NULL);
+ }
+
+ /*
+ * When building a fractional path, determine a cheapest
+ * fractional path for each child relation too. Looking at startup
+ * and total costs is not enough, because the cheapest fractional
+ * path may be dominated by two separate paths (one for startup,
+ * one for total).
+ */
+ if (root->tuple_fraction > 0)
+ {
+ double path_fraction = (1.0 / root->tuple_fraction);
+
+ cheapest_fractional =
+ get_cheapest_fractional_path_for_pathkeys(childrel->pathlist,
+ pathkeys,
+ NULL,
+ path_fraction);
+
+ /*
+ * If we found no path with matching pathkeys, use the
+ * cheapest total path instead.
+ *
+ * XXX We might consider partially sorted paths too (with an
+ * incremental sort on top). But we'd have to build all the
+ * incremental paths, do the costing etc.
+ */
+ if (!cheapest_fractional)
+ cheapest_fractional = cheapest_total;
+ }
+
+ /*
+ * Notice whether we actually have different paths for the
+ * "cheapest" and "total" cases; frequently there will be no point
+ * in two create_merge_append_path() calls.
+ */
+ if (cheapest_startup != cheapest_total)
+ startup_neq_total = true;
+
+ /*
+ * Collect the appropriate child paths. The required logic varies
+ * for the Append and MergeAppend cases.
+ */
+ if (match_partition_order)
+ {
+ /*
+ * We're going to make a plain Append path. We don't need
+ * most of what accumulate_append_subpath would do, but we do
+ * want to cut out child Appends or MergeAppends if they have
+ * just a single subpath (and hence aren't doing anything
+ * useful).
+ */
+ cheapest_startup = get_singleton_append_subpath(cheapest_startup);
+ cheapest_total = get_singleton_append_subpath(cheapest_total);
+
+ startup_subpaths = lappend(startup_subpaths, cheapest_startup);
+ total_subpaths = lappend(total_subpaths, cheapest_total);
+
+ if (cheapest_fractional)
+ {
+ cheapest_fractional = get_singleton_append_subpath(cheapest_fractional);
+ fractional_subpaths = lappend(fractional_subpaths, cheapest_fractional);
+ }
+ }
+ else if (match_partition_order_desc)
+ {
+ /*
+ * As above, but we need to reverse the order of the children,
+ * because nodeAppend.c doesn't know anything about reverse
+ * ordering and will scan the children in the order presented.
+ */
+ cheapest_startup = get_singleton_append_subpath(cheapest_startup);
+ cheapest_total = get_singleton_append_subpath(cheapest_total);
+
+ startup_subpaths = lcons(cheapest_startup, startup_subpaths);
+ total_subpaths = lcons(cheapest_total, total_subpaths);
+
+ if (cheapest_fractional)
+ {
+ cheapest_fractional = get_singleton_append_subpath(cheapest_fractional);
+ fractional_subpaths = lcons(cheapest_fractional, fractional_subpaths);
+ }
+ }
+ else
+ {
+ /*
+ * Otherwise, rely on accumulate_append_subpath to collect the
+ * child paths for the MergeAppend.
+ */
+ accumulate_append_subpath(cheapest_startup,
+ &startup_subpaths, NULL);
+ accumulate_append_subpath(cheapest_total,
+ &total_subpaths, NULL);
+
+ if (cheapest_fractional)
+ accumulate_append_subpath(cheapest_fractional,
+ &fractional_subpaths, NULL);
+ }
+ }
+
+ /* ... and build the Append or MergeAppend paths */
+ if (match_partition_order || match_partition_order_desc)
+ {
+ /* We only need Append */
+ add_path(rel, (Path *) create_append_path(root,
+ rel,
+ startup_subpaths,
+ NIL,
+ pathkeys,
+ NULL,
+ 0,
+ false,
+ -1));
+ if (startup_neq_total)
+ add_path(rel, (Path *) create_append_path(root,
+ rel,
+ total_subpaths,
+ NIL,
+ pathkeys,
+ NULL,
+ 0,
+ false,
+ -1));
+
+ if (fractional_subpaths)
+ add_path(rel, (Path *) create_append_path(root,
+ rel,
+ fractional_subpaths,
+ NIL,
+ pathkeys,
+ NULL,
+ 0,
+ false,
+ -1));
+ }
+ else
+ {
+ /* We need MergeAppend */
+ add_path(rel, (Path *) create_merge_append_path(root,
+ rel,
+ startup_subpaths,
+ pathkeys,
+ NULL));
+ if (startup_neq_total)
+ add_path(rel, (Path *) create_merge_append_path(root,
+ rel,
+ total_subpaths,
+ pathkeys,
+ NULL));
+
+ if (fractional_subpaths)
+ add_path(rel, (Path *) create_merge_append_path(root,
+ rel,
+ fractional_subpaths,
+ pathkeys,
+ NULL));
+ }
+ }
+}
+
+/*
+ * get_cheapest_parameterized_child_path
+ * Get cheapest path for this relation that has exactly the requested
+ * parameterization.
+ *
+ * Returns NULL if unable to create such a path.
+ */
+static Path *
+get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
+ Relids required_outer)
+{
+ Path *cheapest;
+ ListCell *lc;
+
+ /*
+ * Look up the cheapest existing path with no more than the needed
+ * parameterization. If it has exactly the needed parameterization, we're
+ * done.
+ */
+ cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
+ NIL,
+ required_outer,
+ TOTAL_COST,
+ false);
+ Assert(cheapest != NULL);
+ if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
+ return cheapest;
+
+ /*
+ * Otherwise, we can "reparameterize" an existing path to match the given
+ * parameterization, which effectively means pushing down additional
+ * joinquals to be checked within the path's scan. However, some existing
+ * paths might check the available joinquals already while others don't;
+ * therefore, it's not clear which existing path will be cheapest after
+ * reparameterization. We have to go through them all and find out.
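+ *
+ * For instance (hypothetical example): with required_outer naming the outer
+ * side of a prospective nestloop, an index path whose indexquals already
+ * use the join clause may have exactly the requested parameterization,
+ * while a seqscan path has none; reparameterizing the seqscan pushes the
+ * join clause down to be checked at the scan, which cannot reduce (and
+ * usually increases) its estimated cost.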
+ */
+ cheapest = NULL;
+ foreach(lc, rel->pathlist)
+ {
+ Path *path = (Path *) lfirst(lc);
+
+ /* Can't use it if it needs more than requested parameterization */
+ if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+ continue;
+
+ /*
+ * Reparameterization can only increase the path's cost, so if it's
+ * already more expensive than the current cheapest, forget it.
+ */
+ if (cheapest != NULL &&
+ compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+ continue;
+
+ /* Reparameterize if needed, then recheck cost */
+ if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
+ {
+ path = reparameterize_path(root, path, required_outer, 1.0);
+ if (path == NULL)
+ continue; /* failed to reparameterize this one */
+ Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
+
+ if (cheapest != NULL &&
+ compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+ continue;
+ }
+
+ /* We have a new best path */
+ cheapest = path;
+ }
+
+ /* Return the best path, or NULL if we found no suitable candidate */
+ return cheapest;
+}
+
+/*
+ * accumulate_append_subpath
+ * Add a subpath to the list being built for an Append or MergeAppend.
+ *
+ * It's possible that the child is itself an Append or MergeAppend path, in
+ * which case we can "cut out the middleman" and just add its child paths to
+ * our own list. (We don't try to do this earlier because we need to apply
+ * both levels of transformation to the quals.)
+ *
+ * Note that if we omit a child MergeAppend in this way, we are effectively
+ * omitting a sort step, which seems fine: if the parent is to be an Append,
+ * its result would be unsorted anyway, while if the parent is to be a
+ * MergeAppend, there's no point in a separate sort on a child.
+ *
+ * Normally, either path is a partial path and subpaths is a list of partial
+ * paths, or else path is a non-partial path and subpaths is a list of those.
+ * However, if path is a parallel-aware Append, then we add its partial path
+ * children to subpaths and the rest to special_subpaths. If the latter is
+ * NULL, we don't flatten the path at all (unless it contains only partial
+ * paths).
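+ *
+ * As a hypothetical example, with a multi-level partitioned table a child
+ * of the top-level append relation may itself be an Append over its own
+ * leaf partitions; pulling that child's subpaths up here lets the final
+ * plan use one Append across all the leaves instead of an Append of
+ * Appends.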
+ */
+static void
+accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
+{
+ if (IsA(path, AppendPath))
+ {
+ AppendPath *apath = (AppendPath *) path;
+
+ if (!apath->path.parallel_aware || apath->first_partial_path == 0)
+ {
+ *subpaths = list_concat(*subpaths, apath->subpaths);
+ return;
+ }
+ else if (special_subpaths != NULL)
+ {
+ List *new_special_subpaths;
+
+ /* Split Parallel Append into partial and non-partial subpaths */
+ *subpaths = list_concat(*subpaths,
+ list_copy_tail(apath->subpaths,
+ apath->first_partial_path));
+ new_special_subpaths =
+ list_truncate(list_copy(apath->subpaths),
+ apath->first_partial_path);
+ *special_subpaths = list_concat(*special_subpaths,
+ new_special_subpaths);
+ return;
+ }
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+ *subpaths = list_concat(*subpaths, mpath->subpaths);
+ return;
+ }
+
+ *subpaths = lappend(*subpaths, path);
+}
+
+/*
+ * get_singleton_append_subpath
+ * Returns the single subpath of an Append/MergeAppend, or just
+ * returns 'path' if it's not a single sub-path Append/MergeAppend.
+ *
+ * Note: 'path' must not be a parallel-aware path.
+ */
+static Path *
+get_singleton_append_subpath(Path *path)
+{
+ Assert(!path->parallel_aware);
+
+ if (IsA(path, AppendPath))
+ {
+ AppendPath *apath = (AppendPath *) path;
+
+ if (list_length(apath->subpaths) == 1)
+ return (Path *) linitial(apath->subpaths);
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+ if (list_length(mpath->subpaths) == 1)
+ return (Path *) linitial(mpath->subpaths);
+ }
+
+ return path;
+}
+
+/*
+ * set_dummy_rel_pathlist
+ * Build a dummy path for a relation that's been excluded by constraints
+ *
+ * Rather than inventing a special "dummy" path type, we represent this as an
+ * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
+ *
+ * (See also mark_dummy_rel, which does basically the same thing, but is
+ * typically used to change a rel into dummy state after we already made
+ * paths for it.)
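+ *
+ * A typical case (among others) is a child relation whose CHECK or
+ * partition constraints are proven to contradict the query's restriction
+ * clauses; such a child is marked dummy and simply drops out of its
+ * parent's Append.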
+ */
+static void
+set_dummy_rel_pathlist(RelOptInfo *rel)
+{
+ /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
+ rel->rows = 0;
+ rel->reltarget->width = 0;
+
+ /* Discard any pre-existing paths; no further need for them */
+ rel->pathlist = NIL;
+ rel->partial_pathlist = NIL;
+
+ /* Set up the dummy path */
+ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
+ NIL, rel->lateral_relids,
+ 0, false, -1));
+
+ /*
+ * We set the cheapest-path fields immediately, just in case they were
+ * pointing at some discarded path. This is redundant when we're called
+ * from set_rel_size(), but not when called from elsewhere, and doing it
+ * twice is harmless anyway.
+ */
+ set_cheapest(rel);
+}
+
+/* quick-and-dirty test to see if any joining is needed */
+static bool
+has_multiple_baserels(PlannerInfo *root)
+{
+ int num_base_rels = 0;
+ Index rti;
+
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ if (brel == NULL)
+ continue;
+
+ /* ignore RTEs that are "other rels" */
+ if (brel->reloptkind == RELOPT_BASEREL)
+ if (++num_base_rels > 1)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * find_window_run_conditions
+ * Determine if 'wfunc' is really a WindowFunc and call its prosupport
+ * function to determine the function's monotonic properties. We then
+ * see if 'opexpr' can be used to short-circuit execution.
+ *
+ * For example, row_number() over (order by ...) always produces a value one
+ * higher than it did for the previous row. If someone has a window function
+ * in a subquery and a WHERE clause in the outer query to filter rows <= 10,
+ * then we may as well stop processing the windowagg once the row number
+ * reaches 11. Here
+ * we check if 'opexpr' might help us to stop doing needless extra processing
+ * in WindowAgg nodes.
+ *
+ * '*keep_original' is set to true if the caller should also use 'opexpr' for
+ * its original purpose. This is set to false if the caller can assume that
+ * the run condition will handle all of the required filtering.
+ *
+ * Returns true if 'opexpr' was found to be useful and was added to the
+ * WindowClause's runCondition. We also set *keep_original accordingly and add
+ * 'attno' to *run_cond_attrs offset by FirstLowInvalidHeapAttributeNumber.
+ * If the 'opexpr' cannot be used then we set *keep_original to true and
+ * return false.
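+ *
+ * For instance (hypothetical query), in
+ *		SELECT * FROM (SELECT *, row_number() OVER (ORDER BY x) rn
+ *					   FROM tbl) s
+ *		WHERE rn <= 10;
+ * the "rn <= 10" qual can become a run condition, allowing the WindowAgg
+ * node to stop producing rows once row_number() exceeds 10.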
+ */
+static bool
+find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti,
+ AttrNumber attno, WindowFunc *wfunc, OpExpr *opexpr,
+ bool wfunc_left, bool *keep_original,
+ Bitmapset **run_cond_attrs)
+{
+ Oid prosupport;
+ Expr *otherexpr;
+ SupportRequestWFuncMonotonic req;
+ SupportRequestWFuncMonotonic *res;
+ WindowClause *wclause;
+ List *opinfos;
+ OpExpr *runopexpr;
+ Oid runoperator;
+ ListCell *lc;
+
+ *keep_original = true;
+
+ while (IsA(wfunc, RelabelType))
+ wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg;
+
+ /* we can only work with window functions */
+ if (!IsA(wfunc, WindowFunc))
+ return false;
+
+ /* can't use it if there are subplans in the WindowFunc */
+ if (contain_subplans((Node *) wfunc))
+ return false;
+
+ prosupport = get_func_support(wfunc->winfnoid);
+
+ /* Check if there's a support function for 'wfunc' */
+ if (!OidIsValid(prosupport))
+ return false;
+
+ /* get the Expr from the other side of the OpExpr */
+ if (wfunc_left)
+ otherexpr = lsecond(opexpr->args);
+ else
+ otherexpr = linitial(opexpr->args);
+
+ /*
+ * The value being compared must not change during the evaluation of the
+ * window partition.
+ */
+ if (!is_pseudo_constant_clause((Node *) otherexpr))
+ return false;
+
+ /* find the window clause belonging to the window function */
+ wclause = (WindowClause *) list_nth(subquery->windowClause,
+ wfunc->winref - 1);
+
+ req.type = T_SupportRequestWFuncMonotonic;
+ req.window_func = wfunc;
+ req.window_clause = wclause;
+
+ /* call the support function */
+ res = (SupportRequestWFuncMonotonic *)
+ DatumGetPointer(OidFunctionCall1(prosupport,
+ PointerGetDatum(&req)));
+
+ /*
+ * Nothing to do if the function is neither monotonically increasing nor
+ * monotonically decreasing.
+ */
+ if (res == NULL || res->monotonic == MONOTONICFUNC_NONE)
+ return false;
+
+ runopexpr = NULL;
+ runoperator = InvalidOid;
+ opinfos = get_op_btree_interpretation(opexpr->opno);
+
+ foreach(lc, opinfos)
+ {
+ OpBtreeInterpretation *opinfo = (OpBtreeInterpretation *) lfirst(lc);
+ int strategy = opinfo->strategy;
+
+ /* handle < / <= */
+ if (strategy == BTLessStrategyNumber ||
+ strategy == BTLessEqualStrategyNumber)
+ {
+ /*
+ * < / <= is supported in the form <wfunc> op <pseudoconst> for
+ * monotonically increasing functions, and in the form
+ * <pseudoconst> op <wfunc> for monotonically decreasing functions.
+ */
+ if ((wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)) ||
+ (!wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)))
+ {
+ *keep_original = false;
+ runopexpr = opexpr;
+ runoperator = opexpr->opno;
+ }
+ break;
+ }
+ /* handle > / >= */
+ else if (strategy == BTGreaterStrategyNumber ||
+ strategy == BTGreaterEqualStrategyNumber)
+ {
+ /*
+ * > / >= is supported in the form <wfunc> op <pseudoconst> for
+ * monotonically decreasing functions, and in the form
+ * <pseudoconst> op <wfunc> for monotonically increasing functions.
+ */
+ if ((wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)) ||
+ (!wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)))
+ {
+ *keep_original = false;
+ runopexpr = opexpr;
+ runoperator = opexpr->opno;
+ }
+ break;
+ }
+ /* handle = */
+ else if (strategy == BTEqualStrategyNumber)
+ {
+ int16 newstrategy;
+
+ /*
+ * When the function is both monotonically increasing and decreasing,
+ * its return value will be the same for every row of the partition.
+ * We can simply use 'opexpr' as the run condition without
+ * modifying it.
+ */
+ if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH)
+ {
+ *keep_original = false;
+ runopexpr = opexpr;
+ runoperator = opexpr->opno;
+ break;
+ }
+
+ /*
+ * When monotonically increasing we make a qual with <wfunc> <=
+ * <value> or <value> >= <wfunc> in order to filter out values
+ * which are above the value in the equality condition. For
+ * monotonically decreasing functions we want to filter values
+ * below the value in the equality condition.
+ */
+ if (res->monotonic & MONOTONICFUNC_INCREASING)
+ newstrategy = wfunc_left ? BTLessEqualStrategyNumber : BTGreaterEqualStrategyNumber;
+ else
+ newstrategy = wfunc_left ? BTGreaterEqualStrategyNumber : BTLessEqualStrategyNumber;
+
+ /* We must keep the original equality qual */
+ *keep_original = true;
+ runopexpr = opexpr;
+
+ /* determine the operator to use for the runCondition qual */
+ runoperator = get_opfamily_member(opinfo->opfamily_id,
+ opinfo->oplefttype,
+ opinfo->oprighttype,
+ newstrategy);
+ break;
+ }
+ }
+
+ if (runopexpr != NULL)
+ {
+ Expr *newexpr;
+
+ /*
+ * Build the qual required for the run condition keeping the
+ * WindowFunc on the same side as it was originally.
+ */
+ if (wfunc_left)
+ newexpr = make_opclause(runoperator,
+ runopexpr->opresulttype,
+ runopexpr->opretset, (Expr *) wfunc,
+ otherexpr, runopexpr->opcollid,
+ runopexpr->inputcollid);
+ else
+ newexpr = make_opclause(runoperator,
+ runopexpr->opresulttype,
+ runopexpr->opretset,
+ otherexpr, (Expr *) wfunc,
+ runopexpr->opcollid,
+ runopexpr->inputcollid);
+
+ wclause->runCondition = lappend(wclause->runCondition, newexpr);
+
+ /* record that this attno was used in a run condition */
+ *run_cond_attrs = bms_add_member(*run_cond_attrs,
+ attno - FirstLowInvalidHeapAttributeNumber);
+ return true;
+ }
+
+ /* unsupported OpExpr */
+ return false;
+}
+
+/*
+ * check_and_push_window_quals
+ * Check if 'clause' is a qual that can be pushed into a WindowFunc's
+ * WindowClause as a 'runCondition' qual. These, when present, allow
+ * some unnecessary work to be skipped during execution.
+ *
+ * 'run_cond_attrs' will be populated with all targetlist resnos of subquery
+ * targets (offset by FirstLowInvalidHeapAttributeNumber) that we pushed
+ * window quals for.
+ *
+ * Returns true if the caller still must keep the original qual or false if
+ * the caller can safely ignore the original qual because the WindowAgg node
+ * will use the runCondition to stop returning tuples.
+ */
+static bool
+check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti,
+ Node *clause, Bitmapset **run_cond_attrs)
+{
+ OpExpr *opexpr = (OpExpr *) clause;
+ bool keep_original = true;
+ Var *var1;
+ Var *var2;
+
+ /* We're only able to use OpExprs with 2 operands */
+ if (!IsA(opexpr, OpExpr))
+ return true;
+
+ if (list_length(opexpr->args) != 2)
+ return true;
+
+ /*
+ * Currently, we restrict this optimization to strict OpExprs. The reason
+ * for this is that during execution, once the runcondition becomes false,
+ * we stop evaluating WindowFuncs. To avoid leaving around stale window
+ * function result values, we set them to NULL. Having only strict
+ * OpExprs here ensures that we properly filter out the tuples with NULLs
+ * in the top-level WindowAgg.
+ */
+ set_opfuncid(opexpr);
+ if (!func_strict(opexpr->opfuncid))
+ return true;
+
+ /*
+ * Check for plain Vars that reference window functions in the subquery.
+ * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can
+ * be used as part of the run condition.
+ */
+
+ /* Check the left side of the OpExpr */
+ var1 = linitial(opexpr->args);
+ if (IsA(var1, Var) && var1->varattno > 0)
+ {
+ TargetEntry *tle = list_nth(subquery->targetList, var1->varattno - 1);
+ WindowFunc *wfunc = (WindowFunc *) tle->expr;
+
+ if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc,
+ opexpr, true, &keep_original,
+ run_cond_attrs))
+ return keep_original;
+ }
+
+ /* and check the right side */
+ var2 = lsecond(opexpr->args);
+ if (IsA(var2, Var) && var2->varattno > 0)
+ {
+ TargetEntry *tle = list_nth(subquery->targetList, var2->varattno - 1);
+ WindowFunc *wfunc = (WindowFunc *) tle->expr;
+
+ if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc,
+ opexpr, false, &keep_original,
+ run_cond_attrs))
+ return keep_original;
+ }
+
+ return true;
+}
+
+/*
+ * set_subquery_pathlist
+ * Generate SubqueryScan access paths for a subquery RTE
+ *
+ * We don't currently support generating parameterized paths for subqueries
+ * by pushing join clauses down into them; it seems too expensive to re-plan
+ * the subquery multiple times to consider different alternatives.
+ * (XXX that could stand to be reconsidered, now that we use Paths.)
+ * So the paths made here will be parameterized if the subquery contains
+ * LATERAL references, otherwise not. As long as that's true, there's no need
+ * for a separate set_subquery_size phase: just make the paths right away.
+ */
+static void
+set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ Query *parse = root->parse;
+ Query *subquery = rte->subquery;
+ Relids required_outer;
+ pushdown_safety_info safetyInfo;
+ double tuple_fraction;
+ RelOptInfo *sub_final_rel;
+ Bitmapset *run_cond_attrs = NULL;
+ ListCell *lc;
+
+ /*
+ * Must copy the Query so that planning doesn't mess up the RTE contents
+ * (really really need to fix the planner to not scribble on its input,
+ * someday ... but see remove_unused_subquery_outputs to start with).
+ */
+ subquery = copyObject(subquery);
+
+ /*
+ * If it's a LATERAL subquery, it might contain some Vars of the current
+ * query level, requiring it to be treated as parameterized, even though
+ * we don't support pushing down join quals into subqueries.
+ */
+ required_outer = rel->lateral_relids;
+
+ /*
+ * Zero out result area for subquery_is_pushdown_safe, so that it can set
+ * flags as needed while recursing. In particular, we need a workspace
+ * for keeping track of the reasons why columns are unsafe to reference.
+ * These reasons are stored as bits in unsafeFlags[i] whenever we discover
+ * that column i of the subquery is unsafe to be used in a pushed-down
+ * qual.
+ */
+ memset(&safetyInfo, 0, sizeof(safetyInfo));
+ safetyInfo.unsafeFlags = (unsigned char *)
+ palloc0((list_length(subquery->targetList) + 1) * sizeof(unsigned char));
+
+ /*
+ * If the subquery has the "security_barrier" flag, it means the subquery
+ * originated from a view that must enforce row-level security. Then we
+ * must not push down quals that contain leaky functions. (Ideally this
+ * would be checked inside subquery_is_pushdown_safe, but since we don't
+ * currently pass the RTE to that function, we must do it here.)
+ */
+ safetyInfo.unsafeLeaky = rte->security_barrier;
+
+ /*
+ * If there are any restriction clauses that have been attached to the
+ * subquery relation, consider pushing them down to become WHERE or HAVING
+ * quals of the subquery itself. This transformation is useful because it
+ * may allow us to generate a better plan for the subquery than evaluating
+ * all the subquery output rows and then filtering them.
+ *
+ * There are several cases where we cannot push down clauses. Restrictions
+ * involving the subquery are checked by subquery_is_pushdown_safe().
+ * Restrictions on individual clauses are checked by
+ * qual_is_pushdown_safe(). Also, we don't want to push down
+ * pseudoconstant clauses; better to have the gating node above the
+ * subquery.
+ *
+ * Non-pushed-down clauses will get evaluated as qpquals of the
+ * SubqueryScan node.
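+ *
+ * For example (hypothetical query), given
+ *		SELECT * FROM (SELECT x, sum(y) AS s FROM t GROUP BY x) ss
+ *		WHERE ss.x = 42;
+ * the "x = 42" qual can usually be pushed into the subquery (as WHERE or
+ * HAVING, as appropriate), which may let an index on t(x) be used instead
+ * of aggregating the whole table and filtering afterwards.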
+ *
+ * XXX Are there any cases where we want to make a policy decision not to
+ * push down a pushable qual, because it'd result in a worse plan?
+ */
+ if (rel->baserestrictinfo != NIL &&
+ subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
+ {
+ /* OK to consider pushing down individual quals */
+ List *upperrestrictlist = NIL;
+ ListCell *l;
+
+ foreach(l, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ Node *clause = (Node *) rinfo->clause;
+
+ if (rinfo->pseudoconstant)
+ {
+ upperrestrictlist = lappend(upperrestrictlist, rinfo);
+ continue;
+ }
+
+ switch (qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo))
+ {
+ case PUSHDOWN_SAFE:
+ /* Push it down */
+ subquery_push_qual(subquery, rte, rti, clause);
+ break;
+
+ case PUSHDOWN_WINDOWCLAUSE_RUNCOND:
+
+ /*
+ * Since we can't push the qual down into the subquery,
+ * check if it happens to reference a window function. If
+ * so then it might be useful to use for the WindowAgg's
+ * runCondition.
+ */
+ if (!subquery->hasWindowFuncs ||
+ check_and_push_window_quals(subquery, rte, rti, clause,
+ &run_cond_attrs))
+ {
+ /*
+ * The subquery has no window functions, or the clause is
+ * not a suitable window run condition qual, or it is but
+ * the original qual must also be kept in the upper query.
+ */
+ upperrestrictlist = lappend(upperrestrictlist, rinfo);
+ }
+ break;
+
+ case PUSHDOWN_UNSAFE:
+ upperrestrictlist = lappend(upperrestrictlist, rinfo);
+ break;
+ }
+ }
+ rel->baserestrictinfo = upperrestrictlist;
+ /* We don't bother recomputing baserestrict_min_security */
+ }
+
+ pfree(safetyInfo.unsafeFlags);
+
+ /*
+ * The upper query might not use all the subquery's output columns; if
+ * not, we can simplify. Pass the attributes that were pushed down into
+ * WindowAgg run conditions to ensure we don't accidentally think those
+ * are unused.
+ */
+ remove_unused_subquery_outputs(subquery, rel, run_cond_attrs);
+
+ /*
+ * We can safely pass the outer tuple_fraction down to the subquery if the
+ * outer level has no joining, aggregation, or sorting to do. Otherwise
+ * we'd better tell the subquery to plan for full retrieval. (XXX This
+ * could probably be made more intelligent ...)
+ */
+ if (parse->hasAggs ||
+ parse->groupClause ||
+ parse->groupingSets ||
+ parse->havingQual ||
+ parse->distinctClause ||
+ parse->sortClause ||
+ has_multiple_baserels(root))
+ tuple_fraction = 0.0; /* default case */
+ else
+ tuple_fraction = root->tuple_fraction;
+
+ /* plan_params should not be in use in current query level */
+ Assert(root->plan_params == NIL);
+
+ /* Generate a subroot and Paths for the subquery */
+ rel->subroot = subquery_planner(root->glob, subquery,
+ root,
+ false, tuple_fraction);
+
+ /* Isolate the params needed by this specific subplan */
+ rel->subplan_params = root->plan_params;
+ root->plan_params = NIL;
+
+ /*
+ * It's possible that constraint exclusion proved the subquery empty. If
+ * so, it's desirable to produce an unadorned dummy path so that we will
+ * recognize appropriate optimizations at this query level.
+ */
+ sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
+
+ if (IS_DUMMY_REL(sub_final_rel))
+ {
+ set_dummy_rel_pathlist(rel);
+ return;
+ }
+
+ /*
+ * Mark rel with estimated output rows, width, etc. Note that we have to
+ * do this before generating outer-query paths, else cost_subqueryscan is
+ * not happy.
+ */
+ set_subquery_size_estimates(root, rel);
+
+ /*
+ * For each Path that subquery_planner produced, make a SubqueryScanPath
+ * in the outer query.
+ */
+ foreach(lc, sub_final_rel->pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ List *pathkeys;
+
+ /* Convert subpath's pathkeys to outer representation */
+ pathkeys = convert_subquery_pathkeys(root,
+ rel,
+ subpath->pathkeys,
+ make_tlist_from_pathtarget(subpath->pathtarget));
+
+ /* Generate outer path using this subpath */
+ add_path(rel, (Path *)
+ create_subqueryscan_path(root, rel, subpath,
+ pathkeys, required_outer));
+ }
+
+ /* If outer rel allows parallelism, do same for partial paths. */
+ if (rel->consider_parallel && bms_is_empty(required_outer))
+ {
+ /* If consider_parallel is false, there should be no partial paths. */
+ Assert(sub_final_rel->consider_parallel ||
+ sub_final_rel->partial_pathlist == NIL);
+
+ /* Same for partial paths. */
+ foreach(lc, sub_final_rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ List *pathkeys;
+
+ /* Convert subpath's pathkeys to outer representation */
+ pathkeys = convert_subquery_pathkeys(root,
+ rel,
+ subpath->pathkeys,
+ make_tlist_from_pathtarget(subpath->pathtarget));
+
+ /* Generate outer path using this subpath */
+ add_partial_path(rel, (Path *)
+ create_subqueryscan_path(root, rel, subpath,
+ pathkeys,
+ required_outer));
+ }
+ }
+}
+
+/*
+ * set_function_pathlist
+ * Build the (single) access path for a function RTE
+ */
+static void
+set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+ List *pathkeys = NIL;
+
+ /*
+ * We don't support pushing join clauses into the quals of a function
+ * scan, but it could still have required parameterization due to LATERAL
+ * refs in the function expression.
+ */
+ required_outer = rel->lateral_relids;
+
+ /*
+ * The result is considered unordered unless ORDINALITY was used, in which
+ * case it is ordered by the ordinal column (the last one). See if we
+ * care, by checking for uses of that Var in equivalence classes.
+ */
+ if (rte->funcordinality)
+ {
+ AttrNumber ordattno = rel->max_attr;
+ Var *var = NULL;
+ ListCell *lc;
+
+ /*
+ * Is there a Var for it in rel's targetlist? If not, the query did
+ * not reference the ordinality column, or at least not in any way
+ * that would be interesting for sorting.
+ */
+ foreach(lc, rel->reltarget->exprs)
+ {
+ Var *node = (Var *) lfirst(lc);
+
+ /* checking varno/varlevelsup is just paranoia */
+ if (IsA(node, Var) &&
+ node->varattno == ordattno &&
+ node->varno == rel->relid &&
+ node->varlevelsup == 0)
+ {
+ var = node;
+ break;
+ }
+ }
+
+ /*
+ * Try to build pathkeys for this Var with int8 sorting. We tell
+ * build_expression_pathkey not to build any new equivalence class; if
+ * the Var isn't already mentioned in some EC, it means that nothing
+ * cares about the ordering.
+ */
+ if (var)
+ pathkeys = build_expression_pathkey(root,
+ (Expr *) var,
+ NULL, /* below outer joins */
+ Int8LessOperator,
+ rel->relids,
+ false);
+ }
+
+ /* Generate appropriate path */
+ add_path(rel, create_functionscan_path(root, rel,
+ pathkeys, required_outer));
+}
+
+/*
+ * set_values_pathlist
+ * Build the (single) access path for a VALUES RTE
+ */
+static void
+set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+
+ /*
+ * We don't support pushing join clauses into the quals of a values scan,
+ * but it could still have required parameterization due to LATERAL refs
+ * in the values expressions.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Generate appropriate path */
+ add_path(rel, create_valuesscan_path(root, rel, required_outer));
+}
+
+/*
+ * set_tablefunc_pathlist
+ * Build the (single) access path for a table func RTE
+ */
+static void
+set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+
+ /*
+ * We don't support pushing join clauses into the quals of a tablefunc
+ * scan, but it could still have required parameterization due to LATERAL
+ * refs in the function expression.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Generate appropriate path */
+ add_path(rel, create_tablefuncscan_path(root, rel,
+ required_outer));
+}
+
+/*
+ * set_cte_pathlist
+ * Build the (single) access path for a non-self-reference CTE RTE
+ *
+ * There's no need for a separate set_cte_size phase, since we don't
+ * support join-qual-parameterized paths for CTEs.
+ */
+static void
+set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Plan *cteplan;
+ PlannerInfo *cteroot;
+ Index levelsup;
+ int ndx;
+ ListCell *lc;
+ int plan_id;
+ Relids required_outer;
+
+ /*
+ * Find the referenced CTE, and locate the plan previously made for it.
+ */
+ levelsup = rte->ctelevelsup;
+ cteroot = root;
+ while (levelsup-- > 0)
+ {
+ cteroot = cteroot->parent_root;
+ if (!cteroot) /* shouldn't happen */
+ elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+ }
+
+ /*
+ * Note: cte_plan_ids can be shorter than cteList, if we are still working
+ * on planning the CTEs (ie, this is a side-reference from another CTE).
+ * So we mustn't use forboth here.
+ */
+ ndx = 0;
+ foreach(lc, cteroot->parse->cteList)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+
+ if (strcmp(cte->ctename, rte->ctename) == 0)
+ break;
+ ndx++;
+ }
+ if (lc == NULL) /* shouldn't happen */
+ elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
+ if (ndx >= list_length(cteroot->cte_plan_ids))
+ elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
+ plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
+ if (plan_id <= 0)
+ elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename);
+ cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_cte_size_estimates(root, rel, cteplan->plan_rows);
+
+ /*
+ * We don't support pushing join clauses into the quals of a CTE scan, but
+ * it could still have required parameterization due to LATERAL refs in
+ * its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Generate appropriate path */
+ add_path(rel, create_ctescan_path(root, rel, required_outer));
+}
+
+/*
+ * set_namedtuplestore_pathlist
+ * Build the (single) access path for a named tuplestore RTE
+ *
+ * There's no need for a separate set_namedtuplestore_size phase, since we
+ * don't support join-qual-parameterized paths for tuplestores.
+ */
+static void
+set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte)
+{
+ Relids required_outer;
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_namedtuplestore_size_estimates(root, rel);
+
+ /*
+ * We don't support pushing join clauses into the quals of a tuplestore
+ * scan, but it could still have required parameterization due to LATERAL
+ * refs in its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Generate appropriate path */
+ add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(rel);
+}
+
+/*
+ * set_result_pathlist
+ * Build the (single) access path for an RTE_RESULT RTE
+ *
+ * There's no need for a separate set_result_size phase, since we
+ * don't support join-qual-parameterized paths for these RTEs.
+ */
+static void
+set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte)
+{
+ Relids required_outer;
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_result_size_estimates(root, rel);
+
+ /*
+ * We don't support pushing join clauses into the quals of a Result scan,
+ * but it could still have required parameterization due to LATERAL refs
+ * in its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Generate appropriate path */
+ add_path(rel, create_resultscan_path(root, rel, required_outer));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(rel);
+}
+
+/*
+ * set_worktable_pathlist
+ * Build the (single) access path for a self-reference CTE RTE
+ *
+ * There's no need for a separate set_worktable_size phase, since we don't
+ * support join-qual-parameterized paths for CTEs.
+ */
+static void
+set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Path *ctepath;
+ PlannerInfo *cteroot;
+ Index levelsup;
+ Relids required_outer;
+
+ /*
+ * We need to find the non-recursive term's path, which is in the plan
+ * level that's processing the recursive UNION, which is one level *below*
+ * where the CTE comes from.
+ */
+ levelsup = rte->ctelevelsup;
+ if (levelsup == 0) /* shouldn't happen */
+ elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+ levelsup--;
+ cteroot = root;
+ while (levelsup-- > 0)
+ {
+ cteroot = cteroot->parent_root;
+ if (!cteroot) /* shouldn't happen */
+ elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+ }
+ ctepath = cteroot->non_recursive_path;
+ if (!ctepath) /* shouldn't happen */
+ elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_cte_size_estimates(root, rel, ctepath->rows);
+
+ /*
+ * We don't support pushing join clauses into the quals of a worktable
+ * scan, but it could still have required parameterization due to LATERAL
+ * refs in its tlist. (I'm not sure this is actually possible given the
+ * restrictions on recursive references, but it's easy enough to support.)
+ */
+ required_outer = rel->lateral_relids;
+
+ /* Generate appropriate path */
+ add_path(rel, create_worktablescan_path(root, rel, required_outer));
+}
+
+/*
+ * generate_gather_paths
+ * Generate parallel access paths for a relation by pushing a Gather or
+ * Gather Merge on top of a partial path.
+ *
+ * This must not be called until after we're done creating all partial paths
+ * for the specified relation. (Otherwise, add_partial_path might delete a
+ * path that some GatherPath or GatherMergePath has a reference to.)
+ *
+ * If we're generating paths for a scan or join relation, override_rows will
+ * be false, and we'll just use the relation's size estimate. When we're
+ * being called for a partially-grouped path, though, we need to override
+ * the rowcount estimate. (It's not clear that the particular value we're
+ * using here is actually best, but the underlying rel has no estimate so
+ * we must do something.)
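+ *
+ * The resulting plans have shapes like (illustrative only, hypothetical
+ * table and index names):
+ *		Gather
+ *		  ->  Parallel Seq Scan on tbl
+ * and, when the partial path is suitably ordered,
+ *		Gather Merge
+ *		  ->  Parallel Index Scan using tbl_a_idx on tbl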
+ */
+void
+generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
+{
+ Path *cheapest_partial_path;
+ Path *simple_gather_path;
+ ListCell *lc;
+ double rows;
+ double *rowsp = NULL;
+
+ /* If there are no partial paths, there's nothing to do here. */
+ if (rel->partial_pathlist == NIL)
+ return;
+
+ /* Should we override the rel's rowcount estimate? */
+ if (override_rows)
+ rowsp = &rows;
+
+ /*
+ * The output of Gather is always unsorted, so there's only one partial
+ * path of interest: the cheapest one. That will be the one at the front
+ * of partial_pathlist because of the way add_partial_path works.
+ */
+ cheapest_partial_path = linitial(rel->partial_pathlist);
+ rows =
+ cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
+ simple_gather_path = (Path *)
+ create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
+ NULL, rowsp);
+ add_path(rel, simple_gather_path);
+
+ /*
+ * For each useful ordering, we can consider an order-preserving Gather
+ * Merge.
+ */
+ foreach(lc, rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ GatherMergePath *path;
+
+ if (subpath->pathkeys == NIL)
+ continue;
+
+ rows = subpath->rows * subpath->parallel_workers;
+ path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
+ subpath->pathkeys, NULL, rowsp);
+ add_path(rel, &path->path);
+ }
+}
+
+/*
+ * get_useful_pathkeys_for_relation
+ * Determine which orderings of a relation might be useful.
+ *
+ * Getting data in sorted order can be useful either because the requested
+ * order matches the final output ordering for the overall query we're
+ * planning, or because it enables an efficient merge join. Here, we try
+ * to figure out which pathkeys to consider.
+ *
+ * This allows us to do incremental sort on top of an index scan under a gather
+ * merge node, i.e. parallelized.
+ *
+ * If require_parallel_safe is true, we also require the expressions to
+ * be parallel safe (which allows pushing the sort below Gather Merge).
+ *
+ * XXX At the moment this can only ever return a list with a single element,
+ * because it looks at query_pathkeys only. So we might return the pathkeys
+ * directly, but it seems plausible we'll want to consider other orderings
+ * in the future. For example, we might want to consider pathkeys useful for
+ * merge joins.
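+ *
+ * For example (hypothetical schema), for a query with ORDER BY a, b where
+ * only an index on (a) exists, returning the {a, b} pathkeys lets the
+ * caller consider an incremental sort on b (reusing the presorted a)
+ * below a Gather Merge.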
+ */
+static List *
+get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
+ bool require_parallel_safe)
+{
+ List *useful_pathkeys_list = NIL;
+
+ /*
+ * Considering query_pathkeys is always worth it, because it might allow
+ * us to avoid a total sort when we have a partially presorted path
+ * available or to push the total sort into the parallel portion of the
+ * query.
+ */
+ if (root->query_pathkeys)
+ {
+ ListCell *lc;
+ int npathkeys = 0; /* useful pathkeys */
+
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+
+ /*
+ * We can only build a sort for pathkeys that contain a
+ * safe-to-compute-early EC member computable from the current
+ * relation's reltarget, so ignore the remainder of the list as
+ * soon as we find a pathkey without such a member.
+ *
+ * It's still worthwhile to return any prefix of the pathkeys list
+ * that meets this requirement, as we may be able to do an
+ * incremental sort.
+ *
+ * If requested, ensure the sort expression is parallel-safe too.
+ */
+ if (!relation_can_be_sorted_early(root, rel, pathkey_ec,
+ require_parallel_safe))
+ break;
+
+ npathkeys++;
+ }
+
+ /*
+ * The whole query_pathkeys list matches, so append it directly, to
+ * allow comparing pathkeys easily by comparing list pointers. If we
+ * have to truncate the pathkeys, we must make a copy, though.
+ */
+ if (npathkeys == list_length(root->query_pathkeys))
+ useful_pathkeys_list = lappend(useful_pathkeys_list,
+ root->query_pathkeys);
+ else if (npathkeys > 0)
+ useful_pathkeys_list = lappend(useful_pathkeys_list,
+ list_truncate(list_copy(root->query_pathkeys),
+ npathkeys));
+ }
+
+ return useful_pathkeys_list;
+}
+
+/*
+ * generate_useful_gather_paths
+ * Generate parallel access paths for a relation by pushing a Gather or
+ * Gather Merge on top of a partial path.
+ *
+ * Unlike plain generate_gather_paths, this not only looks at the pathkeys
+ * of the input paths (aiming to preserve their ordering), but also considers
+ * orderings that might be useful to nodes above the gather merge node, and
+ * tries to add a sort (regular or incremental) to provide them.
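+ *
+ * A typical resulting plan shape (illustrative only, hypothetical names) is
+ *		Gather Merge
+ *		  ->  Incremental Sort
+ *		        ->  Parallel Index Scan using tbl_a_idx on tbl
+ * where the index supplies a prefix of the ordering wanted above the
+ * Gather Merge.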
+ */
+void
+generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
+{
+ ListCell *lc;
+ double rows;
+ double *rowsp = NULL;
+ List *useful_pathkeys_list = NIL;
+ Path *cheapest_partial_path = NULL;
+
+ /* If there are no partial paths, there's nothing to do here. */
+ if (rel->partial_pathlist == NIL)
+ return;
+
+ /* Should we override the rel's rowcount estimate? */
+ if (override_rows)
+ rowsp = &rows;
+
+ /* generate the regular gather (merge) paths */
+ generate_gather_paths(root, rel, override_rows);
+
+ /* consider incremental sort for interesting orderings */
+ useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true);
+
+ /* used for explicit (full) sort paths */
+ cheapest_partial_path = linitial(rel->partial_pathlist);
+
+ /*
+ * Consider sorted paths for each interesting ordering. We generate both
+ * incremental and full sort.
+ */
+ foreach(lc, useful_pathkeys_list)
+ {
+ List *useful_pathkeys = lfirst(lc);
+ ListCell *lc2;
+ bool is_sorted;
+ int presorted_keys;
+
+ foreach(lc2, rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc2);
+ GatherMergePath *path;
+
+ is_sorted = pathkeys_count_contained_in(useful_pathkeys,
+ subpath->pathkeys,
+ &presorted_keys);
+
+ /*
+ * We don't need to consider the case where a subpath is already
+ * fully sorted because generate_gather_paths already creates a
+ * gather merge path for every subpath that has pathkeys present.
+ *
+ * But since the subpath is already sorted, we know we don't need
+ * to consider adding a sort (full or incremental) on top of it,
+ * so we can continue here.
+ */
+ if (is_sorted)
+ continue;
+
+ /*
+ * Consider regular sort for the cheapest partial path (for each
+ * useful pathkeys). We know the path is not sorted, because we'd
+ * not get here otherwise.
+ *
+ * This is not redundant with the gather paths created in
+ * generate_gather_paths, because that doesn't generate ordered
+ * output. Here we add an explicit sort to match the useful
+ * ordering.
+ */
+ if (cheapest_partial_path == subpath)
+ {
+ Path *tmp;
+
+ tmp = (Path *) create_sort_path(root,
+ rel,
+ subpath,
+ useful_pathkeys,
+ -1.0);
+
+ rows = tmp->rows * tmp->parallel_workers;
+
+ path = create_gather_merge_path(root, rel,
+ tmp,
+ rel->reltarget,
+ tmp->pathkeys,
+ NULL,
+ rowsp);
+
+ add_path(rel, &path->path);
+
+ /* Fall through */
+ }
+
+ /*
+ * Consider incremental sort, but only when the subpath is already
+ * partially sorted on a pathkey prefix.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ Path *tmp;
+
+ /*
+ * We should have already excluded pathkeys of length 1
+ * because then presorted_keys > 0 would imply is_sorted was
+ * true.
+ */
+ Assert(list_length(useful_pathkeys) != 1);
+
+ tmp = (Path *) create_incremental_sort_path(root,
+ rel,
+ subpath,
+ useful_pathkeys,
+ presorted_keys,
+ -1);
+
+ path = create_gather_merge_path(root, rel,
+ tmp,
+ rel->reltarget,
+ tmp->pathkeys,
+ NULL,
+ rowsp);
+
+ add_path(rel, &path->path);
+ }
+ }
+ }
+}
+
+/*
+ * make_rel_from_joinlist
+ * Build access paths using a "joinlist" to guide the join path search.
+ *
+ * See comments for deconstruct_jointree() for definition of the joinlist
+ * data structure.
+ */
+static RelOptInfo *
+make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
+{
+ int levels_needed;
+ List *initial_rels;
+ ListCell *jl;
+
+ /*
+ * Count the number of child joinlist nodes. This is the depth of the
+ * dynamic-programming algorithm we must employ to consider all ways of
+ * joining the child nodes.
+ */
+ levels_needed = list_length(joinlist);
+
+ if (levels_needed <= 0)
+ return NULL; /* nothing to do? */
+
+ /*
+ * Construct a list of rels corresponding to the child joinlist nodes.
+ * This may contain both base rels and rels constructed according to
+ * sub-joinlists.
+ */
+ initial_rels = NIL;
+ foreach(jl, joinlist)
+ {
+ Node *jlnode = (Node *) lfirst(jl);
+ RelOptInfo *thisrel;
+
+ if (IsA(jlnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jlnode)->rtindex;
+
+ thisrel = find_base_rel(root, varno);
+ }
+ else if (IsA(jlnode, List))
+ {
+ /* Recurse to handle subproblem */
+ thisrel = make_rel_from_joinlist(root, (List *) jlnode);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized joinlist node type: %d",
+ (int) nodeTag(jlnode));
+ thisrel = NULL; /* keep compiler quiet */
+ }
+
+ initial_rels = lappend(initial_rels, thisrel);
+ }
+
+ if (levels_needed == 1)
+ {
+ /*
+ * Single joinlist node, so we're done.
+ */
+ return (RelOptInfo *) linitial(initial_rels);
+ }
+ else
+ {
+ /*
+ * Consider the different orders in which we could join the rels,
+ * using a plugin, GEQO, or the regular join search code.
+ *
+ * We put the initial_rels list into a PlannerInfo field because
+ * has_legal_joinclause() needs to look at it (ugly :-().
+ */
+ root->initial_rels = initial_rels;
+
+ if (join_search_hook)
+ return (*join_search_hook) (root, levels_needed, initial_rels);
+ else if (enable_geqo && levels_needed >= geqo_threshold)
+ return geqo(root, levels_needed, initial_rels);
+ else
+ return standard_join_search(root, levels_needed, initial_rels);
+ }
+}
+
+/*
+ * standard_join_search
+ * Find possible joinpaths for a query by successively finding ways
+ * to join component relations into join relations.
+ *
+ * 'levels_needed' is the number of iterations needed, ie, the number of
+ * independent jointree items in the query. This is > 1.
+ *
+ * 'initial_rels' is a list of RelOptInfo nodes for each independent
+ * jointree item. These are the components to be joined together.
+ * Note that levels_needed == list_length(initial_rels).
+ *
+ * Returns the final level of join relations, i.e., the relation that is
+ * the result of joining all the original relations together.
+ * At least one implementation path must be provided for this relation and
+ * all required sub-relations.
+ *
+ * To support loadable plugins that modify planner behavior by changing the
+ * join searching algorithm, we provide a hook variable that lets a plugin
+ * replace or supplement this function. Any such hook must return the same
+ * final join relation as the standard code would, but it might have a
+ * different set of implementation paths attached, and only the sub-joinrels
+ * needed for these paths need have been instantiated.
+ *
+ * Note to plugin authors: the functions invoked during standard_join_search()
+ * modify root->join_rel_list and root->join_rel_hash. If you want to do more
+ * than one join-order search, you'll probably need to save and restore the
+ * original states of those data structures. See geqo_eval() for an example.
+ */
+RelOptInfo *
+standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
+{
+ int lev;
+ RelOptInfo *rel;
+
+ /*
+ * This function cannot be invoked recursively within any one planning
+ * problem, so join_rel_level[] can't be in use already.
+ */
+ Assert(root->join_rel_level == NULL);
+
+ /*
+ * We employ a simple "dynamic programming" algorithm: we first find all
+ * ways to build joins of two jointree items, then all ways to build joins
+ * of three items (from two-item joins and single items), then four-item
+ * joins, and so on until we have considered all ways to join all the
+ * items into one rel.
+ *
+ * root->join_rel_level[j] is a list of all the j-item rels. Initially we
+ * set root->join_rel_level[1] to represent all the single-jointree-item
+ * relations.
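+ *
+ * For example (hypothetical rels), when joining A, B and C, level 1 holds
+ * {A}, {B} and {C}, level 2 holds two-item joinrels such as {A B} and
+ * {B C}, and level 3 holds the single final joinrel {A B C}.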
+ */
+ root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
+
+ root->join_rel_level[1] = initial_rels;
+
+ for (lev = 2; lev <= levels_needed; lev++)
+ {
+ ListCell *lc;
+
+ /*
+ * Determine all possible pairs of relations to be joined at this
+ * level, and build paths for making each one from every available
+ * pair of lower-level relations.
+ */
+ join_search_one_level(root, lev);
+
+ /*
+ * Run generate_partitionwise_join_paths() and
+ * generate_useful_gather_paths() for each just-processed joinrel. We
+ * could not do this earlier because both regular and partial paths
+ * can get added to a particular joinrel at multiple times within
+ * join_search_one_level.
+ *
+ * After that, we're done creating paths for the joinrel, so run
+ * set_cheapest().
+ */
+ foreach(lc, root->join_rel_level[lev])
+ {
+ rel = (RelOptInfo *) lfirst(lc);
+
+ /* Create paths for partitionwise joins. */
+ generate_partitionwise_join_paths(root, rel);
+
+ /*
+ * Except for the topmost scan/join rel, consider gathering
+ * partial paths. We'll do the same for the topmost scan/join rel
+ * once we know the final targetlist (see grouping_planner).
+ */
+ if (!bms_equal(rel->relids, root->all_baserels))
+ generate_useful_gather_paths(root, rel, false);
+
+ /* Find and save the cheapest paths for this rel */
+ set_cheapest(rel);
+
+#ifdef OPTIMIZER_DEBUG
+ debug_print_rel(root, rel);
+#endif
+ }
+ }
+
+ /*
+ * We should have a single rel at the final level.
+ */
+ if (root->join_rel_level[levels_needed] == NIL)
+ elog(ERROR, "failed to build any %d-way joins", levels_needed);
+ Assert(list_length(root->join_rel_level[levels_needed]) == 1);
+
+ rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
+
+ root->join_rel_level = NULL;
+
+ return rel;
+}
+
+/*****************************************************************************
+ * PUSHING QUALS DOWN INTO SUBQUERIES
+ *****************************************************************************/
+
+/*
+ * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
+ *
+ * subquery is the particular component query being checked. topquery
+ * is the top component of a set-operations tree (the same Query if no
+ * set-op is involved).
+ *
+ * Conditions checked here:
+ *
+ * 1. If the subquery has a LIMIT clause, we must not push down any quals,
+ * since that could change the set of rows returned; see the example below.
+ *
+ * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
+ * quals into it, because that could change the results.
+ *
+ * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
+ * This is because upper-level quals should semantically be evaluated only
+ * once per distinct row, not once per original row, and if the qual is
+ * volatile then extra evaluations could change the results. (This issue
+ * does not apply to other forms of aggregation such as GROUP BY, because
+ * when those are present we push into HAVING not WHERE, so that the quals
+ * are still applied after aggregation.)
+ *
+ * 4. If the subquery contains window functions, we cannot push volatile quals
+ * into it. The issue here is a bit different from DISTINCT: a volatile qual
+ * might succeed for some rows of a window partition and fail for others,
+ * thereby changing the partition contents and thus the window functions'
+ * results for rows that remain.
+ *
+ * 5. If the subquery contains any set-returning functions in its targetlist,
+ * we cannot push volatile quals into it. That would push them below the SRFs
+ * and thereby change the number of times they are evaluated. Also, a
+ * volatile qual could succeed for some SRF output rows and fail for others,
+ * a behavior that cannot occur if it's evaluated before SRF expansion.
+ *
+ * 6. If the subquery has nonempty grouping sets, we cannot push down any
+ * quals. The concern here is that a qual referencing a "constant" grouping
+ * column could get constant-folded, which would be improper because the value
+ * is potentially nullable by grouping-set expansion. This restriction could
+ * be removed if we had a parsetree representation that shows that such
+ * grouping columns are not really constant. (There are other ideas that
+ * could be used to relax this restriction, but that's the approach most
+ * likely to get taken in the future. Note that there's not much to be gained
+ * so long as subquery_planner can't move HAVING clauses to WHERE within such
+ * a subquery.)
+ *
+ * In addition, we make several checks on the subquery's output columns to see
+ * if it is safe to reference them in pushed-down quals. If output column k
+ * is found to be unsafe to reference, we set the reason for that inside
+ * safetyInfo->unsafeFlags[k], but we don't reject the subquery overall since
+ * column k might not be referenced by some/all quals. The unsafeFlags[]
+ * array will be consulted later by qual_is_pushdown_safe(). It's better to
+ * do it this way than to make the checks directly in qual_is_pushdown_safe(),
+ * because when the subquery involves set operations we have to check the
+ * output expressions in each arm of the set op.
+ *
+ * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
+ * we're effectively assuming that the quals cannot distinguish values that
+ * the DISTINCT's equality operator sees as equal, yet there are many
+ * counterexamples to that assumption. However use of such a qual with a
+ * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
+ * "equal" value will be chosen as the output value by the DISTINCT operation.
+ * So we don't worry too much about that. Another objection is that if the
+ * qual is expensive to evaluate, running it for each original row might cost
+ * more than we save by eliminating rows before the DISTINCT step. But it
+ * would be very hard to estimate that at this stage, and in practice pushdown
+ * seldom seems to make things worse, so we ignore that problem too.
+ *
+ * Note: likewise, pushing quals into a subquery with window functions is a
+ * bit dubious: the quals might remove some rows of a window partition while
+ * leaving others, causing changes in the window functions' results for the
+ * surviving rows. We insist that such a qual reference only partitioning
+ * columns, but again that only protects us if the qual does not distinguish
+ * values that the partitioning equality operator sees as equal. The risks
+ * here are perhaps larger than for DISTINCT, since no de-duplication of rows
+ * occurs and thus there is no theoretical problem with such a qual. But
+ * we'll do this anyway because the potential performance benefits are very
+ * large, and we've seen no field complaints about the longstanding comparable
+ * behavior with DISTINCT.
+ */
+static bool
+subquery_is_pushdown_safe(Query *subquery, Query *topquery,
+ pushdown_safety_info *safetyInfo)
+{
+ SetOperationStmt *topop;
+
+ /* Check point 1 */
+ if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
+ return false;
+
+ /* Check point 6 */
+ if (subquery->groupClause && subquery->groupingSets)
+ return false;
+
+ /* Check points 3, 4, and 5 */
+ if (subquery->distinctClause ||
+ subquery->hasWindowFuncs ||
+ subquery->hasTargetSRFs)
+ safetyInfo->unsafeVolatile = true;
+
+ /*
+ * If we're at a leaf query, check for unsafe expressions in its target
+ * list, and mark any reasons why they're unsafe in unsafeFlags[].
+ * (Non-leaf nodes in setop trees have only simple Vars in their tlists,
+ * so no need to check them.)
+ */
+ if (subquery->setOperations == NULL)
+ check_output_expressions(subquery, safetyInfo);
+
+ /* Are we at top level, or looking at a setop component? */
+ if (subquery == topquery)
+ {
+ /* Top level, so check any component queries */
+ if (subquery->setOperations != NULL)
+ if (!recurse_pushdown_safe(subquery->setOperations, topquery,
+ safetyInfo))
+ return false;
+ }
+ else
+ {
+ /* Setop component must not have more components (too weird) */
+ if (subquery->setOperations != NULL)
+ return false;
+ /* Check whether setop component output types match top level */
+ topop = castNode(SetOperationStmt, topquery->setOperations);
+ Assert(topop);
+ compare_tlist_datatypes(subquery->targetList,
+ topop->colTypes,
+ safetyInfo);
+ }
+ return true;
+}
+
+/*
+ * Helper routine to recurse through setOperations tree
+ */
+static bool
+recurse_pushdown_safe(Node *setOp, Query *topquery,
+ pushdown_safety_info *safetyInfo)
+{
+ if (IsA(setOp, RangeTblRef))
+ {
+ RangeTblRef *rtr = (RangeTblRef *) setOp;
+ RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
+ Query *subquery = rte->subquery;
+
+ Assert(subquery != NULL);
+ return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
+ }
+ else if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+ /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
+ if (op->op == SETOP_EXCEPT)
+ return false;
+ /* Else recurse */
+ if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
+ return false;
+ if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
+ return false;
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(setOp));
+ }
+ return true;
+}
+
+/*
+ * check_output_expressions - check subquery's output expressions for safety
+ *
+ * There are several cases in which it's unsafe to push down an upper-level
+ * qual if it references a particular output column of a subquery. We check
+ * each output column of the subquery and set flags in unsafeFlags[k] when we
+ * see that column is unsafe for a pushed-down qual to reference. The
+ * conditions checked here are:
+ *
+ * 1. We must not push down any quals that refer to subselect outputs that
+ * return sets, else we'd introduce functions-returning-sets into the
+ * subquery's WHERE/HAVING quals.
+ *
+ * 2. We must not push down any quals that refer to subselect outputs that
+ * contain volatile functions, for fear of introducing strange results due
+ * to multiple evaluation of a volatile function.
+ *
+ * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
+ * refer to non-DISTINCT output columns, because that could change the set
+ * of rows returned. (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check. Note that
+ * subquery_is_pushdown_safe already reported that we can't use volatile
+ * quals if there's DISTINCT or DISTINCT ON.)
+ *
+ * 4. If the subquery has any window functions, we must not push down quals
+ * that reference any output columns that are not listed in all the subquery's
+ * window PARTITION BY clauses. We can push down quals that use only
+ * partitioning columns because they should succeed or fail identically for
+ * every row of any one window partition, and totally excluding some
+ * partitions will not change a window function's results for remaining
+ * partitions. (Again, this also requires nonvolatile quals, but
+ * subquery_is_pushdown_safe handles that.) Subquery columns marked as
+ * unsafe for this reason can still have WindowClause run conditions pushed
+ * down.
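+ *
+ * As an illustration of point 4 (hypothetical query, not taken from the
+ * source): given
+ * SELECT a, b, rank() OVER (PARTITION BY a ORDER BY b) FROM t
+ * a pushed-down qual on "a" can only discard whole partitions, while a
+ * qual on "b" could remove some rows of a partition and change rank()
+ * for the rows that remain, so only "a" is a safe column to reference.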
+ */
+static void
+check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
+{
+ ListCell *lc;
+
+ foreach(lc, subquery->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ if (tle->resjunk)
+ continue; /* ignore resjunk columns */
+
+ /* Functions returning sets are unsafe (point 1) */
+ if (subquery->hasTargetSRFs &&
+ (safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_HAS_SET_FUNC) == 0 &&
+ expression_returns_set((Node *) tle->expr))
+ {
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_SET_FUNC;
+ continue;
+ }
+
+ /* Volatile functions are unsafe (point 2) */
+ if ((safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_HAS_VOLATILE_FUNC) == 0 &&
+ contain_volatile_functions((Node *) tle->expr))
+ {
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_VOLATILE_FUNC;
+ continue;
+ }
+
+ /* If subquery uses DISTINCT ON, check point 3 */
+ if (subquery->hasDistinctOn &&
+ (safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_NOTIN_DISTINCTON_CLAUSE) == 0 &&
+ !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
+ {
+ /* non-DISTINCT column, so mark it unsafe */
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_DISTINCTON_CLAUSE;
+ continue;
+ }
+
+ /* If subquery uses window functions, check point 4 */
+ if (subquery->hasWindowFuncs &&
+ (safetyInfo->unsafeFlags[tle->resno] &
+ UNSAFE_NOTIN_PARTITIONBY_CLAUSE) == 0 &&
+ !targetIsInAllPartitionLists(tle, subquery))
+ {
+ /* not present in all PARTITION BY clauses, so mark it unsafe */
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_PARTITIONBY_CLAUSE;
+ continue;
+ }
+ }
+}
+
+/*
+ * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
+ * push quals into each component query, but the quals can only reference
+ * subquery columns that suffer no type coercions in the set operation.
+ * Otherwise there are possible semantic gotchas. So, we check the
+ * component queries to see if any of them have output types different from
+ * the top-level setop outputs. We set the UNSAFE_TYPE_MISMATCH bit in
+ * unsafeFlags[k] if column k has different type in any component.
+ *
+ * We don't have to care about typmods here: the only allowed difference
+ * between set-op input and output typmods is that the input has a specific
+ * typmod while the output is -1, and that difference does not require a
+ * coercion.
+ *
+ * tlist is a subquery tlist.
+ * colTypes is an OID list of the top-level setop's output column types.
+ * safetyInfo is the pushdown_safety_info to set unsafeFlags[] for.
+ */
+static void
+compare_tlist_datatypes(List *tlist, List *colTypes,
+ pushdown_safety_info *safetyInfo)
+{
+ ListCell *l;
+ ListCell *colType = list_head(colTypes);
+
+ foreach(l, tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk)
+ continue; /* ignore resjunk columns */
+ if (colType == NULL)
+ elog(ERROR, "wrong number of tlist entries");
+ if (exprType((Node *) tle->expr) != lfirst_oid(colType))
+ safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_TYPE_MISMATCH;
+ colType = lnext(colTypes, colType);
+ }
+ if (colType != NULL)
+ elog(ERROR, "wrong number of tlist entries");
+}
+
+/*
+ * targetIsInAllPartitionLists
+ * True if the TargetEntry is listed in the PARTITION BY clause
+ * of every window defined in the query.
+ *
+ * It would be safe to ignore windows not actually used by any window
+ * function, but it's not easy to get that info at this stage; and it's
+ * unlikely to be useful to spend any extra cycles getting it, since
+ * unreferenced window definitions are probably infrequent in practice.
+ */
+static bool
+targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
+{
+ ListCell *lc;
+
+ foreach(lc, query->windowClause)
+ {
+ WindowClause *wc = (WindowClause *) lfirst(lc);
+
+ if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
+ return false;
+ }
+ return true;
+}
+
+/*
+ * qual_is_pushdown_safe - is a particular rinfo safe to push down?
+ *
+ * rinfo is a restriction clause applying to the given subquery (whose RTE
+ * has index rti in the parent query).
+ *
+ * Conditions checked here:
+ *
+ * 1. rinfo's clause must not contain any SubPlans (mainly because it's
+ * unclear that it will work correctly: SubLinks will already have been
+ * transformed into SubPlans in the qual, but not in the subquery). Note that
+ * SubLinks that transform to initplans are safe, and will be accepted here
+ * because what we'll see in the qual is just a Param referencing the initplan
+ * output.
+ *
+ * 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile
+ * functions.
+ *
+ * 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky
+ * functions that are passed Var nodes, and therefore might reveal values from
+ * the subquery as side effects.
+ *
+ * 4. rinfo's clause must not refer to the whole-row output of the subquery
+ * (since there is no easy way to name that within the subquery itself).
+ *
+ * 5. rinfo's clause must not refer to any subquery output columns that were
+ * found to be unsafe to reference by subquery_is_pushdown_safe().
+ */
+static pushdown_safe_type
+qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo,
+ pushdown_safety_info *safetyInfo)
+{
+ pushdown_safe_type safe = PUSHDOWN_SAFE;
+ Node *qual = (Node *) rinfo->clause;
+ List *vars;
+ ListCell *vl;
+
+ /* Refuse subselects (point 1) */
+ if (contain_subplans(qual))
+ return PUSHDOWN_UNSAFE;
+
+ /* Refuse volatile quals if we found they'd be unsafe (point 2) */
+ if (safetyInfo->unsafeVolatile &&
+ contain_volatile_functions((Node *) rinfo))
+ return PUSHDOWN_UNSAFE;
+
+ /* Refuse leaky quals if told to (point 3) */
+ if (safetyInfo->unsafeLeaky &&
+ contain_leaked_vars(qual))
+ return PUSHDOWN_UNSAFE;
+
+ /*
+ * It would be unsafe to push down window function calls, but at least for
+ * the moment we could never see any in a qual anyhow. (The same applies
+ * to aggregates, which we check for in pull_var_clause below.)
+ */
+ Assert(!contain_window_function(qual));
+
+ /*
+ * Examine all Vars used in clause. Since it's a restriction clause, all
+ * such Vars must refer to subselect output columns ... unless this is
+ * part of a LATERAL subquery, in which case there could be lateral
+ * references.
+ */
+ vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
+ foreach(vl, vars)
+ {
+ Var *var = (Var *) lfirst(vl);
+
+ /*
+ * XXX Punt if we find any PlaceHolderVars in the restriction clause.
+ * It's not clear whether a PHV could safely be pushed down, and even
+ * less clear whether such a situation could arise in any cases of
+ * practical interest anyway. So for the moment, just refuse to push
+ * down.
+ */
+ if (!IsA(var, Var))
+ {
+ safe = PUSHDOWN_UNSAFE;
+ break;
+ }
+
+ /*
+ * Punt if we find any lateral references. It would be safe to push
+ * these down, but we'd have to convert them into outer references,
+ * which subquery_push_qual lacks the infrastructure to do. The case
+ * arises so seldom that it doesn't seem worth working hard on.
+ */
+ if (var->varno != rti)
+ {
+ safe = PUSHDOWN_UNSAFE;
+ break;
+ }
+
+ /* Subqueries have no system columns */
+ Assert(var->varattno >= 0);
+
+ /* Check point 4 */
+ if (var->varattno == 0)
+ {
+ safe = PUSHDOWN_UNSAFE;
+ break;
+ }
+
+ /* Check point 5 */
+ if (safetyInfo->unsafeFlags[var->varattno] != 0)
+ {
+ if (safetyInfo->unsafeFlags[var->varattno] &
+ (UNSAFE_HAS_VOLATILE_FUNC | UNSAFE_HAS_SET_FUNC |
+ UNSAFE_NOTIN_DISTINCTON_CLAUSE | UNSAFE_TYPE_MISMATCH))
+ {
+ safe = PUSHDOWN_UNSAFE;
+ break;
+ }
+ else
+ {
+ /* UNSAFE_NOTIN_PARTITIONBY_CLAUSE is ok for run conditions */
+ safe = PUSHDOWN_WINDOWCLAUSE_RUNCOND;
+ /* don't break, we might find another Var that's unsafe */
+ }
+ }
+ }
+
+ list_free(vars);
+
+ return safe;
+}
+
+/*
+ * subquery_push_qual - push down a qual that we have determined is safe
+ */
+static void
+subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
+{
+ if (subquery->setOperations != NULL)
+ {
+ /* Recurse to push it separately to each component query */
+ recurse_push_qual(subquery->setOperations, subquery,
+ rte, rti, qual);
+ }
+ else
+ {
+ /*
+ * We need to replace Vars in the qual (which must refer to outputs of
+ * the subquery) with copies of the subquery's targetlist expressions.
+ * Note that at this point, any uplevel Vars in the qual should have
+ * been replaced with Params, so they need no work.
+ *
+ * This step also ensures that when we are pushing into a setop tree,
+ * each component query gets its own copy of the qual.
+ */
+ qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
+ subquery->targetList,
+ REPLACEVARS_REPORT_ERROR, 0,
+ &subquery->hasSubLinks);
+
+ /*
+ * Now attach the qual to the proper place: normally WHERE, but if the
+ * subquery uses grouping or aggregation, put it in HAVING (since the
+ * qual really refers to the group-result rows).
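+ *
+ * For instance (hypothetical query, not from the source), a qual
+ * "s > 10" pushed into "SELECT g, sum(x) AS s FROM t GROUP BY g" has
+ * already had "s" replaced by "sum(x)" above, so it must be applied
+ * as HAVING sum(x) > 10; putting it in WHERE would filter individual
+ * rows of t before aggregation and change the result.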
+ */
+ if (subquery->hasAggs || subquery->groupClause ||
+ subquery->groupingSets || subquery->havingQual)
+ subquery->havingQual = make_and_qual(subquery->havingQual, qual);
+ else
+ subquery->jointree->quals =
+ make_and_qual(subquery->jointree->quals, qual);
+
+ /*
+ * We need not change the subquery's hasAggs or hasSubLinks flags,
+ * since we can't be pushing down any aggregates that weren't there
+ * before, and we don't push down subselects at all.
+ */
+ }
+}
+
+/*
+ * Helper routine to recurse through setOperations tree
+ */
+static void
+recurse_push_qual(Node *setOp, Query *topquery,
+ RangeTblEntry *rte, Index rti, Node *qual)
+{
+ if (IsA(setOp, RangeTblRef))
+ {
+ RangeTblRef *rtr = (RangeTblRef *) setOp;
+ RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
+ Query *subquery = subrte->subquery;
+
+ Assert(subquery != NULL);
+ subquery_push_qual(subquery, rte, rti, qual);
+ }
+ else if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+ recurse_push_qual(op->larg, topquery, rte, rti, qual);
+ recurse_push_qual(op->rarg, topquery, rte, rti, qual);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(setOp));
+ }
+}
+
+/*****************************************************************************
+ * SIMPLIFYING SUBQUERY TARGETLISTS
+ *****************************************************************************/
+
+/*
+ * remove_unused_subquery_outputs
+ * Remove subquery targetlist items we don't need
+ *
+ * It's possible, even likely, that the upper query does not read all the
+ * output columns of the subquery. We can remove any such outputs that are
+ * not needed by the subquery itself (e.g., as sort/group columns) and do not
+ * affect semantics otherwise (e.g., volatile functions can't be removed).
+ * This is useful not only because we might be able to remove expensive-to-
+ * compute expressions, but because deletion of output columns might allow
+ * optimizations such as join removal to occur within the subquery.
+ *
+ * extra_used_attrs can be passed as non-NULL to mark any columns (offset by
+ * FirstLowInvalidHeapAttributeNumber) that we should not remove. This
+ * parameter is modified by the function, so callers must make a copy if they
+ * need to use the passed in Bitmapset after calling this function.
+ *
+ * To avoid affecting column numbering in the targetlist, we don't physically
+ * remove unused tlist entries, but rather replace their expressions with NULL
+ * constants. This is implemented by modifying subquery->targetList.
+ */
+static void
+remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
+ Bitmapset *extra_used_attrs)
+{
+ Bitmapset *attrs_used;
+ ListCell *lc;
+
+ /*
+ * Just point directly to extra_used_attrs. No need to bms_copy as none of
+ * the current callers use the Bitmapset after calling this function.
+ */
+ attrs_used = extra_used_attrs;
+
+ /*
+ * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
+ * could update all the child SELECTs' tlists, but it seems not worth the
+ * trouble presently.
+ */
+ if (subquery->setOperations)
+ return;
+
+ /*
+ * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
+ * time: all its output columns must be used in the distinctClause.
+ */
+ if (subquery->distinctClause && !subquery->hasDistinctOn)
+ return;
+
+ /*
+ * Collect a bitmap of all the output column numbers used by the upper
+ * query.
+ *
+ * Add all the attributes needed for joins or final output. Note: we must
+ * look at rel's targetlist, not the attr_needed data, because attr_needed
+ * isn't computed for inheritance child rels, cf set_append_rel_size().
+ * (XXX might be worth changing that sometime.)
+ */
+ pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
+
+ /* Add all the attributes used by un-pushed-down restriction clauses. */
+ foreach(lc, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
+ }
+
+ /*
+ * If there's a whole-row reference to the subquery, we can't remove
+ * anything.
+ */
+ if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
+ return;
+
+ /*
+ * Run through the tlist and zap entries we don't need. It's okay to
+ * modify the tlist items in-place because set_subquery_pathlist made a
+ * copy of the subquery.
+ */
+ foreach(lc, subquery->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+ Node *texpr = (Node *) tle->expr;
+
+ /*
+ * If it has a sortgroupref number, it's used in some sort/group
+ * clause so we'd better not remove it. Also, don't remove any
+ * resjunk columns, since their reason for being has nothing to do
+ * with anybody reading the subquery's output. (It's likely that
+ * resjunk columns in a sub-SELECT would always have ressortgroupref
+ * set, but even if they don't, it seems imprudent to remove them.)
+ */
+ if (tle->ressortgroupref || tle->resjunk)
+ continue;
+
+ /*
+ * If it's used by the upper query, we can't remove it.
+ */
+ if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
+ attrs_used))
+ continue;
+
+ /*
+ * If it contains a set-returning function, we can't remove it since
+ * that could change the number of rows returned by the subquery.
+ */
+ if (subquery->hasTargetSRFs &&
+ expression_returns_set(texpr))
+ continue;
+
+ /*
+ * If it contains volatile functions, we daren't remove it for fear
+ * that the user is expecting their side-effects to happen.
+ */
+ if (contain_volatile_functions(texpr))
+ continue;
+
+ /*
+ * OK, we don't need it. Replace the expression with a NULL constant.
+ * Preserve the exposed type of the expression, in case something
+ * looks at the rowtype of the subquery's result.
+ */
+ tle->expr = (Expr *) makeNullConst(exprType(texpr),
+ exprTypmod(texpr),
+ exprCollation(texpr));
+ }
+}
+
+/*
+ * create_partial_bitmap_paths
+ * Build partial bitmap heap path for the relation
+ */
+void
+create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
+ Path *bitmapqual)
+{
+ int parallel_workers;
+ double pages_fetched;
+
+ /* Compute heap pages for bitmap heap scan */
+ pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
+ NULL, NULL);
+
+ parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
+ max_parallel_workers_per_gather);
+
+ if (parallel_workers <= 0)
+ return;
+
+ add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
+ bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
+}
+
+/*
+ * Compute the number of parallel workers that should be used to scan a
+ * relation. We compute the parallel workers based on the size of the heap to
+ * be scanned and the size of the index to be scanned, then choose the
+ * minimum of those.
+ *
+ * "heap_pages" is the number of pages from the table that we expect to scan, or
+ * -1 if we don't expect to scan any.
+ *
+ * "index_pages" is the number of pages from the index that we expect to scan, or
+ * -1 if we don't expect to scan any.
+ *
+ * "max_workers" is caller's limit on the number of workers. This typically
+ * comes from a GUC.
+ */
+int
+compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
+ int max_workers)
+{
+ int parallel_workers = 0;
+
+ /*
+ * If the user has set the parallel_workers reloption, use that; otherwise
+ * select a default number of workers.
+ */
+ if (rel->rel_parallel_workers != -1)
+ parallel_workers = rel->rel_parallel_workers;
+ else
+ {
+ /*
+ * If the number of pages being scanned is insufficient to justify a
+ * parallel scan, just return zero ... unless it's an inheritance
+ * child. In that case, we want to generate a parallel path here
+ * anyway. It might not be worthwhile just for this relation, but
+ * when combined with all of its inheritance siblings it may well pay
+ * off.
+ */
+ if (rel->reloptkind == RELOPT_BASEREL &&
+ ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) ||
+ (index_pages >= 0 && index_pages < min_parallel_index_scan_size)))
+ return 0;
+
+ if (heap_pages >= 0)
+ {
+ int heap_parallel_threshold;
+ int heap_parallel_workers = 1;
+
+ /*
+ * Select the number of workers based on the log of the size of
+ * the relation. This probably needs to be a good deal more
+ * sophisticated, but we need something here for now. Note that
+ * the upper limit of the min_parallel_table_scan_size GUC is
+ * chosen to prevent overflow here.
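+ *
+ * Worked example (hypothetical numbers, assuming the default
+ * min_parallel_table_scan_size of 8MB, i.e. 1024 pages with 8kB
+ * blocks): for heap_pages = 10000 the loop below bumps the count to
+ * 2 because 10000 >= 3072 and to 3 because 10000 >= 9216, then stops
+ * because 10000 < 27648. Each tripling of the table size adds about
+ * one more worker.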
+ */
+ heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
+ while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
+ {
+ heap_parallel_workers++;
+ heap_parallel_threshold *= 3;
+ if (heap_parallel_threshold > INT_MAX / 3)
+ break; /* avoid overflow */
+ }
+
+ parallel_workers = heap_parallel_workers;
+ }
+
+ if (index_pages >= 0)
+ {
+ int index_parallel_workers = 1;
+ int index_parallel_threshold;
+
+ /* same calculation as for heap_pages above */
+ index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
+ while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
+ {
+ index_parallel_workers++;
+ index_parallel_threshold *= 3;
+ if (index_parallel_threshold > INT_MAX / 3)
+ break; /* avoid overflow */
+ }
+
+ if (parallel_workers > 0)
+ parallel_workers = Min(parallel_workers, index_parallel_workers);
+ else
+ parallel_workers = index_parallel_workers;
+ }
+ }
+
+ /* In no case use more than caller supplied maximum number of workers */
+ parallel_workers = Min(parallel_workers, max_workers);
+
+ return parallel_workers;
+}
+
+/*
+ * generate_partitionwise_join_paths
+ * Create paths representing partitionwise join for given partitioned
+ * join relation.
+ *
+ * This must not be called until after we are done adding paths for all
+ * child-joins. Otherwise, add_path might delete a path to which some path
+ * generated here has a reference.
+ */
+void
+generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ List *live_children = NIL;
+ int cnt_parts;
+ int num_parts;
+ RelOptInfo **part_rels;
+
+ /* Handle only join relations here. */
+ if (!IS_JOIN_REL(rel))
+ return;
+
+ /* We've nothing to do if the relation is not partitioned. */
+ if (!IS_PARTITIONED_REL(rel))
+ return;
+
+ /* The relation should have consider_partitionwise_join set. */
+ Assert(rel->consider_partitionwise_join);
+
+ /* Guard against stack overflow due to overly deep partition hierarchy. */
+ check_stack_depth();
+
+ num_parts = rel->nparts;
+ part_rels = rel->part_rels;
+
+ /* Collect non-dummy child-joins. */
+ for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
+ {
+ RelOptInfo *child_rel = part_rels[cnt_parts];
+
+ /* If it's been pruned entirely, it's certainly dummy. */
+ if (child_rel == NULL)
+ continue;
+
+ /* Make partitionwise join paths for this partitioned child-join. */
+ generate_partitionwise_join_paths(root, child_rel);
+
+ /* If we failed to make any path for this child, we must give up. */
+ if (child_rel->pathlist == NIL)
+ {
+ /*
+ * Mark the parent joinrel as unpartitioned so that later
+ * functions treat it correctly.
+ */
+ rel->nparts = 0;
+ return;
+ }
+
+ /* Else, identify the cheapest path for it. */
+ set_cheapest(child_rel);
+
+ /* Dummy children need not be scanned, so ignore those. */
+ if (IS_DUMMY_REL(child_rel))
+ continue;
+
+#ifdef OPTIMIZER_DEBUG
+ debug_print_rel(root, child_rel);
+#endif
+
+ live_children = lappend(live_children, child_rel);
+ }
+
+ /* If all child-joins are dummy, parent join is also dummy. */
+ if (!live_children)
+ {
+ mark_dummy_rel(rel);
+ return;
+ }
+
+ /* Build additional paths for this rel from child-join paths. */
+ add_paths_to_append_rel(root, rel, live_children);
+ list_free(live_children);
+}
+
+
+/*****************************************************************************
+ * DEBUG SUPPORT
+ *****************************************************************************/
+
+#ifdef OPTIMIZER_DEBUG
+
+static void
+print_relids(PlannerInfo *root, Relids relids)
+{
+ int x;
+ bool first = true;
+
+ x = -1;
+ while ((x = bms_next_member(relids, x)) >= 0)
+ {
+ if (!first)
+ printf(" ");
+ if (x < root->simple_rel_array_size &&
+ root->simple_rte_array[x])
+ printf("%s", root->simple_rte_array[x]->eref->aliasname);
+ else
+ printf("%d", x);
+ first = false;
+ }
+}
+
+static void
+print_restrictclauses(PlannerInfo *root, List *clauses)
+{
+ ListCell *l;
+
+ foreach(l, clauses)
+ {
+ RestrictInfo *c = lfirst(l);
+
+ print_expr((Node *) c->clause, root->parse->rtable);
+ if (lnext(clauses, l))
+ printf(", ");
+ }
+}
+
+static void
+print_path(PlannerInfo *root, Path *path, int indent)
+{
+ const char *ptype;
+ bool join = false;
+ Path *subpath = NULL;
+ int i;
+
+ switch (nodeTag(path))
+ {
+ case T_Path:
+ switch (path->pathtype)
+ {
+ case T_SeqScan:
+ ptype = "SeqScan";
+ break;
+ case T_SampleScan:
+ ptype = "SampleScan";
+ break;
+ case T_FunctionScan:
+ ptype = "FunctionScan";
+ break;
+ case T_TableFuncScan:
+ ptype = "TableFuncScan";
+ break;
+ case T_ValuesScan:
+ ptype = "ValuesScan";
+ break;
+ case T_CteScan:
+ ptype = "CteScan";
+ break;
+ case T_NamedTuplestoreScan:
+ ptype = "NamedTuplestoreScan";
+ break;
+ case T_Result:
+ ptype = "Result";
+ break;
+ case T_WorkTableScan:
+ ptype = "WorkTableScan";
+ break;
+ default:
+ ptype = "???Path";
+ break;
+ }
+ break;
+ case T_IndexPath:
+ ptype = "IdxScan";
+ break;
+ case T_BitmapHeapPath:
+ ptype = "BitmapHeapScan";
+ break;
+ case T_BitmapAndPath:
+ ptype = "BitmapAndPath";
+ break;
+ case T_BitmapOrPath:
+ ptype = "BitmapOrPath";
+ break;
+ case T_TidPath:
+ ptype = "TidScan";
+ break;
+ case T_TidRangePath:
+ ptype = "TidRangePath";
+ break;
+ case T_SubqueryScanPath:
+ ptype = "SubqueryScan";
+ break;
+ case T_ForeignPath:
+ ptype = "ForeignScan";
+ break;
+ case T_CustomPath:
+ ptype = "CustomScan";
+ break;
+ case T_NestPath:
+ ptype = "NestLoop";
+ join = true;
+ break;
+ case T_MergePath:
+ ptype = "MergeJoin";
+ join = true;
+ break;
+ case T_HashPath:
+ ptype = "HashJoin";
+ join = true;
+ break;
+ case T_AppendPath:
+ ptype = "Append";
+ break;
+ case T_MergeAppendPath:
+ ptype = "MergeAppend";
+ break;
+ case T_GroupResultPath:
+ ptype = "GroupResult";
+ break;
+ case T_MaterialPath:
+ ptype = "Material";
+ subpath = ((MaterialPath *) path)->subpath;
+ break;
+ case T_MemoizePath:
+ ptype = "Memoize";
+ subpath = ((MemoizePath *) path)->subpath;
+ break;
+ case T_UniquePath:
+ ptype = "Unique";
+ subpath = ((UniquePath *) path)->subpath;
+ break;
+ case T_GatherPath:
+ ptype = "Gather";
+ subpath = ((GatherPath *) path)->subpath;
+ break;
+ case T_GatherMergePath:
+ ptype = "GatherMerge";
+ subpath = ((GatherMergePath *) path)->subpath;
+ break;
+ case T_ProjectionPath:
+ ptype = "Projection";
+ subpath = ((ProjectionPath *) path)->subpath;
+ break;
+ case T_ProjectSetPath:
+ ptype = "ProjectSet";
+ subpath = ((ProjectSetPath *) path)->subpath;
+ break;
+ case T_SortPath:
+ ptype = "Sort";
+ subpath = ((SortPath *) path)->subpath;
+ break;
+ case T_IncrementalSortPath:
+ ptype = "IncrementalSort";
+ subpath = ((SortPath *) path)->subpath;
+ break;
+ case T_GroupPath:
+ ptype = "Group";
+ subpath = ((GroupPath *) path)->subpath;
+ break;
+ case T_UpperUniquePath:
+ ptype = "UpperUnique";
+ subpath = ((UpperUniquePath *) path)->subpath;
+ break;
+ case T_AggPath:
+ ptype = "Agg";
+ subpath = ((AggPath *) path)->subpath;
+ break;
+ case T_GroupingSetsPath:
+ ptype = "GroupingSets";
+ subpath = ((GroupingSetsPath *) path)->subpath;
+ break;
+ case T_MinMaxAggPath:
+ ptype = "MinMaxAgg";
+ break;
+ case T_WindowAggPath:
+ ptype = "WindowAgg";
+ subpath = ((WindowAggPath *) path)->subpath;
+ break;
+ case T_SetOpPath:
+ ptype = "SetOp";
+ subpath = ((SetOpPath *) path)->subpath;
+ break;
+ case T_RecursiveUnionPath:
+ ptype = "RecursiveUnion";
+ break;
+ case T_LockRowsPath:
+ ptype = "LockRows";
+ subpath = ((LockRowsPath *) path)->subpath;
+ break;
+ case T_ModifyTablePath:
+ ptype = "ModifyTable";
+ break;
+ case T_LimitPath:
+ ptype = "Limit";
+ subpath = ((LimitPath *) path)->subpath;
+ break;
+ default:
+ ptype = "???Path";
+ break;
+ }
+
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf("%s", ptype);
+
+ if (path->parent)
+ {
+ printf("(");
+ print_relids(root, path->parent->relids);
+ printf(")");
+ }
+ if (path->param_info)
+ {
+ printf(" required_outer (");
+ print_relids(root, path->param_info->ppi_req_outer);
+ printf(")");
+ }
+ printf(" rows=%.0f cost=%.2f..%.2f\n",
+ path->rows, path->startup_cost, path->total_cost);
+
+ if (path->pathkeys)
+ {
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" pathkeys: ");
+ print_pathkeys(path->pathkeys, root->parse->rtable);
+ }
+
+ if (join)
+ {
+ JoinPath *jp = (JoinPath *) path;
+
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" clauses: ");
+ print_restrictclauses(root, jp->joinrestrictinfo);
+ printf("\n");
+
+ if (IsA(path, MergePath))
+ {
+ MergePath *mp = (MergePath *) path;
+
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
+ ((mp->outersortkeys) ? 1 : 0),
+ ((mp->innersortkeys) ? 1 : 0),
+ ((mp->materialize_inner) ? 1 : 0));
+ }
+
+ print_path(root, jp->outerjoinpath, indent + 1);
+ print_path(root, jp->innerjoinpath, indent + 1);
+ }
+
+ if (subpath)
+ print_path(root, subpath, indent + 1);
+}
+
+void
+debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
+{
+ ListCell *l;
+
+ printf("RELOPTINFO (");
+ print_relids(root, rel->relids);
+ printf("): rows=%.0f width=%d\n", rel->rows, rel->reltarget->width);
+
+ if (rel->baserestrictinfo)
+ {
+ printf("\tbaserestrictinfo: ");
+ print_restrictclauses(root, rel->baserestrictinfo);
+ printf("\n");
+ }
+
+ if (rel->joininfo)
+ {
+ printf("\tjoininfo: ");
+ print_restrictclauses(root, rel->joininfo);
+ printf("\n");
+ }
+
+ printf("\tpath list:\n");
+ foreach(l, rel->pathlist)
+ print_path(root, lfirst(l), 1);
+ if (rel->cheapest_parameterized_paths)
+ {
+ printf("\n\tcheapest parameterized paths:\n");
+ foreach(l, rel->cheapest_parameterized_paths)
+ print_path(root, lfirst(l), 1);
+ }
+ if (rel->cheapest_startup_path)
+ {
+ printf("\n\tcheapest startup path:\n");
+ print_path(root, rel->cheapest_startup_path, 1);
+ }
+ if (rel->cheapest_total_path)
+ {
+ printf("\n\tcheapest total path:\n");
+ print_path(root, rel->cheapest_total_path, 1);
+ }
+ printf("\n");
+ fflush(stdout);
+}
+
+#endif /* OPTIMIZER_DEBUG */
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
new file mode 100644
index 0000000..06f8363
--- /dev/null
+++ b/src/backend/optimizer/path/clausesel.c
@@ -0,0 +1,1000 @@
+/*-------------------------------------------------------------------------
+ *
+ * clausesel.c
+ * Routines to compute clause selectivities
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/clausesel.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/plancat.h"
+#include "statistics/statistics.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+
+/*
+ * Data structure for accumulating info about possible range-query
+ * clause pairs in clauselist_selectivity.
+ */
+typedef struct RangeQueryClause
+{
+ struct RangeQueryClause *next; /* next in linked list */
+ Node *var; /* The common variable of the clauses */
+ bool have_lobound; /* found a low-bound clause yet? */
+ bool have_hibound; /* found a high-bound clause yet? */
+ Selectivity lobound; /* Selectivity of a var > something clause */
+ Selectivity hibound; /* Selectivity of a var < something clause */
+} RangeQueryClause;
+
+static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
+ bool varonleft, bool isLTsel, Selectivity s2);
+static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
+ List *clauses);
+static Selectivity clauselist_selectivity_or(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ bool use_extended_stats);
+
+/****************************************************************************
+ * ROUTINES TO COMPUTE SELECTIVITIES
+ ****************************************************************************/
+
+/*
+ * clauselist_selectivity -
+ * Compute the selectivity of an implicitly-ANDed list of boolean
+ * expression clauses. The list can be empty, in which case 1.0
+ * must be returned. List elements may be either RestrictInfos
+ * or bare expression clauses --- the former is preferred since
+ * it allows caching of results.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ *
+ * The basic approach is to apply extended statistics first, on as many
+ * clauses as possible, in order to capture cross-column dependencies etc.
+ * The remaining clauses are then estimated by taking the product of their
+ * selectivities, but that's only right if they have independent
+ * probabilities, and in reality they are often NOT independent even if they
+ * only refer to a single column. So, we want to be smarter where we can.
+ *
+ * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
+ * are recognized as possible range query components if they are restriction
+ * opclauses whose operators have scalarltsel or a related function as their
+ * restriction selectivity estimator. We pair up clauses of this form that
+ * refer to the same variable. An unpairable clause of this kind is simply
+ * multiplied into the selectivity product in the normal way. But when we
+ * find a pair, we know that the selectivities represent the relative
+ * positions of the low and high bounds within the column's range, so instead
+ * of figuring the selectivity as hisel * losel, we can figure it as hisel +
+ * losel - 1. (To visualize this, see that hisel is the fraction of the range
+ * below the high bound, while losel is the fraction above the low bound; so
+ * hisel can be interpreted directly as a 0..1 value but we need to convert
+ * losel to 1-losel before interpreting it as a value. Then the available
+ * range is 1-losel to hisel. However, this calculation double-excludes
+ * nulls, so really we need hisel + losel + null_frac - 1.)
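+ *
+ * (A worked example with made-up numbers: for "x > 34 AND x < 42",
+ * suppose losel = 0.70, hisel = 0.35 and null_frac = 0.02. The pair is
+ * then estimated as 0.35 + 0.70 + 0.02 - 1 = 0.07, i.e. about 7% of the
+ * rows, whereas naive multiplication would have given 0.245.)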
+ *
+ * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
+ * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
+ * yields an impossible (negative) result.
+ *
+ * A free side-effect is that we can recognize redundant inequalities such
+ * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
+ *
+ * Of course this is all very dependent on the behavior of the inequality
+ * selectivity functions; perhaps some day we can generalize the approach.
+ */
+Selectivity
+clauselist_selectivity(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo)
+{
+ return clauselist_selectivity_ext(root, clauses, varRelid,
+ jointype, sjinfo, true);
+}
+
+/*
+ * clauselist_selectivity_ext -
+ * Extended version of clauselist_selectivity(). If "use_extended_stats"
+ * is false, all extended statistics will be ignored, and only per-column
+ * statistics will be used.
+ */
+Selectivity
+clauselist_selectivity_ext(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ bool use_extended_stats)
+{
+ Selectivity s1 = 1.0;
+ RelOptInfo *rel;
+ Bitmapset *estimatedclauses = NULL;
+ RangeQueryClause *rqlist = NULL;
+ ListCell *l;
+ int listidx;
+
+ /*
+ * If there's exactly one clause, just go directly to
+ * clause_selectivity_ext(). None of what we might do below is relevant.
+ */
+ if (list_length(clauses) == 1)
+ return clause_selectivity_ext(root, (Node *) linitial(clauses),
+ varRelid, jointype, sjinfo,
+ use_extended_stats);
+
+ /*
+ * Determine if these clauses reference a single relation. If so, and if
+ * it has extended statistics, try to apply those.
+ */
+ rel = find_single_rel_for_clauses(root, clauses);
+ if (use_extended_stats && rel && rel->rtekind == RTE_RELATION &&
+ rel->statlist != NIL)
+ {
+ /*
+ * Estimate as many clauses as possible using extended statistics.
+ *
+ * 'estimatedclauses' is populated with the 0-based list positions of
+ * the clauses estimated here; those clauses should be ignored below.
+ */
+ s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+ jointype, sjinfo, rel,
+ &estimatedclauses, false);
+ }
+
+ /*
+ * Apply normal selectivity estimates for remaining clauses. We'll be
+ * careful to skip any clauses which were already estimated above.
+ *
+ * Anything that doesn't look like a potential rangequery clause gets
+ * multiplied into s1 and forgotten. Anything that does gets inserted into
+ * an rqlist entry.
+ */
+ listidx = -1;
+ foreach(l, clauses)
+ {
+ Node *clause = (Node *) lfirst(l);
+ RestrictInfo *rinfo;
+ Selectivity s2;
+
+ listidx++;
+
+ /*
+ * Skip this clause if it's already been estimated by some other
+ * statistics above.
+ */
+ if (bms_is_member(listidx, estimatedclauses))
+ continue;
+
+ /* Compute the selectivity of this clause in isolation */
+ s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
+ use_extended_stats);
+
+ /*
+ * Check for being passed a RestrictInfo.
+ *
+ * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
+ * 0.0; just use that rather than looking for range pairs.
+ */
+ if (IsA(clause, RestrictInfo))
+ {
+ rinfo = (RestrictInfo *) clause;
+ if (rinfo->pseudoconstant)
+ {
+ s1 = s1 * s2;
+ continue;
+ }
+ clause = (Node *) rinfo->clause;
+ }
+ else
+ rinfo = NULL;
+
+ /*
+ * See if it looks like a restriction clause with a pseudoconstant on
+ * one side. (Anything more complicated than that might not behave in
+ * the simple way we are expecting.) Most of the tests here can be
+ * done more efficiently with rinfo than without.
+ */
+ if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
+ {
+ OpExpr *expr = (OpExpr *) clause;
+ bool varonleft = true;
+ bool ok;
+
+ if (rinfo)
+ {
+ ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
+ (is_pseudo_constant_clause_relids(lsecond(expr->args),
+ rinfo->right_relids) ||
+ (varonleft = false,
+ is_pseudo_constant_clause_relids(linitial(expr->args),
+ rinfo->left_relids)));
+ }
+ else
+ {
+ ok = (NumRelids(root, clause) == 1) &&
+ (is_pseudo_constant_clause(lsecond(expr->args)) ||
+ (varonleft = false,
+ is_pseudo_constant_clause(linitial(expr->args))));
+ }
+
+ if (ok)
+ {
+ /*
+ * If it's not a "<"/"<="/">"/">=" operator, just merge the
+ * selectivity in generically. But if it's the right oprrest,
+ * add the clause to rqlist for later processing.
+ */
+ switch (get_oprrest(expr->opno))
+ {
+ case F_SCALARLTSEL:
+ case F_SCALARLESEL:
+ addRangeClause(&rqlist, clause,
+ varonleft, true, s2);
+ break;
+ case F_SCALARGTSEL:
+ case F_SCALARGESEL:
+ addRangeClause(&rqlist, clause,
+ varonleft, false, s2);
+ break;
+ default:
+ /* Just merge the selectivity in generically */
+ s1 = s1 * s2;
+ break;
+ }
+ continue; /* drop to loop bottom */
+ }
+ }
+
+ /* Not the right form, so treat it generically. */
+ s1 = s1 * s2;
+ }
+
+ /*
+ * Now scan the rangequery pair list.
+ */
+ while (rqlist != NULL)
+ {
+ RangeQueryClause *rqnext;
+
+ if (rqlist->have_lobound && rqlist->have_hibound)
+ {
+ /* Successfully matched a pair of range clauses */
+ Selectivity s2;
+
+ /*
+ * Exact equality to the default value probably means the
+ * selectivity function punted. This is not airtight but should
+ * be good enough.
+ */
+ if (rqlist->hibound == DEFAULT_INEQ_SEL ||
+ rqlist->lobound == DEFAULT_INEQ_SEL)
+ {
+ s2 = DEFAULT_RANGE_INEQ_SEL;
+ }
+ else
+ {
+ s2 = rqlist->hibound + rqlist->lobound - 1.0;
+
+ /* Adjust for double-exclusion of NULLs */
+ s2 += nulltestsel(root, IS_NULL, rqlist->var,
+ varRelid, jointype, sjinfo);
+
+ /*
+ * A zero or slightly negative s2 should be converted into a
+ * small positive value; we probably are dealing with a very
+ * tight range and got a bogus result due to roundoff errors.
+ * However, if s2 is very negative, then we probably have
+ * default selectivity estimates on one or both sides of the
+ * range that we failed to recognize above for some reason.
+ */
+ if (s2 <= 0.0)
+ {
+ if (s2 < -0.01)
+ {
+ /*
+ * No data available --- use a default estimate that
+ * is small, but not real small.
+ */
+ s2 = DEFAULT_RANGE_INEQ_SEL;
+ }
+ else
+ {
+ /*
+ * It's just roundoff error; use a small positive
+ * value
+ */
+ s2 = 1.0e-10;
+ }
+ }
+ }
+ /* Merge in the selectivity of the pair of clauses */
+ s1 *= s2;
+ }
+ else
+ {
+ /* Only found one of a pair, merge it in generically */
+ if (rqlist->have_lobound)
+ s1 *= rqlist->lobound;
+ else
+ s1 *= rqlist->hibound;
+ }
+ /* release storage and advance */
+ rqnext = rqlist->next;
+ pfree(rqlist);
+ rqlist = rqnext;
+ }
+
+ return s1;
+}
+
+/*
+ * clauselist_selectivity_or -
+ * Compute the selectivity of an implicitly-ORed list of boolean
+ * expression clauses. The list can be empty, in which case 0.0
+ * must be returned. List elements may be either RestrictInfos
+ * or bare expression clauses --- the former is preferred since
+ * it allows caching of results.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ *
+ * The basic approach is to apply extended statistics first, on as many
+ * clauses as possible, in order to capture cross-column dependencies etc.
+ * The remaining clauses are then estimated as if they were independent.
+ */
+static Selectivity
+clauselist_selectivity_or(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ bool use_extended_stats)
+{
+ Selectivity s1 = 0.0;
+ RelOptInfo *rel;
+ Bitmapset *estimatedclauses = NULL;
+ ListCell *lc;
+ int listidx;
+
+ /*
+ * Determine if these clauses reference a single relation. If so, and if
+ * it has extended statistics, try to apply those.
+ */
+ rel = find_single_rel_for_clauses(root, clauses);
+ if (use_extended_stats && rel && rel->rtekind == RTE_RELATION &&
+ rel->statlist != NIL)
+ {
+ /*
+ * Estimate as many clauses as possible using extended statistics.
+ *
+ * 'estimatedclauses' is populated with the 0-based list positions of
+ * the clauses estimated here; those clauses should be ignored below.
+ */
+ s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+ jointype, sjinfo, rel,
+ &estimatedclauses, true);
+ }
+
+ /*
+ * Estimate the remaining clauses as if they were independent.
+ *
+ * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
+ * for the probable overlap of selected tuple sets.
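+ *
+ * For example (illustrative numbers only): two clauses with
+ * selectivities 0.2 and 0.3 combine to 0.2 + 0.3 - 0.2*0.3 = 0.44,
+ * which is P(A or B) for independent events rather than the simple
+ * upper bound of 0.5.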
+ *
+ * XXX is this too conservative?
+ */
+ listidx = -1;
+ foreach(lc, clauses)
+ {
+ Selectivity s2;
+
+ listidx++;
+
+ /*
+ * Skip this clause if it's already been estimated by some other
+ * statistics above.
+ */
+ if (bms_is_member(listidx, estimatedclauses))
+ continue;
+
+ s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
+ jointype, sjinfo, use_extended_stats);
+
+ s1 = s1 + s2 - s1 * s2;
+ }
+
+ return s1;
+}
+
+/*
+ * addRangeClause --- add a new range clause for clauselist_selectivity
+ *
+ * Here is where we try to match up pairs of range-query clauses
+ */
+static void
+addRangeClause(RangeQueryClause **rqlist, Node *clause,
+ bool varonleft, bool isLTsel, Selectivity s2)
+{
+ RangeQueryClause *rqelem;
+ Node *var;
+ bool is_lobound;
+
+ if (varonleft)
+ {
+ var = get_leftop((Expr *) clause);
+ is_lobound = !isLTsel; /* x < something is high bound */
+ }
+ else
+ {
+ var = get_rightop((Expr *) clause);
+ is_lobound = isLTsel; /* something < x is low bound */
+ }
+
+ for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
+ {
+ /*
+ * We use full equal() here because the "var" might be a function of
+ * one or more attributes of the same relation...
+ */
+ if (!equal(var, rqelem->var))
+ continue;
+ /* Found the right group to put this clause in */
+ if (is_lobound)
+ {
+ if (!rqelem->have_lobound)
+ {
+ rqelem->have_lobound = true;
+ rqelem->lobound = s2;
+ }
+ else
+ {
+
+ /*------
+ * We have found two similar clauses, such as
+ * x < y AND x <= z.
+ * Keep only the more restrictive one.
+ *------
+ */
+ if (rqelem->lobound > s2)
+ rqelem->lobound = s2;
+ }
+ }
+ else
+ {
+ if (!rqelem->have_hibound)
+ {
+ rqelem->have_hibound = true;
+ rqelem->hibound = s2;
+ }
+ else
+ {
+
+ /*------
+ * We have found two similar clauses, such as
+ * x > y AND x >= z.
+ * Keep only the more restrictive one.
+ *------
+ */
+ if (rqelem->hibound > s2)
+ rqelem->hibound = s2;
+ }
+ }
+ return;
+ }
+
+ /* No matching var found, so make a new clause-pair data structure */
+ rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
+ rqelem->var = var;
+ if (is_lobound)
+ {
+ rqelem->have_lobound = true;
+ rqelem->have_hibound = false;
+ rqelem->lobound = s2;
+ }
+ else
+ {
+ rqelem->have_lobound = false;
+ rqelem->have_hibound = true;
+ rqelem->hibound = s2;
+ }
+ rqelem->next = *rqlist;
+ *rqlist = rqelem;
+}
+
+/*
+ * find_single_rel_for_clauses
+ * Examine each clause in 'clauses' and determine if all clauses
+ * reference only a single relation. If so return that relation,
+ * otherwise return NULL.
+ */
+static RelOptInfo *
+find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
+{
+ int lastrelid = 0;
+ ListCell *l;
+
+ foreach(l, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ int relid;
+
+ /*
+ * If we have a list of bare clauses rather than RestrictInfos, we
+ * could pull out their relids the hard way with pull_varnos().
+ * However, currently the extended-stats machinery won't do anything
+ * with non-RestrictInfo clauses anyway, so there's no point in
+ * spending extra cycles; just fail if that's what we have.
+ *
+ * An exception to that rule is if we have a bare BoolExpr AND clause.
+ * We treat this as a special case because the restrictinfo machinery
+ * doesn't build RestrictInfos on top of AND clauses.
+ */
+ if (is_andclause(rinfo))
+ {
+ RelOptInfo *rel;
+
+ rel = find_single_rel_for_clauses(root,
+ ((BoolExpr *) rinfo)->args);
+
+ if (rel == NULL)
+ return NULL;
+ if (lastrelid == 0)
+ lastrelid = rel->relid;
+ else if (rel->relid != lastrelid)
+ return NULL;
+
+ continue;
+ }
+
+ if (!IsA(rinfo, RestrictInfo))
+ return NULL;
+
+ if (bms_is_empty(rinfo->clause_relids))
+ continue; /* we can ignore variable-free clauses */
+ if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
+ return NULL; /* multiple relations in this clause */
+ if (lastrelid == 0)
+ lastrelid = relid; /* first clause referencing a relation */
+ else if (relid != lastrelid)
+ return NULL; /* relation not same as last one */
+ }
+
+ if (lastrelid != 0)
+ return find_base_rel(root, lastrelid);
+
+ return NULL; /* no clauses */
+}
+
+/*
+ * bms_is_subset_singleton
+ *
+ * Same result as bms_is_subset(s, bms_make_singleton(x)),
+ * but a little faster and doesn't leak memory.
+ *
+ * Is this of use anywhere else? If so move to bitmapset.c ...
+ */
+static bool
+bms_is_subset_singleton(const Bitmapset *s, int x)
+{
+ switch (bms_membership(s))
+ {
+ case BMS_EMPTY_SET:
+ return true;
+ case BMS_SINGLETON:
+ return bms_is_member(x, s);
+ case BMS_MULTIPLE:
+ return false;
+ }
+ /* can't get here... */
+ return false;
+}
+
+/*
+ * treat_as_join_clause -
+ * Decide whether an operator clause is to be handled by the
+ * restriction or join estimator. Subroutine for clause_selectivity().
+ */
+static inline bool
+treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
+ int varRelid, SpecialJoinInfo *sjinfo)
+{
+ if (varRelid != 0)
+ {
+ /*
+ * Caller is forcing restriction mode (eg, because we are examining an
+ * inner indexscan qual).
+ */
+ return false;
+ }
+ else if (sjinfo == NULL)
+ {
+ /*
+ * It must be a restriction clause, since it's being evaluated at a
+ * scan node.
+ */
+ return false;
+ }
+ else
+ {
+ /*
+ * Otherwise, it's a join if there's more than one relation used. We
+ * can optimize this calculation if an rinfo was passed.
+ *
+ * XXX Since we know the clause is being evaluated at a join, the
+ * only way it could be single-relation is if it was delayed by outer
+ * joins. Although we can make use of the restriction qual estimators
+ * anyway, it seems likely that we ought to account for the
+ * probability of injected nulls somehow.
+ */
+ if (rinfo)
+ return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
+ else
+ return (NumRelids(root, clause) > 1);
+ }
+}
+
+
+/*
+ * clause_selectivity -
+ * Compute the selectivity of a general boolean expression clause.
+ *
+ * The clause can be either a RestrictInfo or a plain expression. If it's
+ * a RestrictInfo, we try to cache the selectivity for possible re-use,
+ * so passing RestrictInfos is preferred.
+ *
+ * varRelid is either 0 or a rangetable index.
+ *
+ * When varRelid is not 0, only variables belonging to that relation are
+ * considered in computing selectivity; other vars are treated as constants
+ * of unknown values. This is appropriate for estimating the selectivity of
+ * a join clause that is being used as a restriction clause in a scan of a
+ * nestloop join's inner relation --- varRelid should then be the ID of the
+ * inner relation.
+ *
+ * When varRelid is 0, all variables are treated as variables. This
+ * is appropriate for ordinary join clauses and restriction clauses.
+ *
+ * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
+ * if the clause isn't a join clause.
+ *
+ * sjinfo is NULL for a non-join clause, otherwise it provides additional
+ * context information about the join being performed. There are some
+ * special cases:
+ * 1. For a special (not INNER) join, sjinfo is always a member of
+ * root->join_info_list.
+ * 2. For an INNER join, sjinfo is just a transient struct, and only the
+ * relids and jointype fields in it can be trusted.
+ * It is possible for jointype to be different from sjinfo->jointype.
+ * This indicates we are considering a variant join: either with
+ * the LHS and RHS switched, or with one input unique-ified.
+ *
+ * Note: when passing nonzero varRelid, it's normally appropriate to set
+ * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
+ * join clause; because we aren't treating it as a join clause.
+ */
+Selectivity
+clause_selectivity(PlannerInfo *root,
+ Node *clause,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo)
+{
+ return clause_selectivity_ext(root, clause, varRelid,
+ jointype, sjinfo, true);
+}
+
+/*
+ * clause_selectivity_ext -
+ * Extended version of clause_selectivity(). If "use_extended_stats" is
+ * false, all extended statistics will be ignored, and only per-column
+ * statistics will be used.
+ */
+Selectivity
+clause_selectivity_ext(PlannerInfo *root,
+ Node *clause,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ bool use_extended_stats)
+{
+ Selectivity s1 = 0.5; /* default for any unhandled clause type */
+ RestrictInfo *rinfo = NULL;
+ bool cacheable = false;
+
+ if (clause == NULL) /* can this still happen? */
+ return s1;
+
+ if (IsA(clause, RestrictInfo))
+ {
+ rinfo = (RestrictInfo *) clause;
+
+ /*
+ * If the clause is marked pseudoconstant, then it will be used as a
+ * gating qual and should not affect selectivity estimates; hence
+ * return 1.0. The only exception is that a constant FALSE may be
+ * taken as having selectivity 0.0, since it will surely mean no rows
+ * out of the plan. This case is simple enough that we need not
+ * bother caching the result.
+ */
+ if (rinfo->pseudoconstant)
+ {
+ if (!IsA(rinfo->clause, Const))
+ return (Selectivity) 1.0;
+ }
+
+ /*
+ * If the clause is marked redundant, always return 1.0.
+ */
+ if (rinfo->norm_selec > 1)
+ return (Selectivity) 1.0;
+
+ /*
+ * If possible, cache the result of the selectivity calculation for
+ * the clause. We can cache if varRelid is zero or the clause
+ * contains only vars of that relid --- otherwise varRelid will affect
+ * the result, so mustn't cache. Outer join quals might be examined
+ * with either their join's actual jointype or JOIN_INNER, so we need
+ * two cache variables to remember both cases. Note: we assume the
+ * result won't change if we are switching the input relations or
+ * considering a unique-ified case, so we only need one cache variable
+ * for all non-JOIN_INNER cases.
+ */
+ if (varRelid == 0 ||
+ bms_is_subset_singleton(rinfo->clause_relids, varRelid))
+ {
+ /* Cacheable --- do we already have the result? */
+ if (jointype == JOIN_INNER)
+ {
+ if (rinfo->norm_selec >= 0)
+ return rinfo->norm_selec;
+ }
+ else
+ {
+ if (rinfo->outer_selec >= 0)
+ return rinfo->outer_selec;
+ }
+ cacheable = true;
+ }
+
+ /*
+ * Proceed with examination of contained clause. If the clause is an
+ * OR-clause, we want to look at the variant with sub-RestrictInfos,
+ * so that per-subclause selectivities can be cached.
+ */
+ if (rinfo->orclause)
+ clause = (Node *) rinfo->orclause;
+ else
+ clause = (Node *) rinfo->clause;
+ }
+
+ if (IsA(clause, Var))
+ {
+ Var *var = (Var *) clause;
+
+ /*
+ * We probably shouldn't ever see an uplevel Var here, but if we do,
+ * return the default selectivity...
+ */
+ if (var->varlevelsup == 0 &&
+ (varRelid == 0 || varRelid == (int) var->varno))
+ {
+ /* Use the restriction selectivity function for a bool Var */
+ s1 = boolvarsel(root, (Node *) var, varRelid);
+ }
+ }
+ else if (IsA(clause, Const))
+ {
+ /* bool constant is pretty easy... */
+ Const *con = (Const *) clause;
+
+ s1 = con->constisnull ? 0.0 :
+ DatumGetBool(con->constvalue) ? 1.0 : 0.0;
+ }
+ else if (IsA(clause, Param))
+ {
+ /* see if we can replace the Param */
+ Node *subst = estimate_expression_value(root, clause);
+
+ if (IsA(subst, Const))
+ {
+ /* bool constant is pretty easy... */
+ Const *con = (Const *) subst;
+
+ s1 = con->constisnull ? 0.0 :
+ DatumGetBool(con->constvalue) ? 1.0 : 0.0;
+ }
+ else
+ {
+ /* XXX any way to do better than default? */
+ }
+ }
+ else if (is_notclause(clause))
+ {
+ /* inverse of the selectivity of the underlying clause */
+ s1 = 1.0 - clause_selectivity_ext(root,
+ (Node *) get_notclausearg((Expr *) clause),
+ varRelid,
+ jointype,
+ sjinfo,
+ use_extended_stats);
+ }
+ else if (is_andclause(clause))
+ {
+ /* share code with clauselist_selectivity() */
+ s1 = clauselist_selectivity_ext(root,
+ ((BoolExpr *) clause)->args,
+ varRelid,
+ jointype,
+ sjinfo,
+ use_extended_stats);
+ }
+ else if (is_orclause(clause))
+ {
+ /*
+ * Almost the same thing as clauselist_selectivity, but with the
+ * clauses connected by OR.
+ */
+ s1 = clauselist_selectivity_or(root,
+ ((BoolExpr *) clause)->args,
+ varRelid,
+ jointype,
+ sjinfo,
+ use_extended_stats);
+ }
+ else if (is_opclause(clause) || IsA(clause, DistinctExpr))
+ {
+ OpExpr *opclause = (OpExpr *) clause;
+ Oid opno = opclause->opno;
+
+ if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
+ {
+ /* Estimate selectivity for a join clause. */
+ s1 = join_selectivity(root, opno,
+ opclause->args,
+ opclause->inputcollid,
+ jointype,
+ sjinfo);
+ }
+ else
+ {
+ /* Estimate selectivity for a restriction clause. */
+ s1 = restriction_selectivity(root, opno,
+ opclause->args,
+ opclause->inputcollid,
+ varRelid);
+ }
+
+ /*
+ * DistinctExpr has the same representation as OpExpr, but the
+ * contained operator is "=" not "<>", so we must negate the result.
+ * This estimation method doesn't give the right behavior for nulls,
+ * but it's better than doing nothing.
+ */
+ if (IsA(clause, DistinctExpr))
+ s1 = 1.0 - s1;
+ }
+ else if (is_funcclause(clause))
+ {
+ FuncExpr *funcclause = (FuncExpr *) clause;
+
+ /* Try to get an estimate from the support function, if any */
+ s1 = function_selectivity(root,
+ funcclause->funcid,
+ funcclause->args,
+ funcclause->inputcollid,
+ treat_as_join_clause(root, clause, rinfo,
+ varRelid, sjinfo),
+ varRelid,
+ jointype,
+ sjinfo);
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ /* Use node specific selectivity calculation function */
+ s1 = scalararraysel(root,
+ (ScalarArrayOpExpr *) clause,
+ treat_as_join_clause(root, clause, rinfo,
+ varRelid, sjinfo),
+ varRelid,
+ jointype,
+ sjinfo);
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ /* Use node specific selectivity calculation function */
+ s1 = rowcomparesel(root,
+ (RowCompareExpr *) clause,
+ varRelid,
+ jointype,
+ sjinfo);
+ }
+ else if (IsA(clause, NullTest))
+ {
+ /* Use node specific selectivity calculation function */
+ s1 = nulltestsel(root,
+ ((NullTest *) clause)->nulltesttype,
+ (Node *) ((NullTest *) clause)->arg,
+ varRelid,
+ jointype,
+ sjinfo);
+ }
+ else if (IsA(clause, BooleanTest))
+ {
+ /* Use node specific selectivity calculation function */
+ s1 = booltestsel(root,
+ ((BooleanTest *) clause)->booltesttype,
+ (Node *) ((BooleanTest *) clause)->arg,
+ varRelid,
+ jointype,
+ sjinfo);
+ }
+ else if (IsA(clause, CurrentOfExpr))
+ {
+ /* CURRENT OF selects at most one row of its table */
+ CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
+ RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
+
+ if (crel->tuples > 0)
+ s1 = 1.0 / crel->tuples;
+ }
+ else if (IsA(clause, RelabelType))
+ {
+ /* Not sure this case is needed, but it can't hurt */
+ s1 = clause_selectivity_ext(root,
+ (Node *) ((RelabelType *) clause)->arg,
+ varRelid,
+ jointype,
+ sjinfo,
+ use_extended_stats);
+ }
+ else if (IsA(clause, CoerceToDomain))
+ {
+ /* Not sure this case is needed, but it can't hurt */
+ s1 = clause_selectivity_ext(root,
+ (Node *) ((CoerceToDomain *) clause)->arg,
+ varRelid,
+ jointype,
+ sjinfo,
+ use_extended_stats);
+ }
+ else
+ {
+ /*
+ * For anything else, see if we can consider it as a boolean variable.
+ * This only works if it's an immutable expression in Vars of a single
+ * relation; but there's no point in us checking that here because
+ * boolvarsel() will do it internally, and return a suitable default
+ * selectivity if not.
+ */
+ s1 = boolvarsel(root, clause, varRelid);
+ }
+
+ /* Cache the result if possible */
+ if (cacheable)
+ {
+ if (jointype == JOIN_INNER)
+ rinfo->norm_selec = s1;
+ else
+ rinfo->outer_selec = s1;
+ }
+
+#ifdef SELECTIVITY_DEBUG
+ elog(DEBUG4, "clause_selectivity: s1 %f", s1);
+#endif /* SELECTIVITY_DEBUG */
+
+ return s1;
+}
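+
+/*
+ * A worked example of the recursion above (illustrative only, with made-up
+ * per-clause selectivities): for WHERE NOT (x < 10), if the underlying
+ * clause "x < 10" is estimated at 0.3, the is_notclause() branch returns
+ * 1.0 - 0.3 = 0.7.  For WHERE x < 10 AND y = 1 with per-clause estimates
+ * 0.3 and 0.1, the is_andclause() branch hands the list to
+ * clauselist_selectivity_ext(), which (absent extended statistics) treats
+ * the clauses as independent and yields roughly 0.3 * 0.1 = 0.03.
+ */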
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
new file mode 100644
index 0000000..0ba26b2
--- /dev/null
+++ b/src/backend/optimizer/path/costsize.c
@@ -0,0 +1,6221 @@
+/*-------------------------------------------------------------------------
+ *
+ * costsize.c
+ * Routines to compute (and set) relation sizes and path costs
+ *
+ * Path costs are measured in arbitrary units established by these basic
+ * parameters:
+ *
+ * seq_page_cost Cost of a sequential page fetch
+ * random_page_cost Cost of a non-sequential page fetch
+ * cpu_tuple_cost Cost of typical CPU time to process a tuple
+ * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple
+ * cpu_operator_cost Cost of CPU time to execute an operator or function
+ * parallel_tuple_cost Cost of CPU time to pass a tuple from worker to leader backend
+ * parallel_setup_cost Cost of setting up shared memory for parallelism
+ *
+ * We expect that the kernel will typically do some amount of read-ahead
+ * optimization; this in conjunction with seek costs means that seq_page_cost
+ * is normally considerably less than random_page_cost. (However, if the
+ * database is fully cached in RAM, it is reasonable to set them equal.)
+ *
+ * We also use a rough estimate "effective_cache_size" of the number of
+ * disk pages in Postgres + OS-level disk cache. (We can't simply use
+ * NBuffers for this purpose because that would ignore the effects of
+ * the kernel's disk cache.)
+ *
+ * Obviously, taking constants for these values is an oversimplification,
+ * but it's tough enough to get any useful estimates even at this level of
+ * detail. Note that all of these parameters are user-settable, in case
+ * the default values are drastically off for a particular platform.
+ *
+ * seq_page_cost and random_page_cost can also be overridden for an individual
+ * tablespace, in case some data is on a fast disk and other data is on a slow
+ * disk. Per-tablespace overrides never apply to temporary work files such as
+ * an external sort or a materialize node that overflows work_mem.
+ *
+ * We compute two separate costs for each path:
+ * total_cost: total estimated cost to fetch all tuples
+ * startup_cost: cost that is expended before first tuple is fetched
+ * In some scenarios, such as when there is a LIMIT or we are implementing
+ * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
+ * path's result. A caller can estimate the cost of fetching a partial
+ * result by interpolating between startup_cost and total_cost. In detail:
+ * actual_cost = startup_cost +
+ * (total_cost - startup_cost) * tuples_to_fetch / path->rows;
+ * Note that a base relation's rows count (and, by extension, plan_rows for
+ * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
+ * that this equation works properly. (Note: while path->rows is never zero
+ * for ordinary relations, it is zero for paths for provably-empty relations,
+ * so beware of division-by-zero.) The LIMIT is applied as a top-level
+ * plan node.
+ *
+ * For largely historical reasons, most of the routines in this module use
+ * the passed result Path only to store their results (rows, startup_cost and
+ * total_cost) into. All the input data they need is passed as separate
+ * parameters, even though much of it could be extracted from the Path.
+ * An exception is made for the cost_XXXjoin() routines, which expect all
+ * the other fields of the passed XXXPath to be filled in, and similarly
+ * cost_index() assumes the passed IndexPath is valid except for its output
+ * values.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/costsize.c
+ *
+ *-------------------------------------------------------------------------
+ */
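+
+/*
+ * To illustrate the interpolation rule above with made-up numbers: a path
+ * with startup_cost = 10.0, total_cost = 1010.0 and rows = 1000 would be
+ * charged about 10 + (1010 - 10) * 100 / 1000 = 110 cost units to fetch
+ * its first 100 tuples, e.g. under LIMIT 100.
+ */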
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+#include "access/amapi.h"
+#include "access/htup_details.h"
+#include "access/tsmapi.h"
+#include "executor/executor.h"
+#include "executor/nodeAgg.h"
+#include "executor/nodeHash.h"
+#include "executor/nodeMemoize.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/placeholder.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+#include "utils/spccache.h"
+#include "utils/tuplesort.h"
+
+
+#define LOG2(x) (log(x) / 0.693147180559945)
+
+/*
+ * Append and MergeAppend nodes are less expensive than some other operations
+ * which use cpu_tuple_cost; instead of adding a separate GUC, estimate the
+ * per-tuple cost as cpu_tuple_cost multiplied by this value.
+ */
+#define APPEND_CPU_COST_MULTIPLIER 0.5
+
+/*
+ * Maximum value for row estimates. We cap row estimates to this to help
+ * ensure that costs based on these estimates remain within the range of what
+ * double can represent. add_path() wouldn't act sanely given infinite or NaN
+ * cost values.
+ */
+#define MAXIMUM_ROWCOUNT 1e100
+
+double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
+double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
+double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
+double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
+double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
+double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
+double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
+double recursive_worktable_factor = DEFAULT_RECURSIVE_WORKTABLE_FACTOR;
+
+int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+
+Cost disable_cost = 1.0e10;
+
+int max_parallel_workers_per_gather = 2;
+
+bool enable_seqscan = true;
+bool enable_indexscan = true;
+bool enable_indexonlyscan = true;
+bool enable_bitmapscan = true;
+bool enable_tidscan = true;
+bool enable_sort = true;
+bool enable_incremental_sort = true;
+bool enable_hashagg = true;
+bool enable_nestloop = true;
+bool enable_material = true;
+bool enable_memoize = true;
+bool enable_mergejoin = true;
+bool enable_hashjoin = true;
+bool enable_gathermerge = true;
+bool enable_partitionwise_join = false;
+bool enable_partitionwise_aggregate = false;
+bool enable_parallel_append = true;
+bool enable_parallel_hash = true;
+bool enable_partition_pruning = true;
+bool enable_async_append = true;
+
+typedef struct
+{
+ PlannerInfo *root;
+ QualCost total;
+} cost_qual_eval_context;
+
+static List *extract_nonindex_conditions(List *qual_clauses, List *indexclauses);
+static MergeScanSelCache *cached_scansel(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ PathKey *pathkey);
+static void cost_rescan(PlannerInfo *root, Path *path,
+ Cost *rescan_startup_cost, Cost *rescan_total_cost);
+static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
+static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info,
+ QualCost *qpqual_cost);
+static bool has_indexed_join_quals(NestPath *path);
+static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
+ List *quals);
+static double calc_joinrel_size_estimate(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
+ Relids outer_relids,
+ Relids inner_relids,
+ SpecialJoinInfo *sjinfo,
+ List **restrictlist);
+static Cost append_nonpartial_cost(List *subpaths, int numpaths,
+ int parallel_workers);
+static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
+static double relation_byte_size(double tuples, int width);
+static double page_size(double tuples, int width);
+static double get_parallel_divisor(Path *path);
+
+
+/*
+ * clamp_row_est
+ * Force a row-count estimate to a sane value.
+ */
+double
+clamp_row_est(double nrows)
+{
+ /*
+ * Avoid infinite and NaN row estimates. Costs derived from such values
+ * are going to be useless. Also force the estimate to be at least one
+ * row, to make explain output look better and to avoid possible
+ * divide-by-zero when interpolating costs. Make it an integer, too.
+ */
+ if (nrows > MAXIMUM_ROWCOUNT || isnan(nrows))
+ nrows = MAXIMUM_ROWCOUNT;
+ else if (nrows <= 1.0)
+ nrows = 1.0;
+ else
+ nrows = rint(nrows);
+
+ return nrows;
+}
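+
+/*
+ * For example (illustrative only): clamp_row_est(0.0) and clamp_row_est(0.3)
+ * both return 1.0, clamp_row_est(123.4) returns 123, and a NaN or an
+ * estimate above MAXIMUM_ROWCOUNT comes back as 1e100.
+ */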
+
+/*
+ * clamp_cardinality_to_long
+ * Cast a Cardinality value to a sane long value.
+ */
+long
+clamp_cardinality_to_long(Cardinality x)
+{
+ /*
+ * Just for paranoia's sake, ensure we do something sane with negative or
+ * NaN values.
+ */
+ if (isnan(x))
+ return LONG_MAX;
+ if (x <= 0)
+ return 0;
+
+ /*
+ * If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a
+ * double. Casting it to double and back may well result in overflow due
+ * to rounding, so avoid doing that. We trust that any double value that
+ * compares strictly less than "(double) LONG_MAX" will cast to a
+ * representable "long" value.
+ */
+ return (x < (double) LONG_MAX) ? (long) x : LONG_MAX;
+}
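+
+/*
+ * The rounding hazard being avoided: on 64-bit platforms LONG_MAX is
+ * 2^63 - 1, which is not representable as a double; (double) LONG_MAX
+ * rounds up to exactly 2^63, and casting that back to long would
+ * overflow.  Hence the strict "<" comparison above.
+ */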
+
+
+/*
+ * cost_seqscan
+ * Determines and returns the cost of scanning a relation sequentially.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_seqscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost cpu_run_cost;
+ Cost disk_run_cost;
+ double spc_seq_page_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ if (!enable_seqscan)
+ startup_cost += disable_cost;
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ NULL,
+ &spc_seq_page_cost);
+
+ /*
+ * disk costs
+ */
+ disk_run_cost = spc_seq_page_cost * baserel->pages;
+
+ /* CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ cpu_run_cost = cpu_per_tuple * baserel->tuples;
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ /* Adjust costing for parallelism, if used. */
+ if (path->parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(path);
+
+ /* The CPU cost is divided among all the workers. */
+ cpu_run_cost /= parallel_divisor;
+
+ /*
+ * It may be possible to amortize some of the I/O cost, but probably
+ * not very much, because most operating systems already do aggressive
+ * prefetching. For now, we assume that the disk run cost can't be
+ * amortized at all.
+ */
+
+ /*
+ * In the case of a parallel plan, the row count needs to represent
+ * the number of tuples processed per worker.
+ */
+ path->rows = clamp_row_est(path->rows / parallel_divisor);
+ }
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
+}
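+
+/*
+ * Back-of-envelope example (illustrative only, default GUCs, no quals, no
+ * parallelism): for a table of 10000 pages and 1000000 tuples,
+ * disk_run_cost = 1.0 * 10000 = 10000 and cpu_run_cost = 0.01 * 1000000
+ * = 10000, so total_cost comes out near 20000 -- roughly the figure
+ * EXPLAIN would report as the upper cost bound of such a Seq Scan.
+ */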
+
+/*
+ * cost_samplescan
+ * Determines and returns the cost of scanning a relation using sampling.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_samplescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ RangeTblEntry *rte;
+ TableSampleClause *tsc;
+ TsmRoutine *tsm;
+ double spc_seq_page_cost,
+ spc_random_page_cost,
+ spc_page_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations with tablesample clauses */
+ Assert(baserel->relid > 0);
+ rte = planner_rt_fetch(baserel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ tsc = rte->tablesample;
+ Assert(tsc != NULL);
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /* if NextSampleBlock is used, assume random access, else sequential */
+ spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+ spc_random_page_cost : spc_seq_page_cost;
+
+ /*
+ * disk costs (recall that baserel->pages has already been set to the
+ * number of pages the sampling method will visit)
+ */
+ run_cost += spc_page_cost * baserel->pages;
+
+ /*
+ * CPU costs (recall that baserel->tuples has already been set to the
+ * number of tuples the sampling method will select). Note that we ignore
+ * execution cost of the TABLESAMPLE parameter expressions; they will be
+ * evaluated only once per scan, and in most usages they'll likely be
+ * simple constants anyway. We also don't charge anything for the
+ * calculations the sampling method might do internally.
+ */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_gather
+ * Determines and returns the cost of gather path.
+ *
+ * 'rel' is the relation to be operated upon
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'rows' may be used to point to a row estimate; if non-NULL, it overrides
+ * both 'rel' and 'param_info'. This is useful when the path doesn't exactly
+ * correspond to any particular RelOptInfo.
+ */
+void
+cost_gather(GatherPath *path, PlannerInfo *root,
+ RelOptInfo *rel, ParamPathInfo *param_info,
+ double *rows)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+
+ /* Mark the path with the correct row estimate */
+ if (rows)
+ path->path.rows = *rows;
+ else if (param_info)
+ path->path.rows = param_info->ppi_rows;
+ else
+ path->path.rows = rel->rows;
+
+ startup_cost = path->subpath->startup_cost;
+
+ run_cost = path->subpath->total_cost - path->subpath->startup_cost;
+
+ /* Parallel setup and communication cost. */
+ startup_cost += parallel_setup_cost;
+ run_cost += parallel_tuple_cost * path->path.rows;
+
+ path->path.startup_cost = startup_cost;
+ path->path.total_cost = (startup_cost + run_cost);
+}
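+
+/*
+ * For example (default GUCs): gathering 1000 rows adds parallel_setup_cost
+ * = 1000 to the startup cost and parallel_tuple_cost * 1000 = 0.1 * 1000
+ * = 100 to the run cost, on top of the subpath's own costs.
+ */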
+
+/*
+ * cost_gather_merge
+ * Determines and returns the cost of gather merge path.
+ *
+ * GatherMerge merges several pre-sorted input streams, using a heap that at
+ * any given instant holds the next tuple from each stream. If there are N
+ * streams, we need about N*log2(N) tuple comparisons to construct the heap at
+ * startup, and then for each output tuple, about log2(N) comparisons to
+ * replace the top heap entry with the next tuple from the same stream.
+ */
+void
+cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
+ RelOptInfo *rel, ParamPathInfo *param_info,
+ Cost input_startup_cost, Cost input_total_cost,
+ double *rows)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ Cost comparison_cost;
+ double N;
+ double logN;
+
+ /* Mark the path with the correct row estimate */
+ if (rows)
+ path->path.rows = *rows;
+ else if (param_info)
+ path->path.rows = param_info->ppi_rows;
+ else
+ path->path.rows = rel->rows;
+
+ if (!enable_gathermerge)
+ startup_cost += disable_cost;
+
+ /*
+ * Add one to the number of workers to account for the leader. This might
+ * be overgenerous since the leader will do less work than other workers
+ * in typical cases, but we'll go with it for now.
+ */
+ Assert(path->num_workers > 0);
+ N = (double) path->num_workers + 1;
+ logN = LOG2(N);
+
+ /* Assumed cost per tuple comparison */
+ comparison_cost = 2.0 * cpu_operator_cost;
+
+ /* Heap creation cost */
+ startup_cost += comparison_cost * N * logN;
+
+ /* Per-tuple heap maintenance cost */
+ run_cost += path->path.rows * comparison_cost * logN;
+
+ /* small cost for heap management, like cost_merge_append */
+ run_cost += cpu_operator_cost * path->path.rows;
+
+ /*
+ * Parallel setup and communication cost. Since Gather Merge, unlike
+ * Gather, requires us to block until a tuple is available from every
+ * worker, we bump the IPC cost up a little bit as compared with Gather.
+ * For lack of a better idea, charge an extra 5%.
+ */
+ startup_cost += parallel_setup_cost;
+ run_cost += parallel_tuple_cost * path->path.rows * 1.05;
+
+ path->path.startup_cost = startup_cost + input_startup_cost;
+ path->path.total_cost = (startup_cost + run_cost + input_total_cost);
+}
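+
+/*
+ * Worked example (illustrative only, default GUCs): with num_workers = 3
+ * the heap holds N = 4 streams, logN = 2, and comparison_cost = 2 * 0.0025
+ * = 0.005.  Heap creation then costs 0.005 * 4 * 2 = 0.04, and each output
+ * row pays about 0.005 * 2 = 0.01 for heap maintenance, plus 0.0025 for
+ * heap management and 0.1 * 1.05 = 0.105 in IPC cost.
+ */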
+
+/*
+ * cost_index
+ * Determines and returns the cost of scanning a relation using an index.
+ *
+ * 'path' describes the indexscan under consideration, and is complete
+ * except for the fields to be set by this routine
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior
+ *
+ * In addition to rows, startup_cost and total_cost, cost_index() sets the
+ * path's indextotalcost and indexselectivity fields. These values will be
+ * needed if the IndexPath is used in a BitmapIndexScan.
+ *
+ * NOTE: path->indexquals must contain only clauses usable as index
+ * restrictions. Any additional quals evaluated as qpquals may reduce the
+ * number of returned tuples, but they won't reduce the number of tuples
+ * we have to fetch from the table, so they don't reduce the scan cost.
+ */
+void
+cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
+ bool partial_path)
+{
+ IndexOptInfo *index = path->indexinfo;
+ RelOptInfo *baserel = index->rel;
+ bool indexonly = (path->path.pathtype == T_IndexOnlyScan);
+ amcostestimate_function amcostestimate;
+ List *qpquals;
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ Cost cpu_run_cost = 0;
+ Cost indexStartupCost;
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+ double indexCorrelation,
+ csquared;
+ double spc_seq_page_cost,
+ spc_random_page_cost;
+ Cost min_IO_cost,
+ max_IO_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ double tuples_fetched;
+ double pages_fetched;
+ double rand_heap_pages;
+ double index_pages;
+
+ /* Should only be applied to base relations */
+ Assert(IsA(baserel, RelOptInfo) &&
+ IsA(index, IndexOptInfo));
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /*
+ * Mark the path with the correct row estimate, and identify which quals
+ * will need to be enforced as qpquals. We need not check any quals that
+ * are implied by the index's predicate, so we can use indrestrictinfo not
+ * baserestrictinfo as the list of relevant restriction clauses for the
+ * rel.
+ */
+ if (path->path.param_info)
+ {
+ path->path.rows = path->path.param_info->ppi_rows;
+ /* qpquals come from the rel's restriction clauses and ppi_clauses */
+ qpquals = list_concat(extract_nonindex_conditions(path->indexinfo->indrestrictinfo,
+ path->indexclauses),
+ extract_nonindex_conditions(path->path.param_info->ppi_clauses,
+ path->indexclauses));
+ }
+ else
+ {
+ path->path.rows = baserel->rows;
+ /* qpquals come from just the rel's restriction clauses */
+ qpquals = extract_nonindex_conditions(path->indexinfo->indrestrictinfo,
+ path->indexclauses);
+ }
+
+ if (!enable_indexscan)
+ startup_cost += disable_cost;
+ /* we don't need to check enable_indexonlyscan; indxpath.c does that */
+
+ /*
+ * Call index-access-method-specific code to estimate the processing cost
+ * for scanning the index, as well as the selectivity of the index (ie,
+ * the fraction of main-table tuples we will have to retrieve) and its
+ * correlation to the main-table tuple order. We need a cast here because
+ * pathnodes.h uses a weak function type to avoid including amapi.h.
+ */
+ amcostestimate = (amcostestimate_function) index->amcostestimate;
+ amcostestimate(root, path, loop_count,
+ &indexStartupCost, &indexTotalCost,
+ &indexSelectivity, &indexCorrelation,
+ &index_pages);
+
+ /*
+ * Save amcostestimate's results for possible use in bitmap scan planning.
+ * We don't bother to save indexStartupCost or indexCorrelation, because a
+ * bitmap scan doesn't care about either.
+ */
+ path->indextotalcost = indexTotalCost;
+ path->indexselectivity = indexSelectivity;
+
+ /* all costs for touching index itself included here */
+ startup_cost += indexStartupCost;
+ run_cost += indexTotalCost - indexStartupCost;
+
+ /* estimate number of main-table tuples fetched */
+ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+
+ /* fetch estimated page costs for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /*----------
+ * Estimate number of main-table pages fetched, and compute I/O cost.
+ *
+ * When the index ordering is uncorrelated with the table ordering,
+ * we use an approximation proposed by Mackert and Lohman (see
+ * index_pages_fetched() for details) to compute the number of pages
+ * fetched, and then charge spc_random_page_cost per page fetched.
+ *
+ * When the index ordering is exactly correlated with the table ordering
+ * (just after a CLUSTER, for example), the number of pages fetched should
+ * be exactly selectivity * table_size. What's more, all but the first
+ * will be sequential fetches, not the random fetches that occur in the
+ * uncorrelated case. So if the number of pages is more than 1, we
+ * ought to charge
+ * spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost
+ * For partially-correlated indexes, we ought to charge somewhere between
+ * these two estimates. We currently interpolate linearly between the
+ * estimates based on the correlation squared (XXX is that appropriate?).
+ *
+ * If it's an index-only scan, then we will not need to fetch any heap
+ * pages for which the visibility map shows all tuples are visible.
+ * Hence, reduce the estimated number of heap fetches accordingly.
+ * We use the measured fraction of the entire heap that is all-visible,
+ * which might not be particularly relevant to the subset of the heap
+ * that this query will fetch; but it's not clear how to do better.
+ *----------
+ */
+ if (loop_count > 1)
+ {
+ /*
+ * For repeated indexscans, the appropriate estimate for the
+ * uncorrelated case is to scale up the number of tuples fetched in
+ * the Mackert and Lohman formula by the number of scans, so that we
+ * estimate the number of pages fetched by all the scans; then
+ * pro-rate the costs for one scan. In this case we assume all the
+ * fetches are random accesses.
+ */
+ pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
+ baserel->pages,
+ (double) index->pages,
+ root);
+
+ if (indexonly)
+ pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+
+ rand_heap_pages = pages_fetched;
+
+ max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+
+ /*
+ * In the perfectly correlated case, the number of pages touched by
+ * each scan is selectivity * table_size, and we can use the Mackert
+ * and Lohman formula at the page level to estimate how much work is
+ * saved by caching across scans. We still assume all the fetches are
+ * random, though, which is an overestimate that's hard to correct for
+ * without double-counting the cache effects. (But in most cases
+ * where such a plan is actually interesting, only one page would get
+ * fetched per scan anyway, so it shouldn't matter much.)
+ */
+ pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+
+ pages_fetched = index_pages_fetched(pages_fetched * loop_count,
+ baserel->pages,
+ (double) index->pages,
+ root);
+
+ if (indexonly)
+ pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+
+ min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+ }
+ else
+ {
+ /*
+ * Normal case: apply the Mackert and Lohman formula, and then
+ * interpolate between that and the correlation-derived result.
+ */
+ pages_fetched = index_pages_fetched(tuples_fetched,
+ baserel->pages,
+ (double) index->pages,
+ root);
+
+ if (indexonly)
+ pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+
+ rand_heap_pages = pages_fetched;
+
+ /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
+ max_IO_cost = pages_fetched * spc_random_page_cost;
+
+ /* min_IO_cost is for the perfectly correlated case (csquared=1) */
+ pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+
+ if (indexonly)
+ pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+
+ if (pages_fetched > 0)
+ {
+ min_IO_cost = spc_random_page_cost;
+ if (pages_fetched > 1)
+ min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
+ }
+ else
+ min_IO_cost = 0;
+ }
+
+ if (partial_path)
+ {
+ /*
+ * For index only scans compute workers based on number of index pages
+ * fetched; the number of heap pages we fetch might be so small as to
+ * effectively rule out parallelism, which we don't want to do.
+ */
+ if (indexonly)
+ rand_heap_pages = -1;
+
+ /*
+ * Estimate the number of parallel workers required to scan the index.
+ * Use the number of heap pages computed above, treating the heap
+ * fetches as random rather than sequential, since parallel scans
+ * access the pages in random order.
+ */
+ path->path.parallel_workers = compute_parallel_worker(baserel,
+ rand_heap_pages,
+ index_pages,
+ max_parallel_workers_per_gather);
+
+ /*
+ * Fall out if workers can't be assigned for parallel scan, because in
+ * such a case this path will be rejected. So there is no benefit in
+ * doing extra computation.
+ */
+ if (path->path.parallel_workers <= 0)
+ return;
+
+ path->path.parallel_aware = true;
+ }
+
+ /*
+ * Now interpolate based on estimated index order correlation to get total
+ * disk I/O cost for main table accesses.
+ */
+ csquared = indexCorrelation * indexCorrelation;
+
+ run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
+
+ /*
+ * Estimate CPU costs per tuple.
+ *
+ * What we want here is cpu_tuple_cost plus the evaluation costs of any
+ * qual clauses that we have to evaluate as qpquals.
+ */
+ cost_qual_eval(&qpqual_cost, qpquals, root);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+
+ cpu_run_cost += cpu_per_tuple * tuples_fetched;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->path.pathtarget->cost.startup;
+ cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+
+ /* Adjust costing for parallelism, if used. */
+ if (path->path.parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(&path->path);
+
+ path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
+
+ /* The CPU cost is divided among all the workers. */
+ cpu_run_cost /= parallel_divisor;
+ }
+
+ run_cost += cpu_run_cost;
+
+ path->path.startup_cost = startup_cost;
+ path->path.total_cost = startup_cost + run_cost;
+}
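+
+/*
+ * To illustrate the correlation interpolation above (made-up numbers,
+ * default page costs): suppose the uncorrelated estimate is 100 random
+ * page fetches (max_IO_cost = 400) while the perfectly correlated estimate
+ * is 25 pages (min_IO_cost = 4.0 + 24 * 1.0 = 28).  With an index
+ * correlation of 0.5, csquared = 0.25 and the charged I/O cost is
+ * 400 + 0.25 * (28 - 400) = 307.
+ */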
+
+/*
+ * extract_nonindex_conditions
+ *
+ * Given a list of quals to be enforced in an indexscan, extract the ones that
+ * will have to be applied as qpquals (ie, the index machinery won't handle
+ * them). Here we detect only whether a qual clause is directly redundant
+ * with some indexclause. If the index path is chosen for use, createplan.c
+ * will try a bit harder to get rid of redundant qual conditions; specifically
+ * it will see if quals can be proven to be implied by the indexquals. But
+ * it does not seem worth the cycles to try to factor that in at this stage,
+ * since we're only trying to estimate qual eval costs. Otherwise this must
+ * match the logic in create_indexscan_plan().
+ *
+ * qual_clauses, and the result, are lists of RestrictInfos.
+ * indexclauses is a list of IndexClauses.
+ */
+static List *
+extract_nonindex_conditions(List *qual_clauses, List *indexclauses)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, qual_clauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+ if (rinfo->pseudoconstant)
+ continue; /* we may drop pseudoconstants here */
+ if (is_redundant_with_indexclauses(rinfo, indexclauses))
+ continue; /* dup or derived from same EquivalenceClass */
+ /* ... skip the predicate proof attempt createplan.c will try ... */
+ result = lappend(result, rinfo);
+ }
+ return result;
+}
+
+/*
+ * index_pages_fetched
+ * Estimate the number of pages actually fetched after accounting for
+ * cache effects.
+ *
+ * We use an approximation proposed by Mackert and Lohman, "Index Scans
+ * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
+ * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
+ * The Mackert and Lohman approximation is that the number of pages
+ * fetched is
+ * PF =
+ * min(2TNs/(2T+Ns), T) when T <= b
+ * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
+ * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
+ * where
+ * T = # pages in table
+ * N = # tuples in table
+ * s = selectivity = fraction of table to be scanned
+ * b = # buffer pages available (we include kernel space here)
+ *
+ * We assume that effective_cache_size is the total number of buffer pages
+ * available for the whole query, and pro-rate that space across all the
+ * tables in the query and the index currently under consideration. (This
+ * ignores space needed for other indexes used by the query, but since we
+ * don't know which indexes will get used, we can't estimate that very well;
+ * and in any case counting all the tables may well be an overestimate, since
+ * depending on the join plan not all the tables may be scanned concurrently.)
+ *
+ * The product Ns is the number of tuples fetched; we pass in that
+ * product rather than calculating it here. "pages" is the number of pages
+ * in the object under consideration (either an index or a table).
+ * "index_pages" is the amount to add to the total table space, which was
+ * computed for us by make_one_rel.
+ *
+ * Caller is expected to have ensured that tuples_fetched is greater than zero
+ * and rounded to integer (see clamp_row_est). The result will likewise be
+ * greater than zero and integral.
+ */
+double
+index_pages_fetched(double tuples_fetched, BlockNumber pages,
+ double index_pages, PlannerInfo *root)
+{
+ double pages_fetched;
+ double total_pages;
+ double T,
+ b;
+
+ /* T is # pages in table, but don't allow it to be zero */
+ T = (pages > 1) ? (double) pages : 1.0;
+
+ /* Compute number of pages assumed to be competing for cache space */
+ total_pages = root->total_table_pages + index_pages;
+ total_pages = Max(total_pages, 1.0);
+ Assert(T <= total_pages);
+
+ /* b is pro-rated share of effective_cache_size */
+ b = (double) effective_cache_size * T / total_pages;
+
+ /* force it positive and integral */
+ if (b <= 1.0)
+ b = 1.0;
+ else
+ b = ceil(b);
+
+ /* This part is the Mackert and Lohman formula */
+ if (T <= b)
+ {
+ pages_fetched =
+ (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+ if (pages_fetched >= T)
+ pages_fetched = T;
+ else
+ pages_fetched = ceil(pages_fetched);
+ }
+ else
+ {
+ double lim;
+
+ lim = (2.0 * T * b) / (2.0 * T - b);
+ if (tuples_fetched <= lim)
+ {
+ pages_fetched =
+ (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+ }
+ else
+ {
+ pages_fetched =
+ b + (tuples_fetched - lim) * (T - b) / T;
+ }
+ pages_fetched = ceil(pages_fetched);
+ }
+ return pages_fetched;
+}
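+
+/*
+ * Worked example of the formula (illustrative only): with T = 1000 table
+ * pages, a pro-rated cache share of b = 800 pages, and Ns = 500 tuples
+ * fetched, we have T > b and the cutoff 2Tb/(2T-b) = 1600000/1200 = 1333,
+ * so the middle branch applies: PF = 2TNs/(2T+Ns) = 1000000/2500 = 400
+ * pages fetched, instead of charging one page fetch per tuple.
+ */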
+
+/*
+ * get_indexpath_pages
+ * Determine the total size of the indexes used in a bitmap index path.
+ *
+ * Note: if the same index is used more than once in a bitmap tree, we will
+ * count it multiple times, which perhaps is the wrong thing ... but it's
+ * not completely clear, and detecting duplicates is difficult, so ignore it
+ * for now.
+ */
+static double
+get_indexpath_pages(Path *bitmapqual)
+{
+ double result = 0;
+ ListCell *l;
+
+ if (IsA(bitmapqual, BitmapAndPath))
+ {
+ BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
+
+ foreach(l, apath->bitmapquals)
+ {
+ result += get_indexpath_pages((Path *) lfirst(l));
+ }
+ }
+ else if (IsA(bitmapqual, BitmapOrPath))
+ {
+ BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
+
+ foreach(l, opath->bitmapquals)
+ {
+ result += get_indexpath_pages((Path *) lfirst(l));
+ }
+ }
+ else if (IsA(bitmapqual, IndexPath))
+ {
+ IndexPath *ipath = (IndexPath *) bitmapqual;
+
+ result = (double) ipath->indexinfo->pages;
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
+
+ return result;
+}
+
+/*
+ * cost_bitmap_heap_scan
+ * Determines and returns the cost of scanning a relation using a bitmap
+ * index-then-heap plan.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior
+ *
+ * Note: the component IndexPaths in bitmapqual should have been costed
+ * using the same loop_count.
+ */
+void
+cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info,
+ Path *bitmapqual, double loop_count)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ Cost indexTotalCost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ Cost cost_per_page;
+ Cost cpu_run_cost;
+ double tuples_fetched;
+ double pages_fetched;
+ double spc_seq_page_cost,
+ spc_random_page_cost;
+ double T;
+
+ /* Should only be applied to base relations */
+ Assert(IsA(baserel, RelOptInfo));
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ if (!enable_bitmapscan)
+ startup_cost += disable_cost;
+
+ pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual,
+ loop_count, &indexTotalCost,
+ &tuples_fetched);
+
+ startup_cost += indexTotalCost;
+ T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+
+ /* Fetch estimated page costs for tablespace containing table. */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /*
+ * For small numbers of pages we should charge spc_random_page_cost
+ * apiece, while if nearly all the table's pages are being read, it's more
+ * appropriate to charge spc_seq_page_cost apiece. The effect is
+ * nonlinear, too. For lack of a better idea, interpolate like this to
+ * determine the cost per page.
+ */
+ if (pages_fetched >= 2.0)
+ cost_per_page = spc_random_page_cost -
+ (spc_random_page_cost - spc_seq_page_cost)
+ * sqrt(pages_fetched / T);
+ else
+ cost_per_page = spc_random_page_cost;
+
+ run_cost += pages_fetched * cost_per_page;
+
+ /*
+ * Estimate CPU costs per tuple.
+ *
+ * Often the indexquals don't need to be rechecked at each tuple ... but
+ * not always, especially not if there are enough tuples involved that the
+ * bitmaps become lossy. For the moment, just assume they will be
+ * rechecked always. This means we charge the full freight for all the
+ * scan clauses.
+ */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ cpu_run_cost = cpu_per_tuple * tuples_fetched;
+
+ /* Adjust costing for parallelism, if used. */
+ if (path->parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(path);
+
+ /* The CPU cost is divided among all the workers. */
+ cpu_run_cost /= parallel_divisor;
+
+ path->rows = clamp_row_est(path->rows / parallel_divisor);
+ }
+
+
+ run_cost += cpu_run_cost;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
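+
+/*
+ * Example of the per-page interpolation (made-up numbers, default page
+ * costs 4.0/1.0): for a 10000-page table from which the bitmap is expected
+ * to fetch 2500 pages, sqrt(2500/10000) = 0.5, so cost_per_page =
+ * 4.0 - (4.0 - 1.0) * 0.5 = 2.5, halfway between random and sequential.
+ */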
+
+/*
+ * cost_bitmap_tree_node
+ * Extract cost and selectivity from a bitmap tree node (index/and/or)
+ */
+void
+cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
+{
+ if (IsA(path, IndexPath))
+ {
+ *cost = ((IndexPath *) path)->indextotalcost;
+ *selec = ((IndexPath *) path)->indexselectivity;
+
+ /*
+ * Charge a small amount per retrieved tuple to reflect the costs of
+ * manipulating the bitmap. This is mostly to make sure that a bitmap
+ * scan doesn't look to be the same cost as an indexscan to retrieve a
+ * single tuple.
+ */
+ *cost += 0.1 * cpu_operator_cost * path->rows;
+ }
+ else if (IsA(path, BitmapAndPath))
+ {
+ *cost = path->total_cost;
+ *selec = ((BitmapAndPath *) path)->bitmapselectivity;
+ }
+ else if (IsA(path, BitmapOrPath))
+ {
+ *cost = path->total_cost;
+ *selec = ((BitmapOrPath *) path)->bitmapselectivity;
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d", nodeTag(path));
+ *cost = *selec = 0; /* keep compiler quiet */
+ }
+}
+
+/*
+ * cost_bitmap_and_node
+ * Estimate the cost of a BitmapAnd node
+ *
+ * Note that this considers only the costs of index scanning and bitmap
+ * creation, not the eventual heap access. In that sense the object isn't
+ * truly a Path, but it has enough path-like properties (costs in particular)
+ * to warrant treating it as one. We don't bother to set the path rows field,
+ * however.
+ */
+void
+cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
+{
+ Cost totalCost;
+ Selectivity selec;
+ ListCell *l;
+
+ /*
+ * We estimate AND selectivity on the assumption that the inputs are
+ * independent. This is probably often wrong, but we don't have the info
+ * to do better.
+ *
+ * The runtime cost of the BitmapAnd itself is estimated at 100x
+ * cpu_operator_cost for each tbm_intersect needed. Probably too small,
+ * definitely too simplistic?
+ */
+ totalCost = 0.0;
+ selec = 1.0;
+ foreach(l, path->bitmapquals)
+ {
+ Path *subpath = (Path *) lfirst(l);
+ Cost subCost;
+ Selectivity subselec;
+
+ cost_bitmap_tree_node(subpath, &subCost, &subselec);
+
+ selec *= subselec;
+
+ totalCost += subCost;
+ if (l != list_head(path->bitmapquals))
+ totalCost += 100.0 * cpu_operator_cost;
+ }
+ path->bitmapselectivity = selec;
+ path->path.rows = 0; /* per above, not used */
+ path->path.startup_cost = totalCost;
+ path->path.total_cost = totalCost;
+}
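+
+/*
+ * For instance (illustrative only): ANDing two bitmap inputs with
+ * selectivities 0.1 and 0.2 yields bitmapselectivity = 0.02 under the
+ * independence assumption, and the node itself is charged one
+ * 100 * cpu_operator_cost = 0.25 tbm_intersect on top of the two input
+ * costs.  cost_bitmap_or_node() below follows the same pattern, except
+ * that the selectivities are summed and clamped to 1.0.
+ */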
+
+/*
+ * cost_bitmap_or_node
+ * Estimate the cost of a BitmapOr node
+ *
+ * See comments for cost_bitmap_and_node.
+ */
+void
+cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
+{
+ Cost totalCost;
+ Selectivity selec;
+ ListCell *l;
+
+ /*
+ * We estimate OR selectivity on the assumption that the inputs are
+ * non-overlapping, since that's often the case in "x IN (list)" type
+ * situations. Of course, we clamp to 1.0 at the end.
+ *
+ * The runtime cost of the BitmapOr itself is estimated at 100x
+ * cpu_operator_cost for each tbm_union needed. Probably too small,
+ * definitely too simplistic? We are aware that the tbm_unions are
+ * optimized out when the inputs are BitmapIndexScans.
+ */
+ totalCost = 0.0;
+ selec = 0.0;
+ foreach(l, path->bitmapquals)
+ {
+ Path *subpath = (Path *) lfirst(l);
+ Cost subCost;
+ Selectivity subselec;
+
+ cost_bitmap_tree_node(subpath, &subCost, &subselec);
+
+ selec += subselec;
+
+ totalCost += subCost;
+ if (l != list_head(path->bitmapquals) &&
+ !IsA(subpath, IndexPath))
+ totalCost += 100.0 * cpu_operator_cost;
+ }
+ path->bitmapselectivity = Min(selec, 1.0);
+ path->path.rows = 0; /* per above, not used */
+ path->path.startup_cost = totalCost;
+ path->path.total_cost = totalCost;
+}
+
+/*
+ * cost_tidscan
+ * Determines and returns the cost of scanning a relation using TIDs.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'tidquals' is the list of TID-checkable quals
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tidscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ bool isCurrentOf = false;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ QualCost tid_qual_cost;
+ int ntuples;
+ ListCell *l;
+ double spc_random_page_cost;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Count how many tuples we expect to retrieve */
+ ntuples = 0;
+ foreach(l, tidquals)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+ Expr *qual = rinfo->clause;
+
+ if (IsA(qual, ScalarArrayOpExpr))
+ {
+ /* Each element of the array yields 1 tuple */
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
+ Node *arraynode = (Node *) lsecond(saop->args);
+
+ ntuples += estimate_array_length(arraynode);
+ }
+ else if (IsA(qual, CurrentOfExpr))
+ {
+ /* CURRENT OF yields 1 tuple */
+ isCurrentOf = true;
+ ntuples++;
+ }
+ else
+ {
+ /* It's just CTID = something, count 1 tuple */
+ ntuples++;
+ }
+ }
+
+ /*
+ * We must force TID scan for WHERE CURRENT OF, because only nodeTidscan.c
+ * understands how to do it correctly. Therefore, honor enable_tidscan
+ * only when CURRENT OF isn't present. Also note that cost_qual_eval
+ * counts a CurrentOfExpr as having startup cost disable_cost, which we
+ * subtract off here; that's to prevent other plan types such as seqscan
+ * from winning.
+ */
+ if (isCurrentOf)
+ {
+ Assert(baserel->baserestrictcost.startup >= disable_cost);
+ startup_cost -= disable_cost;
+ }
+ else if (!enable_tidscan)
+ startup_cost += disable_cost;
+
+ /*
+ * The TID qual expressions will be computed once, any other baserestrict
+ * quals once per retrieved tuple.
+ */
+ cost_qual_eval(&tid_qual_cost, tidquals, root);
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ NULL);
+
+ /* disk costs --- assume each tuple on a different page */
+ run_cost += spc_random_page_cost * ntuples;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ /* XXX currently we assume TID quals are a subset of qpquals */
+ startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+ tid_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * ntuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
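+
+/*
+ * Example (default random_page_cost = 4.0): WHERE ctid = ANY ('{...}') with
+ * an array of 3 TIDs is counted as ntuples = 3, so the disk charge is
+ * 3 * 4.0 = 12 on the assumption that each tuple lives on a different page.
+ */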
+
+/*
+ * cost_tidrangescan
+ * Determines and sets the costs of scanning a relation using a range of
+ * TIDs for 'path'
+ *
+ * 'baserel' is the relation to be scanned
+ * 'tidrangequals' is the list of TID-checkable range quals
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tidrangescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, List *tidrangequals,
+ ParamPathInfo *param_info)
+{
+ Selectivity selectivity;
+ double pages;
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ QualCost tid_qual_cost;
+ double ntuples;
+ double nseqpages;
+ double spc_random_page_cost;
+ double spc_seq_page_cost;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Count how many tuples and pages we expect to scan */
+ selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid,
+ JOIN_INNER, NULL);
+ pages = ceil(selectivity * baserel->pages);
+
+ if (pages <= 0.0)
+ pages = 1.0;
+
+ /*
+ * The first page in a range requires a random seek, but each subsequent
+ * page is just a normal sequential page read. NOTE: it's desirable for
+ * TID Range Scans to cost more than the equivalent Sequential Scans,
+ * because Seq Scans have some performance advantages such as scan
+ * synchronization and parallelizability, and we'd prefer one of them to
+ * be picked unless a TID Range Scan really is better.
+ */
+ ntuples = selectivity * baserel->tuples;
+ nseqpages = pages - 1.0;
+
+ if (!enable_tidscan)
+ startup_cost += disable_cost;
+
+ /*
+ * The TID qual expressions will be computed once, any other baserestrict
+ * quals once per retrieved tuple.
+ */
+ cost_qual_eval(&tid_qual_cost, tidrangequals, root);
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /* disk costs; 1 random page and the remainder as seq pages */
+ run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ /*
+ * XXX currently we assume TID quals are a subset of qpquals at this
+ * point; they will be removed (if possible) when we create the plan, so
+ * we subtract their cost from the total qpqual cost. (If the TID quals
+ * can't be removed, this is a mistake and we're going to underestimate
+ * the CPU cost a bit.)
+ */
+ startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+ tid_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * ntuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_subqueryscan
+ * Determines and returns the cost of scanning a subquery RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost;
+ Cost run_cost;
+ List *qpquals;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations that are subqueries */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_SUBQUERY);
+
+ /*
+ * We compute the rowcount estimate as the subplan's estimate times the
+ * selectivity of relevant restriction clauses. In simple cases this will
+ * come out the same as baserel->rows; but when dealing with parallelized
+ * paths we must do it like this to get the right answer.
+ */
+ if (param_info)
+ qpquals = list_concat_copy(param_info->ppi_clauses,
+ baserel->baserestrictinfo);
+ else
+ qpquals = baserel->baserestrictinfo;
+
+ path->path.rows = clamp_row_est(path->subpath->rows *
+ clauselist_selectivity(root,
+ qpquals,
+ 0,
+ JOIN_INNER,
+ NULL));
+
+ /*
+ * Cost of path is cost of evaluating the subplan, plus cost of evaluating
+ * any restriction clauses and tlist that will be attached to the
+ * SubqueryScan node, plus cpu_tuple_cost to account for selection and
+ * projection overhead.
+ */
+ path->path.startup_cost = path->subpath->startup_cost;
+ path->path.total_cost = path->subpath->total_cost;
+
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost = qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost = cpu_per_tuple * path->subpath->rows;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->path.pathtarget->cost.startup;
+ run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+
+ path->path.startup_cost += startup_cost;
+ path->path.total_cost += startup_cost + run_cost;
+}
+
+/*
+ * cost_functionscan
+ * Determines and returns the cost of scanning a function RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_functionscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ RangeTblEntry *rte;
+ QualCost exprcost;
+
+ /* Should only be applied to base relations that are functions */
+ Assert(baserel->relid > 0);
+ rte = planner_rt_fetch(baserel->relid, root);
+ Assert(rte->rtekind == RTE_FUNCTION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /*
+ * Estimate costs of executing the function expression(s).
+ *
+ * Currently, nodeFunctionscan.c always executes the functions to
+ * completion before returning any rows, and caches the results in a
+ * tuplestore. So the function eval cost is all startup cost, and per-row
+ * costs are minimal.
+ *
+ * XXX in principle we ought to charge tuplestore spill costs if the
+ * number of rows is large. However, given how phony our rowcount
+ * estimates for functions tend to be, there's not a lot of point in that
+ * refinement right now.
+ */
+ cost_qual_eval_node(&exprcost, (Node *) rte->functions, root);
+
+ startup_cost += exprcost.startup + exprcost.per_tuple;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_tablefuncscan
+ * Determines and returns the cost of scanning a table function.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tablefuncscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ RangeTblEntry *rte;
+ QualCost exprcost;
+
+ /* Should only be applied to base relations that are functions */
+ Assert(baserel->relid > 0);
+ rte = planner_rt_fetch(baserel->relid, root);
+ Assert(rte->rtekind == RTE_TABLEFUNC);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /*
+ * Estimate costs of executing the table func expression(s).
+ *
+ * XXX in principle we ought to charge tuplestore spill costs if the
+ * number of rows is large. However, given how phony our rowcount
+ * estimates for tablefuncs tend to be, there's not a lot of point in that
+ * refinement right now.
+ */
+ cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root);
+
+ startup_cost += exprcost.startup + exprcost.per_tuple;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_valuesscan
+ * Determines and returns the cost of scanning a VALUES RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_valuesscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations that are values lists */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_VALUES);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /*
+ * For now, estimate list evaluation cost at one operator eval per list
+ * (probably pretty bogus, but is it worth being smarter?)
+ */
+ cpu_per_tuple = cpu_operator_cost;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
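+	/*
+	 * For example, a three-row VALUES list with no quals costs about
+	 * 3 * (cpu_operator_cost + cpu_tuple_cost) = 0.0375 of run cost with the
+	 * default settings, before the tlist evaluation costs added below.
+	 */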
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_ctescan
+ * Determines and returns the cost of scanning a CTE RTE.
+ *
+ * Note: this is used for both self-reference and regular CTEs; the
+ * possible cost differences are below the threshold of what we could
+ * estimate accurately anyway. Note that the costs of evaluating the
+ * referenced CTE query are added into the final plan as initplan costs,
+ * and should NOT be counted here.
+ */
+void
+cost_ctescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations that are CTEs */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_CTE);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Charge one CPU tuple cost per row for tuplestore manipulation */
+ cpu_per_tuple = cpu_tuple_cost;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_namedtuplestorescan
+ * Determines and returns the cost of scanning a named tuplestore.
+ */
+void
+cost_namedtuplestorescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations that are Tuplestores */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_NAMEDTUPLESTORE);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Charge one CPU tuple cost per row for tuplestore manipulation */
+ cpu_per_tuple = cpu_tuple_cost;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_resultscan
+ * Determines and returns the cost of scanning an RTE_RESULT relation.
+ */
+void
+cost_resultscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to RTE_RESULT base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RESULT);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* We charge qual cost plus cpu_tuple_cost */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_recursive_union
+ * Determines and returns the cost of performing a recursive union,
+ * and also the estimated output size.
+ *
+ * We are given Paths for the nonrecursive and recursive terms.
+ */
+void
+cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
+{
+ Cost startup_cost;
+ Cost total_cost;
+ double total_rows;
+
+ /* We probably have decent estimates for the non-recursive term */
+ startup_cost = nrterm->startup_cost;
+ total_cost = nrterm->total_cost;
+ total_rows = nrterm->rows;
+
+ /*
+ * We arbitrarily assume that about 10 recursive iterations will be
+ * needed, and that we've managed to get a good fix on the cost and output
+ * size of each one of them. These are mighty shaky assumptions but it's
+ * hard to see how to do better.
+ */
+ total_cost += 10 * rterm->total_cost;
+ total_rows += 10 * rterm->rows;
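+	/*
+	 * For example, a recursive term estimated at 100 rows and a total cost
+	 * of 5.0 per iteration contributes 1000 rows and 50.0 of cost here,
+	 * regardless of how many iterations actually occur at run time.
+	 */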
+
+ /*
+ * Also charge cpu_tuple_cost per row to account for the costs of
+ * manipulating the tuplestores. (We don't worry about possible
+ * spill-to-disk costs.)
+ */
+ total_cost += cpu_tuple_cost * total_rows;
+
+ runion->startup_cost = startup_cost;
+ runion->total_cost = total_cost;
+ runion->rows = total_rows;
+ runion->pathtarget->width = Max(nrterm->pathtarget->width,
+ rterm->pathtarget->width);
+}
+
+/*
+ * cost_tuplesort
+ * Determines and returns the cost of sorting a relation using tuplesort,
+ * not including the cost of reading the input data.
+ *
+ * If the total volume of data to sort is less than sort_mem, we will do
+ * an in-memory sort, which requires no I/O and about t*log2(t) tuple
+ * comparisons for t tuples.
+ *
+ * If the total volume exceeds sort_mem, we switch to a tape-style merge
+ * algorithm. There will still be about t*log2(t) tuple comparisons in
+ * total, but we will also need to write and read each tuple once per
+ * merge pass. We expect about ceil(logM(r)) merge passes where r is the
+ * number of initial runs formed and M is the merge order used by tuplesort.c.
+ * Since the average initial run should be about sort_mem, we have
+ *		disk traffic = 2 * relsize * ceil(logM(relsize / sort_mem))
+ * cpu = comparison_cost * t * log2(t)
+ *
+ * If the sort is bounded (i.e., only the first k result tuples are needed)
+ * and k tuples can fit into sort_mem, we use a heap method that keeps only
+ * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
+ *
+ * The disk traffic is assumed to be 3/4ths sequential and 1/4th random
+ * accesses (XXX can't we refine that guess?)
+ *
+ * By default, we charge two operator evals per tuple comparison, which should
+ * be in the right ballpark in most cases. The caller can tweak this by
+ * specifying nonzero comparison_cost; typically that's used for any extra
+ * work that has to be done to prepare the inputs to the comparison operators.
+ *
+ * 'tuples' is the number of tuples in the relation
+ * 'width' is the average tuple width in bytes
+ * 'comparison_cost' is the extra cost per comparison, if any
+ * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
+ * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ */
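+/*
+ * For illustration: with the default cpu_operator_cost of 0.0025, the
+ * per-comparison charge below is 0.005, so an in-memory sort of 10000 tuples
+ * is costed at about 0.005 * 10000 * log2(10000) ~= 664 of startup cost plus
+ * 0.0025 * 10000 = 25 of run cost for extracting the sorted tuples.
+ */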
+static void
+cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+ double tuples, int width,
+ Cost comparison_cost, int sort_mem,
+ double limit_tuples)
+{
+ double input_bytes = relation_byte_size(tuples, width);
+ double output_bytes;
+ double output_tuples;
+ long sort_mem_bytes = sort_mem * 1024L;
+
+ /*
+ * We want to be sure the cost of a sort is never estimated as zero, even
+ * if passed-in tuple count is zero. Besides, mustn't do log(0)...
+ */
+ if (tuples < 2.0)
+ tuples = 2.0;
+
+ /* Include the default cost-per-comparison */
+ comparison_cost += 2.0 * cpu_operator_cost;
+
+ /* Do we have a useful LIMIT? */
+ if (limit_tuples > 0 && limit_tuples < tuples)
+ {
+ output_tuples = limit_tuples;
+ output_bytes = relation_byte_size(output_tuples, width);
+ }
+ else
+ {
+ output_tuples = tuples;
+ output_bytes = input_bytes;
+ }
+
+ if (output_bytes > sort_mem_bytes)
+ {
+ /*
+ * We'll have to use a disk-based sort of all the tuples
+ */
+ double npages = ceil(input_bytes / BLCKSZ);
+ double nruns = input_bytes / sort_mem_bytes;
+ double mergeorder = tuplesort_merge_order(sort_mem_bytes);
+ double log_runs;
+ double npageaccesses;
+
+ /*
+ * CPU costs
+ *
+ * Assume about N log2 N comparisons
+ */
+ *startup_cost = comparison_cost * tuples * LOG2(tuples);
+
+ /* Disk costs */
+
+ /* Compute logM(r) as log(r) / log(M) */
+ if (nruns > mergeorder)
+ log_runs = ceil(log(nruns) / log(mergeorder));
+ else
+ log_runs = 1.0;
+ npageaccesses = 2.0 * npages * log_runs;
+ /* Assume 3/4ths of accesses are sequential, 1/4th are not */
+ *startup_cost += npageaccesses *
+ (seq_page_cost * 0.75 + random_page_cost * 0.25);
+ }
+ else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
+ {
+ /*
+ * We'll use a bounded heap-sort keeping just K tuples in memory, for
+ * a total number of tuple comparisons of N log2 K; but the constant
+ * factor is a bit higher than for quicksort. Tweak it so that the
+ * cost curve is continuous at the crossover point.
+ */
+ *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+ }
+ else
+ {
+ /* We'll use plain quicksort on all the input tuples */
+ *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ }
+
+ /*
+ * Also charge a small amount (arbitrarily set equal to operator cost) per
+ * extracted tuple. We don't charge cpu_tuple_cost because a Sort node
+ * doesn't do qual-checking or projection, so it has less overhead than
+	 * most plan nodes.  Note it's correct to use tuples, not output_tuples,
+	 * here --- the upper LIMIT will pro-rate the run cost, so using
+	 * output_tuples would amount to double-counting the LIMIT.
+ */
+ *run_cost = cpu_operator_cost * tuples;
+}
+
+/*
+ * cost_incremental_sort
+ * Determines and returns the cost of sorting a relation incrementally, when
+ * the input path is presorted by a prefix of the pathkeys.
+ *
+ * 'presorted_keys' is the number of leading pathkeys by which the input path
+ * is sorted.
+ *
+ * We estimate the number of groups into which the relation is divided by the
+ * leading pathkeys, and then calculate the cost of sorting a single group
+ * with tuplesort using cost_tuplesort().
+ */
+void
+cost_incremental_sort(Path *path,
+ PlannerInfo *root, List *pathkeys, int presorted_keys,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples, int width, Cost comparison_cost, int sort_mem,
+ double limit_tuples)
+{
+ Cost startup_cost = 0,
+ run_cost = 0,
+ input_run_cost = input_total_cost - input_startup_cost;
+ double group_tuples,
+ input_groups;
+ Cost group_startup_cost,
+ group_run_cost,
+ group_input_run_cost;
+ List *presortedExprs = NIL;
+ ListCell *l;
+ int i = 0;
+ bool unknown_varno = false;
+
+ Assert(presorted_keys != 0);
+
+ /*
+ * We want to be sure the cost of a sort is never estimated as zero, even
+ * if passed-in tuple count is zero. Besides, mustn't do log(0)...
+ */
+ if (input_tuples < 2.0)
+ input_tuples = 2.0;
+
+ /* Default estimate of number of groups, capped to one group per row. */
+ input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
+
+ /*
+ * Extract presorted keys as list of expressions.
+ *
+ * We need to be careful about Vars containing "varno 0" which might have
+ * been introduced by generate_append_tlist, which would confuse
+ * estimate_num_groups (in fact it'd fail for such expressions). See
+ * recurse_set_operations which has to deal with the same issue.
+ *
+ * Unlike recurse_set_operations we can't access the original target list
+	 * here, and even if we could, it's not very clear how useful that would be
+ * for a set operation combining multiple tables. So we simply detect if
+ * there are any expressions with "varno 0" and use the default
+ * DEFAULT_NUM_DISTINCT in that case.
+ *
+ * We might also use either 1.0 (a single group) or input_tuples (each row
+ * being a separate group), pretty much the worst and best case for
+ * incremental sort. But those are extreme cases and using something in
+ * between seems reasonable. Furthermore, generate_append_tlist is used
+ * for set operations, which are likely to produce mostly unique output
+ * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive
+ * while maintaining lower startup cost.
+ */
+ foreach(l, pathkeys)
+ {
+ PathKey *key = (PathKey *) lfirst(l);
+ EquivalenceMember *member = (EquivalenceMember *)
+ linitial(key->pk_eclass->ec_members);
+
+ /*
+ * Check if the expression contains Var with "varno 0" so that we
+ * don't call estimate_num_groups in that case.
+ */
+ if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr)))
+ {
+ unknown_varno = true;
+ break;
+ }
+
+ /* expression not containing any Vars with "varno 0" */
+ presortedExprs = lappend(presortedExprs, member->em_expr);
+
+ i++;
+ if (i >= presorted_keys)
+ break;
+ }
+
+ /* Estimate number of groups with equal presorted keys. */
+ if (!unknown_varno)
+ input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
+ NULL, NULL);
+
+ group_tuples = input_tuples / input_groups;
+ group_input_run_cost = input_run_cost / input_groups;
+
+ /*
+ * Estimate average cost of sorting of one group where presorted keys are
+ * equal. Incremental sort is sensitive to distribution of tuples to the
+ * groups, where we're relying on quite rough assumptions. Thus, we're
+ * pessimistic about incremental sort performance and increase its average
+ * group size by half.
+ */
+ cost_tuplesort(&group_startup_cost, &group_run_cost,
+ 1.5 * group_tuples, width, comparison_cost, sort_mem,
+ limit_tuples);
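+	/*
+	 * For example, 10000 input tuples falling into an estimated 100 groups
+	 * give group_tuples = 100, so each per-group sort above is costed as if
+	 * it handled 1.5 * 100 = 150 tuples.
+	 */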
+
+ /*
+ * Startup cost of incremental sort is the startup cost of its first group
+ * plus the cost of its input.
+ */
+ startup_cost += group_startup_cost
+ + input_startup_cost + group_input_run_cost;
+
+ /*
+ * After we started producing tuples from the first group, the cost of
+ * producing all the tuples is given by the cost to finish processing this
+ * group, plus the total cost to process the remaining groups, plus the
+ * remaining cost of input.
+ */
+ run_cost += group_run_cost
+ + (group_run_cost + group_startup_cost) * (input_groups - 1)
+ + group_input_run_cost * (input_groups - 1);
+
+ /*
+ * Incremental sort adds some overhead by itself. Firstly, it has to
+ * detect the sort groups. This is roughly equal to one extra copy and
+ * comparison per tuple. Secondly, it has to reset the tuplesort context
+ * for every group.
+ */
+ run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples;
+ run_cost += 2.0 * cpu_tuple_cost * input_groups;
+
+ path->rows = input_tuples;
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_sort
+ * Determines and returns the cost of sorting a relation, including
+ * the cost of reading the input data.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. Since this routine doesn't
+ * currently do anything with pathkeys anyway, that doesn't matter...
+ * but if it ever does, it should react gracefully to lack of key data.
+ * (Actually, the thing we'd most likely be interested in is just the number
+ * of sort keys, which all callers *could* supply.)
+ */
+void
+cost_sort(Path *path, PlannerInfo *root,
+ List *pathkeys, Cost input_cost, double tuples, int width,
+ Cost comparison_cost, int sort_mem,
+ double limit_tuples)
+
+{
+ Cost startup_cost;
+ Cost run_cost;
+
+ cost_tuplesort(&startup_cost, &run_cost,
+ tuples, width,
+ comparison_cost, sort_mem,
+ limit_tuples);
+
+ if (!enable_sort)
+ startup_cost += disable_cost;
+
+ startup_cost += input_cost;
+
+ path->rows = tuples;
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * append_nonpartial_cost
+ * Estimate the cost of the non-partial paths in a Parallel Append.
+ * The non-partial paths are assumed to be the first "numpaths" paths
+ * from the subpaths list, and to be in order of decreasing cost.
+ */
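+/*
+ * For example, with 3 workers and non-partial subpaths costing 20, 15, 10
+ * and 8: the first three are claimed by separate workers, the 8 is added to
+ * the cheapest slot (10 + 8 = 18), and the estimate returned is the largest
+ * slot total, 20.
+ */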
+static Cost
+append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
+{
+ Cost *costarr;
+ int arrlen;
+ ListCell *l;
+ ListCell *cell;
+ int i;
+ int path_index;
+ int min_index;
+ int max_index;
+
+ if (numpaths == 0)
+ return 0;
+
+ /*
+ * Array length is number of workers or number of relevant paths,
+ * whichever is less.
+ */
+ arrlen = Min(parallel_workers, numpaths);
+ costarr = (Cost *) palloc(sizeof(Cost) * arrlen);
+
+ /* The first few paths will each be claimed by a different worker. */
+ path_index = 0;
+ foreach(cell, subpaths)
+ {
+ Path *subpath = (Path *) lfirst(cell);
+
+ if (path_index == arrlen)
+ break;
+ costarr[path_index++] = subpath->total_cost;
+ }
+
+ /*
+ * Since subpaths are sorted by decreasing cost, the last one will have
+ * the minimum cost.
+ */
+ min_index = arrlen - 1;
+
+ /*
+ * For each of the remaining subpaths, add its cost to the array element
+ * with minimum cost.
+ */
+ for_each_cell(l, subpaths, cell)
+ {
+ Path *subpath = (Path *) lfirst(l);
+ int i;
+
+ /* Consider only the non-partial paths */
+ if (path_index++ == numpaths)
+ break;
+
+ costarr[min_index] += subpath->total_cost;
+
+ /* Update the new min cost array index */
+ for (min_index = i = 0; i < arrlen; i++)
+ {
+ if (costarr[i] < costarr[min_index])
+ min_index = i;
+ }
+ }
+
+ /* Return the highest cost from the array */
+ for (max_index = i = 0; i < arrlen; i++)
+ {
+ if (costarr[i] > costarr[max_index])
+ max_index = i;
+ }
+
+ return costarr[max_index];
+}
+
+/*
+ * cost_append
+ * Determines and returns the cost of an Append node.
+ */
+void
+cost_append(AppendPath *apath)
+{
+ ListCell *l;
+
+ apath->path.startup_cost = 0;
+ apath->path.total_cost = 0;
+ apath->path.rows = 0;
+
+ if (apath->subpaths == NIL)
+ return;
+
+ if (!apath->path.parallel_aware)
+ {
+ List *pathkeys = apath->path.pathkeys;
+
+ if (pathkeys == NIL)
+ {
+ Path *subpath = (Path *) linitial(apath->subpaths);
+
+ /*
+ * For an unordered, non-parallel-aware Append we take the startup
+ * cost as the startup cost of the first subpath.
+ */
+ apath->path.startup_cost = subpath->startup_cost;
+
+ /* Compute rows and costs as sums of subplan rows and costs. */
+ foreach(l, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(l);
+
+ apath->path.rows += subpath->rows;
+ apath->path.total_cost += subpath->total_cost;
+ }
+ }
+ else
+ {
+ /*
+ * For an ordered, non-parallel-aware Append we take the startup
+ * cost as the sum of the subpath startup costs. This ensures
+ * that we don't underestimate the startup cost when a query's
+ * LIMIT is such that several of the children have to be run to
+ * satisfy it. This might be overkill --- another plausible hack
+ * would be to take the Append's startup cost as the maximum of
+ * the child startup costs. But we don't want to risk believing
+ * that an ORDER BY LIMIT query can be satisfied at small cost
+ * when the first child has small startup cost but later ones
+ * don't. (If we had the ability to deal with nonlinear cost
+ * interpolation for partial retrievals, we would not need to be
+ * so conservative about this.)
+ *
+ * This case is also different from the above in that we have to
+ * account for possibly injecting sorts into subpaths that aren't
+ * natively ordered.
+ */
+ foreach(l, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(l);
+ Path sort_path; /* dummy for result of cost_sort */
+
+ if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ {
+ /*
+ * We'll need to insert a Sort node, so include costs for
+ * that. We can use the parent's LIMIT if any, since we
+ * certainly won't pull more than that many tuples from
+ * any child.
+ */
+ cost_sort(&sort_path,
+ NULL, /* doesn't currently need root */
+ pathkeys,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ subpath = &sort_path;
+ }
+
+ apath->path.rows += subpath->rows;
+ apath->path.startup_cost += subpath->startup_cost;
+ apath->path.total_cost += subpath->total_cost;
+ }
+ }
+ }
+ else /* parallel-aware */
+ {
+ int i = 0;
+ double parallel_divisor = get_parallel_divisor(&apath->path);
+
+ /* Parallel-aware Append never produces ordered output. */
+ Assert(apath->path.pathkeys == NIL);
+
+ /* Calculate startup cost. */
+ foreach(l, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(l);
+
+ /*
+ * Append will start returning tuples when the child node having
+ * lowest startup cost is done setting up. We consider only the
+ * first few subplans that immediately get a worker assigned.
+ */
+ if (i == 0)
+ apath->path.startup_cost = subpath->startup_cost;
+ else if (i < apath->path.parallel_workers)
+ apath->path.startup_cost = Min(apath->path.startup_cost,
+ subpath->startup_cost);
+
+ /*
+ * Apply parallel divisor to subpaths. Scale the number of rows
+ * for each partial subpath based on the ratio of the parallel
+ * divisor originally used for the subpath to the one we adopted.
+ * Also add the cost of partial paths to the total cost, but
+ * ignore non-partial paths for now.
+ */
+ if (i < apath->first_partial_path)
+ apath->path.rows += subpath->rows / parallel_divisor;
+ else
+ {
+ double subpath_parallel_divisor;
+
+ subpath_parallel_divisor = get_parallel_divisor(subpath);
+ apath->path.rows += subpath->rows * (subpath_parallel_divisor /
+ parallel_divisor);
+ apath->path.total_cost += subpath->total_cost;
+ }
+
+ apath->path.rows = clamp_row_est(apath->path.rows);
+
+ i++;
+ }
+
+ /* Add cost for non-partial subpaths. */
+ apath->path.total_cost +=
+ append_nonpartial_cost(apath->subpaths,
+ apath->first_partial_path,
+ apath->path.parallel_workers);
+ }
+
+ /*
+ * Although Append does not do any selection or projection, it's not free;
+ * add a small per-tuple overhead.
+ */
+ apath->path.total_cost +=
+ cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * apath->path.rows;
+}
+
+/*
+ * cost_merge_append
+ * Determines and returns the cost of a MergeAppend node.
+ *
+ * MergeAppend merges several pre-sorted input streams, using a heap that
+ * at any given instant holds the next tuple from each stream. If there
+ * are N streams, we need about N*log2(N) tuple comparisons to construct
+ * the heap at startup, and then for each output tuple, about log2(N)
+ * comparisons to replace the top entry.
+ *
+ * (The effective value of N will drop once some of the input streams are
+ * exhausted, but it seems unlikely to be worth trying to account for that.)
+ *
+ * The heap is never spilled to disk, since we assume N is not very large.
+ * So this is much simpler than cost_sort.
+ *
+ * As in cost_sort, we charge two operator evals per tuple comparison.
+ *
+ * 'pathkeys' is a list of sort keys
+ * 'n_streams' is the number of input streams
+ * 'input_startup_cost' is the sum of the input streams' startup costs
+ * 'input_total_cost' is the sum of the input streams' total costs
+ * 'tuples' is the number of tuples in all the streams
+ */
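+/*
+ * For illustration: merging four streams with the default cpu_operator_cost
+ * of 0.0025 gives comparison_cost = 0.005, a heap build cost of
+ * 0.005 * 4 * log2(4) = 0.04, and about 0.005 * log2(4) = 0.01 of comparison
+ * cost per output tuple, on top of the small per-tuple Append overhead.
+ */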
+void
+cost_merge_append(Path *path, PlannerInfo *root,
+ List *pathkeys, int n_streams,
+ Cost input_startup_cost, Cost input_total_cost,
+ double tuples)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ Cost comparison_cost;
+ double N;
+ double logN;
+
+ /*
+ * Avoid log(0)...
+ */
+ N = (n_streams < 2) ? 2.0 : (double) n_streams;
+ logN = LOG2(N);
+
+ /* Assumed cost per tuple comparison */
+ comparison_cost = 2.0 * cpu_operator_cost;
+
+ /* Heap creation cost */
+ startup_cost += comparison_cost * N * logN;
+
+ /* Per-tuple heap maintenance cost */
+ run_cost += tuples * comparison_cost * logN;
+
+ /*
+ * Although MergeAppend does not do any selection or projection, it's not
+ * free; add a small per-tuple overhead.
+ */
+ run_cost += cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * tuples;
+
+ path->startup_cost = startup_cost + input_startup_cost;
+ path->total_cost = startup_cost + run_cost + input_total_cost;
+}
+
+/*
+ * cost_material
+ * Determines and returns the cost of materializing a relation, including
+ * the cost of reading the input data.
+ *
+ * If the total volume of data to materialize exceeds work_mem, we will need
+ * to write it to disk, so the cost is much higher in that case.
+ *
+ * Note that here we are estimating the costs for the first scan of the
+ * relation, so the materialization is all overhead --- any savings will
+ * occur only on rescan, which is estimated in cost_rescan.
+ */
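+/*
+ * For illustration, with the default work_mem of 4MB and seq_page_cost of
+ * 1.0: input whose estimated size comes to 80MB spills to disk and adds
+ * ceil(80MB / 8kB) = 10240 to the run cost, on top of the per-tuple
+ * bookkeeping charge applied either way.
+ */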
+void
+cost_material(Path *path,
+ Cost input_startup_cost, Cost input_total_cost,
+ double tuples, int width)
+{
+ Cost startup_cost = input_startup_cost;
+ Cost run_cost = input_total_cost - input_startup_cost;
+ double nbytes = relation_byte_size(tuples, width);
+ long work_mem_bytes = work_mem * 1024L;
+
+ path->rows = tuples;
+
+ /*
+ * Whether spilling or not, charge 2x cpu_operator_cost per tuple to
+ * reflect bookkeeping overhead. (This rate must be more than what
+ * cost_rescan charges for materialize, ie, cpu_operator_cost per tuple;
+ * if it is exactly the same then there will be a cost tie between
+ * nestloop with A outer, materialized B inner and nestloop with B outer,
+ * materialized A inner. The extra cost ensures we'll prefer
+ * materializing the smaller rel.) Note that this is normally a good deal
+	 * less than cpu_tuple_cost, which is OK because a Material plan node
+ * doesn't do qual-checking or projection, so it's got less overhead than
+ * most plan nodes.
+ */
+ run_cost += 2 * cpu_operator_cost * tuples;
+
+ /*
+ * If we will spill to disk, charge at the rate of seq_page_cost per page.
+ * This cost is assumed to be evenly spread through the plan run phase,
+ * which isn't exactly accurate but our cost model doesn't allow for
+ * nonuniform costs within the run phase.
+ */
+ if (nbytes > work_mem_bytes)
+ {
+ double npages = ceil(nbytes / BLCKSZ);
+
+ run_cost += seq_page_cost * npages;
+ }
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_memoize_rescan
+ * Determines the estimated cost of rescanning a Memoize node.
+ *
+ * In order to estimate this, we must gain knowledge of how often we expect to
+ * be called and how many distinct sets of parameters we are likely to be
+ * called with. If we expect a good cache hit ratio, then we can set our
+ * costs to account for that hit ratio, plus a little bit of cost for the
+ * caching itself. Caching will not work out well if we expect to be called
+ * with too many distinct parameter values. The worst-case here is that we
+ * never see any parameter value twice, in which case we'd never get a cache
+ * hit and caching would be a complete waste of effort.
+ */
+static void
+cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
+ Cost *rescan_startup_cost, Cost *rescan_total_cost)
+{
+ EstimationInfo estinfo;
+ Cost input_startup_cost = mpath->subpath->startup_cost;
+ Cost input_total_cost = mpath->subpath->total_cost;
+ double tuples = mpath->subpath->rows;
+ double calls = mpath->calls;
+ int width = mpath->subpath->pathtarget->width;
+
+ double hash_mem_bytes;
+ double est_entry_bytes;
+ double est_cache_entries;
+ double ndistinct;
+ double evict_ratio;
+ double hit_ratio;
+ Cost startup_cost;
+ Cost total_cost;
+
+ /* available cache space */
+ hash_mem_bytes = get_hash_memory_limit();
+
+ /*
+ * Set the number of bytes each cache entry should consume in the cache.
+ * To provide us with better estimations on how many cache entries we can
+ * store at once, we make a call to the executor here to ask it what
+ * memory overheads there are for a single cache entry.
+ *
+ * XXX we also store the cache key, but that's not accounted for here.
+ */
+ est_entry_bytes = relation_byte_size(tuples, width) +
+ ExecEstimateCacheEntryOverheadBytes(tuples);
+
+ /* estimate on the upper limit of cache entries we can hold at once */
+ est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
+
+ /* estimate on the distinct number of parameter values */
+ ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL,
+ &estinfo);
+
+ /*
+ * When the estimation fell back on using a default value, it's a bit too
+ * risky to assume that it's ok to use a Memoize node. The use of a
+ * default could cause us to use a Memoize node when it's really
+ * inappropriate to do so. If we see that this has been done, then we'll
+ * assume that every call will have unique parameters, which will almost
+ * certainly mean a MemoizePath will never survive add_path().
+ */
+ if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0)
+ ndistinct = calls;
+
+ /*
+ * Since we've already estimated the maximum number of entries we can
+ * store at once and know the estimated number of distinct values we'll be
+ * called with, we'll take this opportunity to set the path's est_entries.
+ * This will ultimately determine the hash table size that the executor
+ * will use. If we leave this at zero, the executor will just choose the
+ * size itself. Really this is not the right place to do this, but it's
+ * convenient since everything is already calculated.
+ */
+ mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
+ PG_UINT32_MAX);
+
+ /*
+ * When the number of distinct parameter values is above the amount we can
+ * store in the cache, then we'll have to evict some entries from the
+ * cache. This is not free. Here we estimate how often we'll incur the
+ * cost of that eviction.
+ */
+ evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct;
+
+ /*
+ * In order to estimate how costly a single scan will be, we need to
+ * attempt to estimate what the cache hit ratio will be. To do that we
+ * must look at how many scans are estimated in total for this node and
+ * how many of those scans we expect to get a cache hit.
+ */
+ hit_ratio = 1.0 / ndistinct * Min(est_cache_entries, ndistinct) -
+ (ndistinct / calls);
+
+ /* Ensure we don't go negative */
+ hit_ratio = Max(hit_ratio, 0.0);
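+	/*
+	 * For example, with calls = 1000, ndistinct = 100 and room for 80 cache
+	 * entries, evict_ratio = 1 - 80/100 = 0.2 and hit_ratio = 80/100 -
+	 * 100/1000 = 0.7, so the rescan is costed at roughly 30% of a full
+	 * rescan plus the caching overheads charged below.
+	 */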
+
+ /*
+ * Set the total_cost accounting for the expected cache hit ratio. We
+ * also add on a cpu_operator_cost to account for a cache lookup. This
+ * will happen regardless of whether it's a cache hit or not.
+ */
+ total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost;
+
+ /* Now adjust the total cost to account for cache evictions */
+
+ /* Charge a cpu_tuple_cost for evicting the actual cache entry */
+ total_cost += cpu_tuple_cost * evict_ratio;
+
+ /*
+ * Charge a 10th of cpu_operator_cost to evict every tuple in that entry.
+ * The per-tuple eviction is really just a pfree, so charging a whole
+ * cpu_operator_cost seems a little excessive.
+ */
+ total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples;
+
+ /*
+ * Now adjust for storing things in the cache, since that's not free
+ * either. Everything must go in the cache. We don't proportion this
+ * over any ratio, just apply it once for the scan. We charge a
+ * cpu_tuple_cost for the creation of the cache entry and also a
+ * cpu_operator_cost for each tuple we expect to cache.
+ */
+ total_cost += cpu_tuple_cost + cpu_operator_cost * tuples;
+
+ /*
+	 * Getting the first row must also be proportioned according to the
+ * expected cache hit ratio.
+ */
+ startup_cost = input_startup_cost * (1.0 - hit_ratio);
+
+ /*
+ * Additionally we charge a cpu_tuple_cost to account for cache lookups,
+ * which we'll do regardless of whether it was a cache hit or not.
+ */
+ startup_cost += cpu_tuple_cost;
+
+ *rescan_startup_cost = startup_cost;
+ *rescan_total_cost = total_cost;
+}
+
+/*
+ * cost_agg
+ * Determines and returns the cost of performing an Agg plan node,
+ * including the cost of its input.
+ *
+ * aggcosts can be NULL when there are no actual aggregate functions (i.e.,
+ * we are using a hashed Agg node just to do grouping).
+ *
+ * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
+ * are for appropriately-sorted input.
+ */
+void
+cost_agg(Path *path, PlannerInfo *root,
+ AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
+ int numGroupCols, double numGroups,
+ List *quals,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples, double input_width)
+{
+ double output_tuples;
+ Cost startup_cost;
+ Cost total_cost;
+ AggClauseCosts dummy_aggcosts;
+
+ /* Use all-zero per-aggregate costs if NULL is passed */
+ if (aggcosts == NULL)
+ {
+ Assert(aggstrategy == AGG_HASHED);
+ MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts));
+ aggcosts = &dummy_aggcosts;
+ }
+
+ /*
+ * The transCost.per_tuple component of aggcosts should be charged once
+ * per input tuple, corresponding to the costs of evaluating the aggregate
+ * transfns and their input expressions. The finalCost.per_tuple component
+ * is charged once per output tuple, corresponding to the costs of
+ * evaluating the finalfns. Startup costs are of course charged but once.
+ *
+ * If we are grouping, we charge an additional cpu_operator_cost per
+ * grouping column per input tuple for grouping comparisons.
+ *
+ * We will produce a single output tuple if not grouping, and a tuple per
+ * group otherwise. We charge cpu_tuple_cost for each output tuple.
+ *
+ * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
+ * same total CPU cost, but AGG_SORTED has lower startup cost. If the
+ * input path is already sorted appropriately, AGG_SORTED should be
+ * preferred (since it has no risk of memory overflow). This will happen
+ * as long as the computed total costs are indeed exactly equal --- but if
+ * there's roundoff error we might do the wrong thing. So be sure that
+ * the computations below form the same intermediate values in the same
+ * order.
+ */
+ if (aggstrategy == AGG_PLAIN)
+ {
+ startup_cost = input_total_cost;
+ startup_cost += aggcosts->transCost.startup;
+ startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+ startup_cost += aggcosts->finalCost.startup;
+ startup_cost += aggcosts->finalCost.per_tuple;
+ /* we aren't grouping */
+ total_cost = startup_cost + cpu_tuple_cost;
+ output_tuples = 1;
+ }
+ else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
+ {
+ /* Here we are able to deliver output on-the-fly */
+ startup_cost = input_startup_cost;
+ total_cost = input_total_cost;
+ if (aggstrategy == AGG_MIXED && !enable_hashagg)
+ {
+ startup_cost += disable_cost;
+ total_cost += disable_cost;
+ }
+ /* calcs phrased this way to match HASHED case, see note above */
+ total_cost += aggcosts->transCost.startup;
+ total_cost += aggcosts->transCost.per_tuple * input_tuples;
+ total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
+ total_cost += aggcosts->finalCost.startup;
+ total_cost += aggcosts->finalCost.per_tuple * numGroups;
+ total_cost += cpu_tuple_cost * numGroups;
+ output_tuples = numGroups;
+ }
+ else
+ {
+ /* must be AGG_HASHED */
+ startup_cost = input_total_cost;
+ if (!enable_hashagg)
+ startup_cost += disable_cost;
+ startup_cost += aggcosts->transCost.startup;
+ startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+ /* cost of computing hash value */
+ startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
+ startup_cost += aggcosts->finalCost.startup;
+
+ total_cost = startup_cost;
+ total_cost += aggcosts->finalCost.per_tuple * numGroups;
+ /* cost of retrieving from hash table */
+ total_cost += cpu_tuple_cost * numGroups;
+ output_tuples = numGroups;
+ }
+
+ /*
+ * Add the disk costs of hash aggregation that spills to disk.
+ *
+ * Groups that go into the hash table stay in memory until finalized, so
+ * spilling and reprocessing tuples doesn't incur additional invocations
+ * of transCost or finalCost. Furthermore, the computed hash value is
+ * stored with the spilled tuples, so we don't incur extra invocations of
+ * the hash function.
+ *
+ * Hash Agg begins returning tuples after the first batch is complete.
+ * Accrue writes (spilled tuples) to startup_cost and to total_cost;
+ * accrue reads only to total_cost.
+ */
+ if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
+ {
+ double pages;
+ double pages_written = 0.0;
+ double pages_read = 0.0;
+ double spill_cost;
+ double hashentrysize;
+ double nbatches;
+ Size mem_limit;
+ uint64 ngroups_limit;
+ int num_partitions;
+ int depth;
+
+ /*
+ * Estimate number of batches based on the computed limits. If less
+ * than or equal to one, all groups are expected to fit in memory;
+ * otherwise we expect to spill.
+ */
+ hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
+ input_width,
+ aggcosts->transitionSpace);
+ hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit,
+ &ngroups_limit, &num_partitions);
+
+ nbatches = Max((numGroups * hashentrysize) / mem_limit,
+ numGroups / ngroups_limit);
+
+ nbatches = Max(ceil(nbatches), 1.0);
+ num_partitions = Max(num_partitions, 2);
+
+ /*
+ * The number of partitions can change at different levels of
+ * recursion; but for the purposes of this calculation assume it stays
+ * constant.
+ */
+ depth = ceil(log(nbatches) / log(num_partitions));
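+	/*
+	 * For example, 64 batches with 4 spill partitions per pass give
+	 * depth = ceil(log(64) / log(4)) = 3, so each input tuple is expected to
+	 * be written and read three times.
+	 */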
+
+ /*
+ * Estimate number of pages read and written. For each level of
+ * recursion, a tuple must be written and then later read.
+ */
+ pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
+ pages_written = pages_read = pages * depth;
+
+ /*
+ * HashAgg has somewhat worse IO behavior than Sort on typical
+ * hardware/OS combinations. Account for this with a generic penalty.
+ */
+ pages_read *= 2.0;
+ pages_written *= 2.0;
+
+ startup_cost += pages_written * random_page_cost;
+ total_cost += pages_written * random_page_cost;
+ total_cost += pages_read * seq_page_cost;
+
+ /* account for CPU cost of spilling a tuple and reading it back */
+ spill_cost = depth * input_tuples * 2.0 * cpu_tuple_cost;
+ startup_cost += spill_cost;
+ total_cost += spill_cost;
+ }
+
+ /*
+ * If there are quals (HAVING quals), account for their cost and
+ * selectivity.
+ */
+ if (quals)
+ {
+ QualCost qual_cost;
+
+ cost_qual_eval(&qual_cost, quals, root);
+ startup_cost += qual_cost.startup;
+ total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
+
+ output_tuples = clamp_row_est(output_tuples *
+ clauselist_selectivity(root,
+ quals,
+ 0,
+ JOIN_INNER,
+ NULL));
+ }
+
+ path->rows = output_tuples;
+ path->startup_cost = startup_cost;
+ path->total_cost = total_cost;
+}
+
+/*
+ * cost_windowagg
+ * Determines and returns the cost of performing a WindowAgg plan node,
+ * including the cost of its input.
+ *
+ * Input is assumed already properly sorted.
+ */
+void
+cost_windowagg(Path *path, PlannerInfo *root,
+ List *windowFuncs, int numPartCols, int numOrderCols,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples)
+{
+ Cost startup_cost;
+ Cost total_cost;
+ ListCell *lc;
+
+ startup_cost = input_startup_cost;
+ total_cost = input_total_cost;
+
+ /*
+ * Window functions are assumed to cost their stated execution cost, plus
+ * the cost of evaluating their input expressions, per tuple. Since they
+ * may in fact evaluate their inputs at multiple rows during each cycle,
+ * this could be a drastic underestimate; but without a way to know how
+ * many rows the window function will fetch, it's hard to do better. In
+ * any case, it's a good estimate for all the built-in window functions,
+ * so we'll just do this for now.
+ */
+ foreach(lc, windowFuncs)
+ {
+ WindowFunc *wfunc = lfirst_node(WindowFunc, lc);
+ Cost wfunccost;
+ QualCost argcosts;
+
+ argcosts.startup = argcosts.per_tuple = 0;
+ add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
+ &argcosts);
+ startup_cost += argcosts.startup;
+ wfunccost = argcosts.per_tuple;
+
+ /* also add the input expressions' cost to per-input-row costs */
+ cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
+ startup_cost += argcosts.startup;
+ wfunccost += argcosts.per_tuple;
+
+ /*
+ * Add the filter's cost to per-input-row costs. XXX We should reduce
+ * input expression costs according to filter selectivity.
+ */
+ cost_qual_eval_node(&argcosts, (Node *) wfunc->aggfilter, root);
+ startup_cost += argcosts.startup;
+ wfunccost += argcosts.per_tuple;
+
+ total_cost += wfunccost * input_tuples;
+ }
+
+ /*
+ * We also charge cpu_operator_cost per grouping column per tuple for
+ * grouping comparisons, plus cpu_tuple_cost per tuple for general
+ * overhead.
+ *
+ * XXX this neglects costs of spooling the data to disk when it overflows
+ * work_mem. Sooner or later that should get accounted for.
+ */
+ total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
+ total_cost += cpu_tuple_cost * input_tuples;
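+	/*
+	 * For example, two partitioning columns and one ordering column over
+	 * 10000 input rows add 0.0025 * 3 * 10000 = 75 of comparison cost and
+	 * 0.01 * 10000 = 100 of general overhead with the default settings.
+	 */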
+
+ path->rows = input_tuples;
+ path->startup_cost = startup_cost;
+ path->total_cost = total_cost;
+}
+
+/*
+ * cost_group
+ * Determines and returns the cost of performing a Group plan node,
+ * including the cost of its input.
+ *
+ * Note: caller must ensure that input costs are for appropriately-sorted
+ * input.
+ */
+void
+cost_group(Path *path, PlannerInfo *root,
+ int numGroupCols, double numGroups,
+ List *quals,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples)
+{
+ double output_tuples;
+ Cost startup_cost;
+ Cost total_cost;
+
+ output_tuples = numGroups;
+ startup_cost = input_startup_cost;
+ total_cost = input_total_cost;
+
+ /*
+ * Charge one cpu_operator_cost per comparison per input tuple. We assume
+	 * all columns get compared for most of the tuples.
+ */
+ total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+
+ /*
+ * If there are quals (HAVING quals), account for their cost and
+ * selectivity.
+ */
+ if (quals)
+ {
+ QualCost qual_cost;
+
+ cost_qual_eval(&qual_cost, quals, root);
+ startup_cost += qual_cost.startup;
+ total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
+
+ output_tuples = clamp_row_est(output_tuples *
+ clauselist_selectivity(root,
+ quals,
+ 0,
+ JOIN_INNER,
+ NULL));
+ }
+
+ path->rows = output_tuples;
+ path->startup_cost = startup_cost;
+ path->total_cost = total_cost;
+}
+
+/*
+ * initial_cost_nestloop
+ * Preliminary estimate of the cost of a nestloop join path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_nestloop will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_nestloop
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here, since that's by far the most expensive part of the
+ * calculations. The end result is that CPU-cost considerations must be
+ * left for the second phase; and for SEMI/ANTI joins, we must also postpone
+ * incorporation of the inner path's run cost.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_nestloop
+ * 'jointype' is the type of join to be performed
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
+ * 'extra' contains miscellaneous information about the join
+ */
+void
+initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ Path *outer_path, Path *inner_path,
+ JoinPathExtraData *extra)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ double outer_path_rows = outer_path->rows;
+ Cost inner_rescan_start_cost;
+ Cost inner_rescan_total_cost;
+ Cost inner_run_cost;
+ Cost inner_rescan_run_cost;
+
+ /* estimate costs to rescan the inner relation */
+ cost_rescan(root, inner_path,
+ &inner_rescan_start_cost,
+ &inner_rescan_total_cost);
+
+ /* cost of source data */
+
+ /*
+ * NOTE: clearly, we must pay both outer and inner paths' startup_cost
+ * before we can start returning tuples, so the join's startup cost is
+ * their sum. We'll also pay the inner path's rescan startup cost
+ * multiple times.
+ */
+ startup_cost += outer_path->startup_cost + inner_path->startup_cost;
+ run_cost += outer_path->total_cost - outer_path->startup_cost;
+ if (outer_path_rows > 1)
+ run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
+
+ inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
+ inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
+
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI ||
+ extra->inner_unique)
+ {
+ /*
+ * With a SEMI or ANTI join, or if the innerrel is known unique, the
+ * executor will stop after the first match.
+ *
+ * Getting decent estimates requires inspection of the join quals,
+ * which we choose to postpone to final_cost_nestloop.
+ */
+
+ /* Save private data for final_cost_nestloop */
+ workspace->inner_run_cost = inner_run_cost;
+ workspace->inner_rescan_run_cost = inner_rescan_run_cost;
+ }
+ else
+ {
+ /* Normal case; we'll scan whole input rel for each outer row */
+ run_cost += inner_run_cost;
+ if (outer_path_rows > 1)
+ run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
+ }
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost;
+ /* Save private data for final_cost_nestloop */
+ workspace->run_cost = run_cost;
+}
+
+/*
+ * final_cost_nestloop
+ * Final estimate of the cost and result size of a nestloop join path.
+ *
+ * 'path' is already filled in except for the rows and cost fields
+ * 'workspace' is the result from initial_cost_nestloop
+ * 'extra' contains miscellaneous information about the join
+ */
+void
+final_cost_nestloop(PlannerInfo *root, NestPath *path,
+ JoinCostWorkspace *workspace,
+ JoinPathExtraData *extra)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ Cost cpu_per_tuple;
+ QualCost restrict_qual_cost;
+ double ntuples;
+
+ /* Protect some assumptions below that rowcounts aren't zero */
+ if (outer_path_rows <= 0)
+ outer_path_rows = 1;
+ if (inner_path_rows <= 0)
+ inner_path_rows = 1;
+ /* Mark the path with the correct row estimate */
+ if (path->jpath.path.param_info)
+ path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
+ else
+ path->jpath.path.rows = path->jpath.path.parent->rows;
+
+ /* For partial paths, scale row estimate. */
+ if (path->jpath.path.parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(&path->jpath.path);
+
+ path->jpath.path.rows =
+ clamp_row_est(path->jpath.path.rows / parallel_divisor);
+ }
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_nestloop)
+ startup_cost += disable_cost;
+
+ /* cost of inner-relation source data (we already dealt with outer rel) */
+
+ if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI ||
+ extra->inner_unique)
+ {
+ /*
+ * With a SEMI or ANTI join, or if the innerrel is known unique, the
+ * executor will stop after the first match.
+ */
+ Cost inner_run_cost = workspace->inner_run_cost;
+ Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+ double outer_matched_rows;
+ double outer_unmatched_rows;
+ Selectivity inner_scan_frac;
+
+ /*
+ * For an outer-rel row that has at least one match, we can expect the
+ * inner scan to stop after a fraction 1/(match_count+1) of the inner
+ * rows, if the matches are evenly distributed. Since they probably
+ * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+ * that fraction. (If we used a larger fuzz factor, we'd have to
+ * clamp inner_scan_frac to at most 1.0; but since match_count is at
+ * least 1, no such clamp is needed now.)
+ */
+ outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
+ outer_unmatched_rows = outer_path_rows - outer_matched_rows;
+ inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
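+		/*
+		 * For example, if match_count is 3 we expect the executor to scan
+		 * about 2 / (3 + 1) = 50% of the inner rows for each matched outer
+		 * row.
+		 */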
+
+ /*
+ * Compute number of tuples processed (not number emitted!). First,
+ * account for successfully-matched outer rows.
+ */
+ ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
+
+ /*
+ * Now we need to estimate the actual costs of scanning the inner
+ * relation, which may be quite a bit less than N times inner_run_cost
+ * due to early scan stops. We consider two cases. If the inner path
+ * is an indexscan using all the joinquals as indexquals, then an
+ * unmatched outer row results in an indexscan returning no rows,
+ * which is probably quite cheap. Otherwise, the executor will have
+ * to scan the whole inner rel for an unmatched row; not so cheap.
+ */
+ if (has_indexed_join_quals(path))
+ {
+ /*
+ * Successfully-matched outer rows will only require scanning
+ * inner_scan_frac of the inner relation. In this case, we don't
+ * need to charge the full inner_run_cost even when that's more
+ * than inner_rescan_run_cost, because we can assume that none of
+ * the inner scans ever scan the whole inner relation. So it's
+ * okay to assume that all the inner scan executions can be
+ * fractions of the full cost, even if materialization is reducing
+ * the rescan cost. At this writing, it's impossible to get here
+ * for a materialized inner scan, so inner_run_cost and
+ * inner_rescan_run_cost will be the same anyway; but just in
+ * case, use inner_run_cost for the first matched tuple and
+ * inner_rescan_run_cost for additional ones.
+ */
+ run_cost += inner_run_cost * inner_scan_frac;
+ if (outer_matched_rows > 1)
+ run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+ /*
+ * Add the cost of inner-scan executions for unmatched outer rows.
+ * We estimate this as the same cost as returning the first tuple
+ * of a nonempty scan. We consider that these are all rescans,
+ * since we used inner_run_cost once already.
+ */
+ run_cost += outer_unmatched_rows *
+ inner_rescan_run_cost / inner_path_rows;
+
+ /*
+ * We won't be evaluating any quals at all for unmatched rows, so
+ * don't add them to ntuples.
+ */
+ }
+ else
+ {
+ /*
+ * Here, a complicating factor is that rescans may be cheaper than
+ * first scans. If we never scan all the way to the end of the
+ * inner rel, it might be (depending on the plan type) that we'd
+ * never pay the whole inner first-scan run cost. However it is
+ * difficult to estimate whether that will happen (and it could
+ * not happen if there are any unmatched outer rows!), so be
+ * conservative and always charge the whole first-scan cost once.
+ * We consider this charge to correspond to the first unmatched
+ * outer row, unless there isn't one in our estimate, in which
+ * case blame it on the first matched row.
+ */
+
+ /* First, count all unmatched join tuples as being processed */
+ ntuples += outer_unmatched_rows * inner_path_rows;
+
+ /* Now add the forced full scan, and decrement appropriate count */
+ run_cost += inner_run_cost;
+ if (outer_unmatched_rows >= 1)
+ outer_unmatched_rows -= 1;
+ else
+ outer_matched_rows -= 1;
+
+ /* Add inner run cost for additional outer tuples having matches */
+ if (outer_matched_rows > 0)
+ run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac;
+
+ /* Add inner run cost for additional unmatched outer tuples */
+ if (outer_unmatched_rows > 0)
+ run_cost += outer_unmatched_rows * inner_rescan_run_cost;
+ }
+ }
+ else
+ {
+ /* Normal-case source costs were included in preliminary estimate */
+
+ /* Compute number of tuples processed (not number emitted!) */
+ ntuples = outer_path_rows * inner_path_rows;
+ }
+
+ /* CPU costs */
+ cost_qual_eval(&restrict_qual_cost, path->jpath.joinrestrictinfo, root);
+ startup_cost += restrict_qual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * ntuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->jpath.path.pathtarget->cost.startup;
+ run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
+
+ path->jpath.path.startup_cost = startup_cost;
+ path->jpath.path.total_cost = startup_cost + run_cost;
+}
+
+/*
+ * initial_cost_mergejoin
+ * Preliminary estimate of the cost of a mergejoin path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_mergejoin will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_mergejoin
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here, except for obtaining the scan selectivity estimate which
+ * is really essential (but fortunately, use of caching keeps the cost of
+ * getting that down to something reasonable).
+ * We also assume that cost_sort is cheap enough to use here.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_mergejoin
+ * 'jointype' is the type of join to be performed
+ * 'mergeclauses' is the list of joinclauses to be used as merge clauses
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
+ * 'outersortkeys' is the list of sort keys for the outer path
+ * 'innersortkeys' is the list of sort keys for the inner path
+ * 'extra' contains miscellaneous information about the join
+ *
+ * Note: outersortkeys and innersortkeys should be NIL if no explicit
+ * sort is needed because the respective source path is already ordered.
+ */
+void
+initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *mergeclauses,
+ Path *outer_path, Path *inner_path,
+ List *outersortkeys, List *innersortkeys,
+ JoinPathExtraData *extra)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ Cost inner_run_cost;
+ double outer_rows,
+ inner_rows,
+ outer_skip_rows,
+ inner_skip_rows;
+ Selectivity outerstartsel,
+ outerendsel,
+ innerstartsel,
+ innerendsel;
+ Path sort_path; /* dummy for result of cost_sort */
+
+ /* Protect some assumptions below that rowcounts aren't zero */
+ if (outer_path_rows <= 0)
+ outer_path_rows = 1;
+ if (inner_path_rows <= 0)
+ inner_path_rows = 1;
+
+ /*
+ * A merge join will stop as soon as it exhausts either input stream
+ * (unless it's an outer join, in which case the outer side has to be
+ * scanned all the way anyway). Estimate fraction of the left and right
+ * inputs that will actually need to be scanned. Likewise, we can
+ * estimate the number of rows that will be skipped before the first join
+ * pair is found, which should be factored into startup cost. We use only
+ * the first (most significant) merge clause for this purpose. Since
+ * mergejoinscansel() is a fairly expensive computation, we cache the
+ * results in the merge clause RestrictInfo.
+ */
+ if (mergeclauses && jointype != JOIN_FULL)
+ {
+ RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
+ List *opathkeys;
+ List *ipathkeys;
+ PathKey *opathkey;
+ PathKey *ipathkey;
+ MergeScanSelCache *cache;
+
+ /* Get the input pathkeys to determine the sort-order details */
+ opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
+ ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys;
+ Assert(opathkeys);
+ Assert(ipathkeys);
+ opathkey = (PathKey *) linitial(opathkeys);
+ ipathkey = (PathKey *) linitial(ipathkeys);
+ /* debugging check */
+ if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
+ opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation ||
+ opathkey->pk_strategy != ipathkey->pk_strategy ||
+ opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
+ elog(ERROR, "left and right pathkeys do not match in mergejoin");
+
+ /* Get the selectivity with caching */
+ cache = cached_scansel(root, firstclause, opathkey);
+
+ if (bms_is_subset(firstclause->left_relids,
+ outer_path->parent->relids))
+ {
+ /* left side of clause is outer */
+ outerstartsel = cache->leftstartsel;
+ outerendsel = cache->leftendsel;
+ innerstartsel = cache->rightstartsel;
+ innerendsel = cache->rightendsel;
+ }
+ else
+ {
+ /* left side of clause is inner */
+ outerstartsel = cache->rightstartsel;
+ outerendsel = cache->rightendsel;
+ innerstartsel = cache->leftstartsel;
+ innerendsel = cache->leftendsel;
+ }
+ if (jointype == JOIN_LEFT ||
+ jointype == JOIN_ANTI)
+ {
+ outerstartsel = 0.0;
+ outerendsel = 1.0;
+ }
+ else if (jointype == JOIN_RIGHT)
+ {
+ innerstartsel = 0.0;
+ innerendsel = 1.0;
+ }
+ }
+ else
+ {
+ /* cope with clauseless or full mergejoin */
+ outerstartsel = innerstartsel = 0.0;
+ outerendsel = innerendsel = 1.0;
+ }
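+
+ /*
+ * A purely illustrative example (hypothetical numbers): if the first merge
+ * clause suggests that the smallest outer keys have no inner counterpart,
+ * we might get outerstartsel = 0.4 and outerendsel = 1.0, i.e. we expect
+ * to skip about 40% of the outer input before the first join pair but to
+ * scan it to the end; innerstartsel = 0.0 with innerendsel = 0.7 would
+ * mean the inner scan starts matching immediately but can stop after about
+ * 70% of its rows.
+ */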
+
+ /*
+ * Convert selectivities to row counts. We force outer_rows and
+ * inner_rows to be at least 1, but the skip_rows estimates can be zero.
+ */
+ outer_skip_rows = rint(outer_path_rows * outerstartsel);
+ inner_skip_rows = rint(inner_path_rows * innerstartsel);
+ outer_rows = clamp_row_est(outer_path_rows * outerendsel);
+ inner_rows = clamp_row_est(inner_path_rows * innerendsel);
+
+ Assert(outer_skip_rows <= outer_rows);
+ Assert(inner_skip_rows <= inner_rows);
+
+ /*
+ * Readjust scan selectivities to account for above rounding. This is
+ * normally an insignificant effect, but when there are only a few rows in
+ * the inputs, failing to do this makes for a large percentage error.
+ */
+ outerstartsel = outer_skip_rows / outer_path_rows;
+ innerstartsel = inner_skip_rows / inner_path_rows;
+ outerendsel = outer_rows / outer_path_rows;
+ innerendsel = inner_rows / inner_path_rows;
+
+ Assert(outerstartsel <= outerendsel);
+ Assert(innerstartsel <= innerendsel);
+
+ /* cost of source data */
+
+ if (outersortkeys) /* do we need to sort outer? */
+ {
+ cost_sort(&sort_path,
+ root,
+ outersortkeys,
+ outer_path->total_cost,
+ outer_path_rows,
+ outer_path->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+ startup_cost += sort_path.startup_cost;
+ startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+ * outerstartsel;
+ run_cost += (sort_path.total_cost - sort_path.startup_cost)
+ * (outerendsel - outerstartsel);
+ }
+ else
+ {
+ startup_cost += outer_path->startup_cost;
+ startup_cost += (outer_path->total_cost - outer_path->startup_cost)
+ * outerstartsel;
+ run_cost += (outer_path->total_cost - outer_path->startup_cost)
+ * (outerendsel - outerstartsel);
+ }
+
+ if (innersortkeys) /* do we need to sort inner? */
+ {
+ cost_sort(&sort_path,
+ root,
+ innersortkeys,
+ inner_path->total_cost,
+ inner_path_rows,
+ inner_path->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+ startup_cost += sort_path.startup_cost;
+ startup_cost += (sort_path.total_cost - sort_path.startup_cost)
+ * innerstartsel;
+ inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
+ * (innerendsel - innerstartsel);
+ }
+ else
+ {
+ startup_cost += inner_path->startup_cost;
+ startup_cost += (inner_path->total_cost - inner_path->startup_cost)
+ * innerstartsel;
+ inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
+ * (innerendsel - innerstartsel);
+ }
+
+ /*
+ * We can't yet determine whether rescanning occurs, or whether
+ * materialization of the inner input should be done. The minimum
+ * possible inner input cost, regardless of rescan and materialization
+ * considerations, is inner_run_cost. We include that in
+ * workspace->total_cost, but not yet in run_cost.
+ */
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost + inner_run_cost;
+ /* Save private data for final_cost_mergejoin */
+ workspace->run_cost = run_cost;
+ workspace->inner_run_cost = inner_run_cost;
+ workspace->outer_rows = outer_rows;
+ workspace->inner_rows = inner_rows;
+ workspace->outer_skip_rows = outer_skip_rows;
+ workspace->inner_skip_rows = inner_skip_rows;
+}
+
+/*
+ * final_cost_mergejoin
+ * Final estimate of the cost and result size of a mergejoin path.
+ *
+ * Unlike other costsize functions, this routine makes two actual decisions:
+ * whether the executor will need to do mark/restore, and whether we should
+ * materialize the inner path. It would be logically cleaner to build
+ * separate paths testing these alternatives, but that would require repeating
+ * most of the cost calculations, which are not all that cheap. Since the
+ * choice will not affect output pathkeys or startup cost, only total cost,
+ * there is no possibility of wanting to keep more than one path. So it seems
+ * best to make the decisions here and record them in the path's
+ * skip_mark_restore and materialize_inner fields.
+ *
+ * Mark/restore overhead is usually required, but can be skipped if we know
+ * that the executor need find only one match per outer tuple, and that the
+ * mergeclauses are sufficient to identify a match.
+ *
+ * We materialize the inner path if we need mark/restore and either the inner
+ * path can't support mark/restore, or it's cheaper to use an interposed
+ * Material node to handle mark/restore.
+ *
+ * 'path' is already filled in except for the rows and cost fields and
+ * skip_mark_restore and materialize_inner
+ * 'workspace' is the result from initial_cost_mergejoin
+ * 'extra' contains miscellaneous information about the join
+ */
+void
+final_cost_mergejoin(PlannerInfo *root, MergePath *path,
+ JoinCostWorkspace *workspace,
+ JoinPathExtraData *extra)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double inner_path_rows = inner_path->rows;
+ List *mergeclauses = path->path_mergeclauses;
+ List *innersortkeys = path->innersortkeys;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ Cost inner_run_cost = workspace->inner_run_cost;
+ double outer_rows = workspace->outer_rows;
+ double inner_rows = workspace->inner_rows;
+ double outer_skip_rows = workspace->outer_skip_rows;
+ double inner_skip_rows = workspace->inner_skip_rows;
+ Cost cpu_per_tuple,
+ bare_inner_cost,
+ mat_inner_cost;
+ QualCost merge_qual_cost;
+ QualCost qp_qual_cost;
+ double mergejointuples,
+ rescannedtuples;
+ double rescanratio;
+
+ /* Protect some assumptions below that rowcounts aren't zero */
+ if (inner_path_rows <= 0)
+ inner_path_rows = 1;
+
+ /* Mark the path with the correct row estimate */
+ if (path->jpath.path.param_info)
+ path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
+ else
+ path->jpath.path.rows = path->jpath.path.parent->rows;
+
+ /* For partial paths, scale row estimate. */
+ if (path->jpath.path.parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(&path->jpath.path);
+
+ path->jpath.path.rows =
+ clamp_row_est(path->jpath.path.rows / parallel_divisor);
+ }
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_mergejoin)
+ startup_cost += disable_cost;
+
+ /*
+ * Compute cost of the mergequals and qpquals (other restriction clauses)
+ * separately.
+ */
+ cost_qual_eval(&merge_qual_cost, mergeclauses, root);
+ cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
+ qp_qual_cost.startup -= merge_qual_cost.startup;
+ qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
+
+ /*
+ * With a SEMI or ANTI join, or if the innerrel is known unique, the
+ * executor will stop scanning for matches after the first match. When
+ * all the joinclauses are merge clauses, this means we don't ever need to
+ * back up the merge, and so we can skip mark/restore overhead.
+ */
+ if ((path->jpath.jointype == JOIN_SEMI ||
+ path->jpath.jointype == JOIN_ANTI ||
+ extra->inner_unique) &&
+ (list_length(path->jpath.joinrestrictinfo) ==
+ list_length(path->path_mergeclauses)))
+ path->skip_mark_restore = true;
+ else
+ path->skip_mark_restore = false;
+
+ /*
+ * Get approx # tuples passing the mergequals. We use approx_tuple_count
+ * here because we need an estimate done with JOIN_INNER semantics.
+ */
+ mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
+
+ /*
+ * When there are equal merge keys in the outer relation, the mergejoin
+ * must rescan any matching tuples in the inner relation. This means
+ * re-fetching inner tuples; we have to estimate how often that happens.
+ *
+ * For regular inner and outer joins, the number of re-fetches can be
+ * estimated approximately as size of merge join output minus size of
+ * inner relation. Assume that the distinct key values are 1, 2, ..., and
+ * denote the number of values of each key in the outer relation as m1,
+ * m2, ...; in the inner relation, n1, n2, ... Then we have
+ *
+ * size of join = m1 * n1 + m2 * n2 + ...
+ *
+ * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
+ * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
+ * relation
+ *
+ * This equation works correctly for outer tuples having no inner match
+ * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
+ * are effectively subtracting those from the number of rescanned tuples,
+ * when we should not. Can we do better without expensive selectivity
+ * computations?
+ *
+ * The whole issue is moot if we are working from a unique-ified outer
+ * input, or if we know we don't need to mark/restore at all.
+ */
+ if (IsA(outer_path, UniquePath) || path->skip_mark_restore)
+ rescannedtuples = 0;
+ else
+ {
+ rescannedtuples = mergejointuples - inner_path_rows;
+ /* Must clamp because of possible underestimate */
+ if (rescannedtuples < 0)
+ rescannedtuples = 0;
+ }
+
+ /*
+ * We'll inflate various costs this much to account for rescanning. Note
+ * that this is to be multiplied by something involving inner_rows, or
+ * another number related to the portion of the inner rel we'll scan.
+ */
+ rescanratio = 1.0 + (rescannedtuples / inner_rows);
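+
+ /*
+ * Illustrative arithmetic for the formulas above (hypothetical numbers):
+ * with outer key multiplicities m = {2, 3} and inner multiplicities
+ * n = {4, 5}, size of join = 2*4 + 3*5 = 23 and size of inner relation =
+ * 4 + 5 = 9, so rescannedtuples = 23 - 9 = 14, which indeed equals
+ * (2-1)*4 + (3-1)*5. If inner_rows is also 9, rescanratio becomes
+ * 1.0 + 14/9, i.e. inner-side costs are inflated by roughly 2.56x.
+ */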
+
+ /*
+ * Decide whether we want to materialize the inner input to shield it from
+ * mark/restore and performing re-fetches. Our cost model for regular
+ * re-fetches is that a re-fetch costs the same as an original fetch,
+ * which is probably an overestimate; but on the other hand we ignore the
+ * bookkeeping costs of mark/restore. Not clear if it's worth developing
+ * a more refined model. So we just need to inflate the inner run cost by
+ * rescanratio.
+ */
+ bare_inner_cost = inner_run_cost * rescanratio;
+
+ /*
+ * When we interpose a Material node the re-fetch cost is assumed to be
+ * just cpu_operator_cost per tuple, independently of the underlying
+ * plan's cost; and we charge an extra cpu_operator_cost per original
+ * fetch as well. Note that we're assuming the materialize node will
+ * never spill to disk, since it only has to remember tuples back to the
+ * last mark. (If there are a huge number of duplicates, our other cost
+ * factors will make the path so expensive that it probably won't get
+ * chosen anyway.) So we don't use cost_rescan here.
+ *
+ * Note: keep this estimate in sync with create_mergejoin_plan's labeling
+ * of the generated Material node.
+ */
+ mat_inner_cost = inner_run_cost +
+ cpu_operator_cost * inner_rows * rescanratio;
+
+ /*
+ * If we don't need mark/restore at all, we don't need materialization.
+ */
+ if (path->skip_mark_restore)
+ path->materialize_inner = false;
+
+ /*
+ * Prefer materializing if it looks cheaper, unless the user has asked to
+ * suppress materialization.
+ */
+ else if (enable_material && mat_inner_cost < bare_inner_cost)
+ path->materialize_inner = true;
+
+ /*
+ * Even if materializing doesn't look cheaper, we *must* do it if the
+ * inner path is to be used directly (without sorting) and it doesn't
+ * support mark/restore.
+ *
+ * Since the inner side must be ordered, and only Sorts and IndexScans can
+ * create order to begin with, and they both support mark/restore, you
+ * might think there's no problem --- but you'd be wrong. Nestloop and
+ * merge joins can *preserve* the order of their inputs, so they can be
+ * selected as the input of a mergejoin, and they don't support
+ * mark/restore at present.
+ *
+ * We don't test the value of enable_material here, because
+ * materialization is required for correctness in this case, and turning
+ * it off does not entitle us to deliver an invalid plan.
+ */
+ else if (innersortkeys == NIL &&
+ !ExecSupportsMarkRestore(inner_path))
+ path->materialize_inner = true;
+
+ /*
+ * Also, force materializing if the inner path is to be sorted and the
+ * sort is expected to spill to disk. This is because the final merge
+ * pass can be done on-the-fly if it doesn't have to support mark/restore.
+ * We don't try to adjust the cost estimates for this consideration,
+ * though.
+ *
+ * Since materialization is a performance optimization in this case,
+ * rather than necessary for correctness, we skip it if enable_material is
+ * off.
+ */
+ else if (enable_material && innersortkeys != NIL &&
+ relation_byte_size(inner_path_rows,
+ inner_path->pathtarget->width) >
+ (work_mem * 1024L))
+ path->materialize_inner = true;
+ else
+ path->materialize_inner = false;
+
+ /* Charge the right incremental cost for the chosen case */
+ if (path->materialize_inner)
+ run_cost += mat_inner_cost;
+ else
+ run_cost += bare_inner_cost;
+
+ /* CPU costs */
+
+ /*
+ * The number of tuple comparisons needed is approximately number of outer
+ * rows plus number of inner rows plus number of rescanned tuples (can we
+ * refine this?). At each one, we need to evaluate the mergejoin quals.
+ */
+ startup_cost += merge_qual_cost.startup;
+ startup_cost += merge_qual_cost.per_tuple *
+ (outer_skip_rows + inner_skip_rows * rescanratio);
+ run_cost += merge_qual_cost.per_tuple *
+ ((outer_rows - outer_skip_rows) +
+ (inner_rows - inner_skip_rows) * rescanratio);
+
+ /*
+ * For each tuple that gets through the mergejoin proper, we charge
+ * cpu_tuple_cost plus the cost of evaluating additional restriction
+ * clauses that are to be applied at the join. (This is pessimistic since
+ * not all of the quals may get evaluated at each tuple.)
+ *
+ * Note: we could adjust for SEMI/ANTI joins skipping some qual
+ * evaluations here, but it's probably not worth the trouble.
+ */
+ startup_cost += qp_qual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * mergejointuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->jpath.path.pathtarget->cost.startup;
+ run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
+
+ path->jpath.path.startup_cost = startup_cost;
+ path->jpath.path.total_cost = startup_cost + run_cost;
+}
+
+/*
+ * run mergejoinscansel() with caching
+ */
+static MergeScanSelCache *
+cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
+{
+ MergeScanSelCache *cache;
+ ListCell *lc;
+ Selectivity leftstartsel,
+ leftendsel,
+ rightstartsel,
+ rightendsel;
+ MemoryContext oldcontext;
+
+ /* Do we have this result already? */
+ foreach(lc, rinfo->scansel_cache)
+ {
+ cache = (MergeScanSelCache *) lfirst(lc);
+ if (cache->opfamily == pathkey->pk_opfamily &&
+ cache->collation == pathkey->pk_eclass->ec_collation &&
+ cache->strategy == pathkey->pk_strategy &&
+ cache->nulls_first == pathkey->pk_nulls_first)
+ return cache;
+ }
+
+ /* Nope, do the computation */
+ mergejoinscansel(root,
+ (Node *) rinfo->clause,
+ pathkey->pk_opfamily,
+ pathkey->pk_strategy,
+ pathkey->pk_nulls_first,
+ &leftstartsel,
+ &leftendsel,
+ &rightstartsel,
+ &rightendsel);
+
+ /* Cache the result in suitably long-lived workspace */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache));
+ cache->opfamily = pathkey->pk_opfamily;
+ cache->collation = pathkey->pk_eclass->ec_collation;
+ cache->strategy = pathkey->pk_strategy;
+ cache->nulls_first = pathkey->pk_nulls_first;
+ cache->leftstartsel = leftstartsel;
+ cache->leftendsel = leftendsel;
+ cache->rightstartsel = rightstartsel;
+ cache->rightendsel = rightendsel;
+
+ rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return cache;
+}
+
+/*
+ * initial_cost_hashjoin
+ * Preliminary estimate of the cost of a hashjoin path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_hashjoin will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_hashjoin
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here (other than by counting the number of hash clauses),
+ * so we can't do much with CPU costs. We do assume that
+ * ExecChooseHashTableSize is cheap enough to use here.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_hashjoin
+ * 'jointype' is the type of join to be performed
+ * 'hashclauses' is the list of joinclauses to be used as hash clauses
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
+ * 'extra' contains miscellaneous information about the join
+ * 'parallel_hash' indicates that inner_path is partial and that a shared
+ * hash table will be built in parallel
+ */
+void
+initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *hashclauses,
+ Path *outer_path, Path *inner_path,
+ JoinPathExtraData *extra,
+ bool parallel_hash)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ double inner_path_rows_total = inner_path_rows;
+ int num_hashclauses = list_length(hashclauses);
+ int numbuckets;
+ int numbatches;
+ int num_skew_mcvs;
+ size_t space_allowed; /* unused */
+
+ /* cost of source data */
+ startup_cost += outer_path->startup_cost;
+ run_cost += outer_path->total_cost - outer_path->startup_cost;
+ startup_cost += inner_path->total_cost;
+
+ /*
+ * Cost of computing hash function: must do it once per input tuple. We
+ * charge one cpu_operator_cost for each column's hash function. Also,
+ * tack on one cpu_tuple_cost per inner row, to model the costs of
+ * inserting the row into the hashtable.
+ *
+ * XXX when a hashclause is more complex than a single operator, we really
+ * should charge the extra eval costs of the left or right side, as
+ * appropriate, here. This seems more work than it's worth at the moment.
+ */
+ startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
+ * inner_path_rows;
+ run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
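+
+ /*
+ * For instance (hypothetical numbers, assuming the default settings
+ * cpu_operator_cost = 0.0025 and cpu_tuple_cost = 0.01): with two hash
+ * clauses, each inner row adds 2 * 0.0025 + 0.01 = 0.015 to startup cost
+ * and each outer row adds 2 * 0.0025 = 0.005 to run cost.
+ */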
+
+ /*
+ * If this is a parallel hash build, then the value we have for
+ * inner_rows_total currently refers only to the rows returned by each
+ * participant. For shared hash table size estimation, we need the total
+ * number, so we need to undo the division.
+ */
+ if (parallel_hash)
+ inner_path_rows_total *= get_parallel_divisor(inner_path);
+
+ /*
+ * Get hash table size that executor would use for inner relation.
+ *
+ * XXX for the moment, always assume that skew optimization will be
+ * performed. As long as SKEW_HASH_MEM_PERCENT is small, it's not worth
+ * trying to determine that for sure.
+ *
+ * XXX at some point it might be interesting to try to account for skew
+ * optimization in the cost estimate, but for now, we don't.
+ */
+ ExecChooseHashTableSize(inner_path_rows_total,
+ inner_path->pathtarget->width,
+ true, /* useskew */
+ parallel_hash, /* try_combined_hash_mem */
+ outer_path->parallel_workers,
+ &space_allowed,
+ &numbuckets,
+ &numbatches,
+ &num_skew_mcvs);
+
+ /*
+ * If inner relation is too big then we will need to "batch" the join,
+ * which implies writing and reading most of the tuples to disk an extra
+ * time. Charge seq_page_cost per page, since the I/O should be nice and
+ * sequential. Writing the inner rel counts as startup cost, all the rest
+ * as run cost.
+ */
+ if (numbatches > 1)
+ {
+ double outerpages = page_size(outer_path_rows,
+ outer_path->pathtarget->width);
+ double innerpages = page_size(inner_path_rows,
+ inner_path->pathtarget->width);
+
+ startup_cost += seq_page_cost * innerpages;
+ run_cost += seq_page_cost * (innerpages + 2 * outerpages);
+ }
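+
+ /*
+ * To illustrate the charges above (hypothetical numbers): with numbatches
+ * > 1, innerpages = 100 and outerpages = 300, startup cost grows by
+ * seq_page_cost * 100 for writing the inner rel, and run cost grows by
+ * seq_page_cost * (100 + 2 * 300) = seq_page_cost * 700 for re-reading
+ * the inner rel plus writing and re-reading the outer rel.
+ */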
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost;
+ /* Save private data for final_cost_hashjoin */
+ workspace->run_cost = run_cost;
+ workspace->numbuckets = numbuckets;
+ workspace->numbatches = numbatches;
+ workspace->inner_rows_total = inner_path_rows_total;
+}
+
+/*
+ * final_cost_hashjoin
+ * Final estimate of the cost and result size of a hashjoin path.
+ *
+ * Note: the numbatches estimate is also saved into 'path' for use later
+ *
+ * 'path' is already filled in except for the rows and cost fields and
+ * num_batches
+ * 'workspace' is the result from initial_cost_hashjoin
+ * 'extra' contains miscellaneous information about the join
+ */
+void
+final_cost_hashjoin(PlannerInfo *root, HashPath *path,
+ JoinCostWorkspace *workspace,
+ JoinPathExtraData *extra)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ double inner_path_rows_total = workspace->inner_rows_total;
+ List *hashclauses = path->path_hashclauses;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ int numbuckets = workspace->numbuckets;
+ int numbatches = workspace->numbatches;
+ Cost cpu_per_tuple;
+ QualCost hash_qual_cost;
+ QualCost qp_qual_cost;
+ double hashjointuples;
+ double virtualbuckets;
+ Selectivity innerbucketsize;
+ Selectivity innermcvfreq;
+ ListCell *hcl;
+
+ /* Mark the path with the correct row estimate */
+ if (path->jpath.path.param_info)
+ path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
+ else
+ path->jpath.path.rows = path->jpath.path.parent->rows;
+
+ /* For partial paths, scale row estimate. */
+ if (path->jpath.path.parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(&path->jpath.path);
+
+ path->jpath.path.rows =
+ clamp_row_est(path->jpath.path.rows / parallel_divisor);
+ }
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_hashjoin)
+ startup_cost += disable_cost;
+
+ /* mark the path with estimated # of batches */
+ path->num_batches = numbatches;
+
+ /* store the total number of tuples (sum of partial row estimates) */
+ path->inner_rows_total = inner_path_rows_total;
+
+ /* and compute the number of "virtual" buckets in the whole join */
+ virtualbuckets = (double) numbuckets * (double) numbatches;
+
+ /*
+ * Determine bucketsize fraction and MCV frequency for the inner relation.
+ * We use the smallest bucketsize or MCV frequency estimated for any
+ * individual hashclause; this is undoubtedly conservative.
+ *
+ * BUT: if inner relation has been unique-ified, we can assume it's good
+ * for hashing. This is important both because it's the right answer, and
+ * because we avoid contaminating the cache with a value that's wrong for
+ * non-unique-ified paths.
+ */
+ if (IsA(inner_path, UniquePath))
+ {
+ innerbucketsize = 1.0 / virtualbuckets;
+ innermcvfreq = 0.0;
+ }
+ else
+ {
+ innerbucketsize = 1.0;
+ innermcvfreq = 1.0;
+ foreach(hcl, hashclauses)
+ {
+ RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl);
+ Selectivity thisbucketsize;
+ Selectivity thismcvfreq;
+
+ /*
+ * First we have to figure out which side of the hashjoin clause
+ * is the inner side.
+ *
+ * Since we tend to visit the same clauses over and over when
+ * planning a large query, we cache the bucket stats estimates in
+ * the RestrictInfo node to avoid repeated lookups of statistics.
+ */
+ if (bms_is_subset(restrictinfo->right_relids,
+ inner_path->parent->relids))
+ {
+ /* righthand side is inner */
+ thisbucketsize = restrictinfo->right_bucketsize;
+ if (thisbucketsize < 0)
+ {
+ /* not cached yet */
+ estimate_hash_bucket_stats(root,
+ get_rightop(restrictinfo->clause),
+ virtualbuckets,
+ &restrictinfo->right_mcvfreq,
+ &restrictinfo->right_bucketsize);
+ thisbucketsize = restrictinfo->right_bucketsize;
+ }
+ thismcvfreq = restrictinfo->right_mcvfreq;
+ }
+ else
+ {
+ Assert(bms_is_subset(restrictinfo->left_relids,
+ inner_path->parent->relids));
+ /* lefthand side is inner */
+ thisbucketsize = restrictinfo->left_bucketsize;
+ if (thisbucketsize < 0)
+ {
+ /* not cached yet */
+ estimate_hash_bucket_stats(root,
+ get_leftop(restrictinfo->clause),
+ virtualbuckets,
+ &restrictinfo->left_mcvfreq,
+ &restrictinfo->left_bucketsize);
+ thisbucketsize = restrictinfo->left_bucketsize;
+ }
+ thismcvfreq = restrictinfo->left_mcvfreq;
+ }
+
+ if (innerbucketsize > thisbucketsize)
+ innerbucketsize = thisbucketsize;
+ if (innermcvfreq > thismcvfreq)
+ innermcvfreq = thismcvfreq;
+ }
+ }
+
+ /*
+ * If the bucket holding the inner MCV would exceed hash_mem, we don't
+ * want to hash unless there is really no other alternative, so apply
+ * disable_cost. (The executor normally copes with excessive memory usage
+ * by splitting batches, but obviously it cannot separate equal values
+ * that way, so it will be unable to drive the batch size below hash_mem
+ * when this is true.)
+ */
+ if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
+ inner_path->pathtarget->width) > get_hash_memory_limit())
+ startup_cost += disable_cost;
+
+ /*
+ * Compute cost of the hashquals and qpquals (other restriction clauses)
+ * separately.
+ */
+ cost_qual_eval(&hash_qual_cost, hashclauses, root);
+ cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
+ qp_qual_cost.startup -= hash_qual_cost.startup;
+ qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
+
+ /* CPU costs */
+
+ if (path->jpath.jointype == JOIN_SEMI ||
+ path->jpath.jointype == JOIN_ANTI ||
+ extra->inner_unique)
+ {
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
+
+ /*
+ * With a SEMI or ANTI join, or if the innerrel is known unique, the
+ * executor will stop after the first match.
+ *
+ * For an outer-rel row that has at least one match, we can expect the
+ * bucket scan to stop after a fraction 1/(match_count+1) of the
+ * bucket's rows, if the matches are evenly distributed. Since they
+ * probably aren't quite evenly distributed, we apply a fuzz factor of
+ * 2.0 to that fraction. (If we used a larger fuzz factor, we'd have
+ * to clamp inner_scan_frac to at most 1.0; but since match_count is
+ * at least 1, no such clamp is needed now.)
+ */
+ outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
+ inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
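+
+ /*
+ * Worked example of the fraction above (hypothetical number): if
+ * match_count is 3, an even distribution of matches would stop the bucket
+ * scan after 1/(3+1) = 0.25 of the bucket; the 2.0 fuzz factor raises
+ * inner_scan_frac to 0.5. Because match_count is at least 1, the result
+ * never exceeds 1.0.
+ */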
+
+ startup_cost += hash_qual_cost.startup;
+ run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
+ clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;
+
+ /*
+ * For unmatched outer-rel rows, the picture is quite a lot different.
+ * In the first place, there is no reason to assume that these rows
+ * preferentially hit heavily-populated buckets; instead assume they
+ * are uncorrelated with the inner distribution and so they see an
+ * average bucket size of inner_path_rows / virtualbuckets. In the
+ * second place, it seems likely that they will have few if any exact
+ * hash-code matches and so very few of the tuples in the bucket will
+ * actually require eval of the hash quals. We don't have any good
+ * way to estimate how many will, but for the moment assume that the
+ * effective cost per bucket entry is one-tenth what it is for
+ * matchable tuples.
+ */
+ run_cost += hash_qual_cost.per_tuple *
+ (outer_path_rows - outer_matched_rows) *
+ clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
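+
+ /*
+ * The 0.05 above can be read as the 0.5 hash-mismatch discount used for
+ * matchable tuples further reduced by the assumed one-tenth factor, i.e.
+ * 0.5 * 0.1 = 0.05.
+ */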
+
+ /* Get # of tuples that will pass the basic join */
+ if (path->jpath.jointype == JOIN_ANTI)
+ hashjointuples = outer_path_rows - outer_matched_rows;
+ else
+ hashjointuples = outer_matched_rows;
+ }
+ else
+ {
+ /*
+ * The number of tuple comparisons needed is the number of outer
+ * tuples times the typical number of tuples in a hash bucket, which
+ * is the inner relation size times its bucketsize fraction. At each
+ * one, we need to evaluate the hashjoin quals. But actually,
+ * charging the full qual eval cost at each tuple is pessimistic,
+ * since we don't evaluate the quals unless the hash values match
+ * exactly. For lack of a better idea, halve the cost estimate to
+ * allow for that.
+ */
+ startup_cost += hash_qual_cost.startup;
+ run_cost += hash_qual_cost.per_tuple * outer_path_rows *
+ clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
+
+ /*
+ * Get approx # tuples passing the hashquals. We use
+ * approx_tuple_count here because we need an estimate done with
+ * JOIN_INNER semantics.
+ */
+ hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
+ }
+
+ /*
+ * For each tuple that gets through the hashjoin proper, we charge
+ * cpu_tuple_cost plus the cost of evaluating additional restriction
+ * clauses that are to be applied at the join. (This is pessimistic since
+ * not all of the quals may get evaluated at each tuple.)
+ */
+ startup_cost += qp_qual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * hashjointuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->jpath.path.pathtarget->cost.startup;
+ run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
+
+ path->jpath.path.startup_cost = startup_cost;
+ path->jpath.path.total_cost = startup_cost + run_cost;
+}
+
+
+/*
+ * cost_subplan
+ * Figure the costs for a SubPlan (or initplan).
+ *
+ * Note: we could dig the subplan's Plan out of the root list, but in practice
+ * all callers have it handy already, so we make them pass it.
+ */
+void
+cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
+{
+ QualCost sp_cost;
+
+ /* Figure any cost for evaluating the testexpr */
+ cost_qual_eval(&sp_cost,
+ make_ands_implicit((Expr *) subplan->testexpr),
+ root);
+
+ if (subplan->useHashTable)
+ {
+ /*
+ * If we are using a hash table for the subquery outputs, then the
+ * cost of evaluating the query is a one-time cost. We charge one
+ * cpu_operator_cost per tuple for the work of loading the hashtable,
+ * too.
+ */
+ sp_cost.startup += plan->total_cost +
+ cpu_operator_cost * plan->plan_rows;
+
+ /*
+ * The per-tuple costs include the cost of evaluating the lefthand
+ * expressions, plus the cost of probing the hashtable. We already
+ * accounted for the lefthand expressions as part of the testexpr, and
+ * will also have counted one cpu_operator_cost for each comparison
+ * operator. That is probably too low for the probing cost, but it's
+ * hard to make a better estimate, so live with it for now.
+ */
+ }
+ else
+ {
+ /*
+ * Otherwise we will be rescanning the subplan output on each
+ * evaluation. We need to estimate how much of the output we will
+ * actually need to scan. NOTE: this logic should agree with the
+ * tuple_fraction estimates used by make_subplan() in
+ * plan/subselect.c.
+ */
+ Cost plan_run_cost = plan->total_cost - plan->startup_cost;
+
+ if (subplan->subLinkType == EXISTS_SUBLINK)
+ {
+ /* we only need to fetch 1 tuple; clamp to avoid zero divide */
+ sp_cost.per_tuple += plan_run_cost / clamp_row_est(plan->plan_rows);
+ }
+ else if (subplan->subLinkType == ALL_SUBLINK ||
+ subplan->subLinkType == ANY_SUBLINK)
+ {
+ /* assume we need 50% of the tuples */
+ sp_cost.per_tuple += 0.50 * plan_run_cost;
+ /* also charge a cpu_operator_cost per row examined */
+ sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
+ }
+ else
+ {
+ /* assume we need all tuples */
+ sp_cost.per_tuple += plan_run_cost;
+ }
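+
+ /*
+ * For example (hypothetical numbers): if the subplan's run cost is 100
+ * over 50 output rows, an EXISTS sublink is charged 100/50 = 2 per call,
+ * an ANY/ALL sublink is charged 50 plus 25 * cpu_operator_cost per call
+ * (half of each quantity), and any other sublink type is charged the full
+ * 100 per call.
+ */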
+
+ /*
+ * Also account for subplan's startup cost. If the subplan is
+ * uncorrelated or undirect correlated, AND its topmost node is one
+ * that materializes its output, assume that we'll only need to pay
+ * its startup cost once; otherwise assume we pay the startup cost
+ * every time.
+ */
+ if (subplan->parParam == NIL &&
+ ExecMaterializesOutput(nodeTag(plan)))
+ sp_cost.startup += plan->startup_cost;
+ else
+ sp_cost.per_tuple += plan->startup_cost;
+ }
+
+ subplan->startup_cost = sp_cost.startup;
+ subplan->per_call_cost = sp_cost.per_tuple;
+}
+
+
+/*
+ * cost_rescan
+ * Given a finished Path, estimate the costs of rescanning it after
+ * having done so the first time. For some Path types a rescan is
+ * cheaper than an original scan (if no parameters change), and this
+ * function embodies knowledge about that. The default is to return
+ * the same costs stored in the Path. (Note that the cost estimates
+ * actually stored in Paths are always for first scans.)
+ *
+ * This function is not currently intended to model effects such as rescans
+ * being cheaper due to disk block caching; what we are concerned with is
+ * plan types wherein the executor caches results explicitly, or doesn't
+ * redo startup calculations, etc.
+ */
+static void
+cost_rescan(PlannerInfo *root, Path *path,
+ Cost *rescan_startup_cost, /* output parameters */
+ Cost *rescan_total_cost)
+{
+ switch (path->pathtype)
+ {
+ case T_FunctionScan:
+
+ /*
+ * Currently, nodeFunctionscan.c always executes the function to
+ * completion before returning any rows, and caches the results in
+ * a tuplestore. So the function eval cost is all startup cost
+ * and isn't paid over again on rescans. However, all run costs
+ * will be paid over again.
+ */
+ *rescan_startup_cost = 0;
+ *rescan_total_cost = path->total_cost - path->startup_cost;
+ break;
+ case T_HashJoin:
+
+ /*
+ * If it's a single-batch join, we don't need to rebuild the hash
+ * table during a rescan.
+ */
+ if (((HashPath *) path)->num_batches == 1)
+ {
+ /* Startup cost is exactly the cost of hash table building */
+ *rescan_startup_cost = 0;
+ *rescan_total_cost = path->total_cost - path->startup_cost;
+ }
+ else
+ {
+ /* Otherwise, no special treatment */
+ *rescan_startup_cost = path->startup_cost;
+ *rescan_total_cost = path->total_cost;
+ }
+ break;
+ case T_CteScan:
+ case T_WorkTableScan:
+ {
+ /*
+ * These plan types materialize their final result in a
+ * tuplestore or tuplesort object. So the rescan cost is only
+ * cpu_tuple_cost per tuple, unless the result is large enough
+ * to spill to disk.
+ */
+ Cost run_cost = cpu_tuple_cost * path->rows;
+ double nbytes = relation_byte_size(path->rows,
+ path->pathtarget->width);
+ long work_mem_bytes = work_mem * 1024L;
+
+ if (nbytes > work_mem_bytes)
+ {
+ /* It will spill, so account for re-read cost */
+ double npages = ceil(nbytes / BLCKSZ);
+
+ run_cost += seq_page_cost * npages;
+ }
+ *rescan_startup_cost = 0;
+ *rescan_total_cost = run_cost;
+ }
+ break;
+ case T_Material:
+ case T_Sort:
+ {
+ /*
+ * These plan types materialize their results and do not implement
+ * qual filtering or projection, so they are even cheaper to rescan
+ * than the ones above. We charge only
+ * cpu_operator_cost per tuple. (Note: keep that in sync with
+ * the run_cost charge in cost_sort, and also see comments in
+ * cost_material before you change it.)
+ */
+ Cost run_cost = cpu_operator_cost * path->rows;
+ double nbytes = relation_byte_size(path->rows,
+ path->pathtarget->width);
+ long work_mem_bytes = work_mem * 1024L;
+
+ if (nbytes > work_mem_bytes)
+ {
+ /* It will spill, so account for re-read cost */
+ double npages = ceil(nbytes / BLCKSZ);
+
+ run_cost += seq_page_cost * npages;
+ }
+ *rescan_startup_cost = 0;
+ *rescan_total_cost = run_cost;
+ }
+ break;
+ case T_Memoize:
+ /* All the hard work is done by cost_memoize_rescan */
+ cost_memoize_rescan(root, (MemoizePath *) path,
+ rescan_startup_cost, rescan_total_cost);
+ break;
+ default:
+ *rescan_startup_cost = path->startup_cost;
+ *rescan_total_cost = path->total_cost;
+ break;
+ }
+}
+
+
+/*
+ * cost_qual_eval
+ * Estimate the CPU costs of evaluating a WHERE clause.
+ * The input can be either an implicitly-ANDed list of boolean
+ * expressions, or a list of RestrictInfo nodes. (The latter is
+ * preferred since it allows caching of the results.)
+ * The result includes both a one-time (startup) component,
+ * and a per-evaluation component.
+ */
+void
+cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)
+{
+ cost_qual_eval_context context;
+ ListCell *l;
+
+ context.root = root;
+ context.total.startup = 0;
+ context.total.per_tuple = 0;
+
+ /* We don't charge any cost for the implicit ANDing at top level ... */
+
+ foreach(l, quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ cost_qual_eval_walker(qual, &context);
+ }
+
+ *cost = context.total;
+}
+
+/*
+ * cost_qual_eval_node
+ * As above, for a single RestrictInfo or expression.
+ */
+void
+cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
+{
+ cost_qual_eval_context context;
+
+ context.root = root;
+ context.total.startup = 0;
+ context.total.per_tuple = 0;
+
+ cost_qual_eval_walker(qual, &context);
+
+ *cost = context.total;
+}
+
+static bool
+cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
+{
+ if (node == NULL)
+ return false;
+
+ /*
+ * RestrictInfo nodes contain an eval_cost field reserved for this
+ * routine's use, so that it's not necessary to evaluate the qual clause's
+ * cost more than once. If the clause's cost hasn't been computed yet,
+ * the field's startup value will contain -1.
+ */
+ if (IsA(node, RestrictInfo))
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) node;
+
+ if (rinfo->eval_cost.startup < 0)
+ {
+ cost_qual_eval_context locContext;
+
+ locContext.root = context->root;
+ locContext.total.startup = 0;
+ locContext.total.per_tuple = 0;
+
+ /*
+ * For an OR clause, recurse into the marked-up tree so that we
+ * set the eval_cost for contained RestrictInfos too.
+ */
+ if (rinfo->orclause)
+ cost_qual_eval_walker((Node *) rinfo->orclause, &locContext);
+ else
+ cost_qual_eval_walker((Node *) rinfo->clause, &locContext);
+
+ /*
+ * If the RestrictInfo is marked pseudoconstant, it will be tested
+ * only once, so treat its cost as all startup cost.
+ */
+ if (rinfo->pseudoconstant)
+ {
+ /* count one execution during startup */
+ locContext.total.startup += locContext.total.per_tuple;
+ locContext.total.per_tuple = 0;
+ }
+ rinfo->eval_cost = locContext.total;
+ }
+ context->total.startup += rinfo->eval_cost.startup;
+ context->total.per_tuple += rinfo->eval_cost.per_tuple;
+ /* do NOT recurse into children */
+ return false;
+ }
+
+ /*
+ * For each operator or function node in the given tree, we charge the
+ * estimated execution cost given by pg_proc.procost (remember to multiply
+ * this by cpu_operator_cost).
+ *
+ * Vars and Consts are charged zero, and so are boolean operators (AND,
+ * OR, NOT). Simplistic, but a lot better than no model at all.
+ *
+ * Should we try to account for the possibility of short-circuit
+ * evaluation of AND/OR? Probably *not*, because that would make the
+ * results depend on the clause ordering, and we are not in any position
+ * to expect that the current ordering of the clauses is the one that's
+ * going to end up being used. The above per-RestrictInfo caching would
+ * not mix well with trying to re-order clauses anyway.
+ *
+ * Another issue that is entirely ignored here is that if a set-returning
+ * function is below top level in the tree, the functions/operators above
+ * it will need to be evaluated multiple times. In practical use, such
+ * cases arise so seldom as to not be worth the added complexity needed;
+ * moreover, since our rowcount estimates for functions tend to be pretty
+ * phony, the results would also be pretty phony.
+ */
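+ /*
+ * For example (hypothetical): a function whose pg_proc.procost is 100
+ * contributes roughly 100 * cpu_operator_cost to the per-tuple cost each
+ * time it appears in the tree, while a plain Var or Const contributes
+ * nothing.
+ */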
+ if (IsA(node, FuncExpr))
+ {
+ add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
+ &context->total);
+ }
+ else if (IsA(node, OpExpr) ||
+ IsA(node, DistinctExpr) ||
+ IsA(node, NullIfExpr))
+ {
+ /* rely on struct equivalence to treat these all alike */
+ set_opfuncid((OpExpr *) node);
+ add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
+ &context->total);
+ }
+ else if (IsA(node, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+ Node *arraynode = (Node *) lsecond(saop->args);
+ QualCost sacosts;
+ QualCost hcosts;
+ int estarraylen = estimate_array_length(arraynode);
+
+ set_sa_opfuncid(saop);
+ sacosts.startup = sacosts.per_tuple = 0;
+ add_function_cost(context->root, saop->opfuncid, NULL,
+ &sacosts);
+
+ if (OidIsValid(saop->hashfuncid))
+ {
+ /* Handle costs for hashed ScalarArrayOpExpr */
+ hcosts.startup = hcosts.per_tuple = 0;
+
+ add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts);
+ context->total.startup += sacosts.startup + hcosts.startup;
+
+ /* Estimate the cost of building the hashtable. */
+ context->total.startup += estarraylen * hcosts.per_tuple;
+
+ /*
+ * XXX should we charge a little bit for sacosts.per_tuple when
+ * building the table, or is it OK to assume there will be zero
+ * hash collisions?
+ */
+
+ /*
+ * Charge for hashtable lookups. Charge a single hash and a
+ * single comparison.
+ */
+ context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple;
+ }
+ else
+ {
+ /*
+ * Estimate that the operator will be applied to about half of the
+ * array elements before the answer is determined.
+ */
+ context->total.startup += sacosts.startup;
+ context->total.per_tuple += sacosts.per_tuple *
+ estimate_array_length(arraynode) * 0.5;
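+ /*
+ * E.g. (hypothetically), "x = ANY(ten_element_array)" would charge the
+ * operator's per-tuple cost for 10 * 0.5 = 5 applications per row
+ * examined.
+ */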
+ }
+ }
+ else if (IsA(node, Aggref) ||
+ IsA(node, WindowFunc))
+ {
+ /*
+ * Aggref and WindowFunc nodes are (and should be) treated like Vars,
+ * ie, zero execution cost in the current model, because they behave
+ * essentially like Vars at execution. We disregard the costs of
+ * their input expressions for the same reason. The actual execution
+ * costs of the aggregate/window functions and their arguments have to
+ * be factored into plan-node-specific costing of the Agg or WindowAgg
+ * plan node.
+ */
+ return false; /* don't recurse into children */
+ }
+ else if (IsA(node, GroupingFunc))
+ {
+ /* Treat this as having cost 1 */
+ context->total.per_tuple += cpu_operator_cost;
+ return false; /* don't recurse into children */
+ }
+ else if (IsA(node, CoerceViaIO))
+ {
+ CoerceViaIO *iocoerce = (CoerceViaIO *) node;
+ Oid iofunc;
+ Oid typioparam;
+ bool typisvarlena;
+
+ /* check the result type's input function */
+ getTypeInputInfo(iocoerce->resulttype,
+ &iofunc, &typioparam);
+ add_function_cost(context->root, iofunc, NULL,
+ &context->total);
+ /* check the input type's output function */
+ getTypeOutputInfo(exprType((Node *) iocoerce->arg),
+ &iofunc, &typisvarlena);
+ add_function_cost(context->root, iofunc, NULL,
+ &context->total);
+ }
+ else if (IsA(node, ArrayCoerceExpr))
+ {
+ ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
+ QualCost perelemcost;
+
+ cost_qual_eval_node(&perelemcost, (Node *) acoerce->elemexpr,
+ context->root);
+ context->total.startup += perelemcost.startup;
+ if (perelemcost.per_tuple > 0)
+ context->total.per_tuple += perelemcost.per_tuple *
+ estimate_array_length((Node *) acoerce->arg);
+ }
+ else if (IsA(node, RowCompareExpr))
+ {
+ /* Conservatively assume we will check all the columns */
+ RowCompareExpr *rcexpr = (RowCompareExpr *) node;
+ ListCell *lc;
+
+ foreach(lc, rcexpr->opnos)
+ {
+ Oid opid = lfirst_oid(lc);
+
+ add_function_cost(context->root, get_opcode(opid), NULL,
+ &context->total);
+ }
+ }
+ else if (IsA(node, MinMaxExpr) ||
+ IsA(node, SQLValueFunction) ||
+ IsA(node, XmlExpr) ||
+ IsA(node, CoerceToDomain) ||
+ IsA(node, NextValueExpr))
+ {
+ /* Treat all these as having cost 1 */
+ context->total.per_tuple += cpu_operator_cost;
+ }
+ else if (IsA(node, CurrentOfExpr))
+ {
+ /* Report high cost to prevent selection of anything but TID scan */
+ context->total.startup += disable_cost;
+ }
+ else if (IsA(node, SubLink))
+ {
+ /* This routine should not be applied to un-planned expressions */
+ elog(ERROR, "cannot handle unplanned sub-select");
+ }
+ else if (IsA(node, SubPlan))
+ {
+ /*
+ * A subplan node in an expression typically indicates that the
+ * subplan will be executed on each evaluation, so charge accordingly.
+ * (Sub-selects that can be executed as InitPlans have already been
+ * removed from the expression.)
+ */
+ SubPlan *subplan = (SubPlan *) node;
+
+ context->total.startup += subplan->startup_cost;
+ context->total.per_tuple += subplan->per_call_cost;
+
+ /*
+ * We don't want to recurse into the testexpr, because it was already
+ * counted in the SubPlan node's costs. So we're done.
+ */
+ return false;
+ }
+ else if (IsA(node, AlternativeSubPlan))
+ {
+ /*
+ * Arbitrarily use the first alternative plan for costing. (We should
+ * certainly only include one alternative, and we don't yet have
+ * enough information to know which one the executor is most likely to
+ * use.)
+ */
+ AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
+
+ return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
+ context);
+ }
+ else if (IsA(node, PlaceHolderVar))
+ {
+ /*
+ * A PlaceHolderVar should be given cost zero when considering general
+ * expression evaluation costs. The expense of doing the contained
+ * expression is charged as part of the tlist eval costs of the scan
+ * or join where the PHV is first computed (see set_rel_width and
+ * add_placeholders_to_joinrel). If we charged it again here, we'd be
+ * double-counting the cost for each level of plan that the PHV
+ * bubbles up through. Hence, return without recursing into the
+ * phexpr.
+ */
+ return false;
+ }
+
+ /* recurse into children */
+ return expression_tree_walker(node, cost_qual_eval_walker,
+ (void *) context);
+}
+
+/*
+ * get_restriction_qual_cost
+ * Compute evaluation costs of a baserel's restriction quals, plus any
+ * movable join quals that have been pushed down to the scan.
+ * Results are returned into *qpqual_cost.
+ *
+ * This is a convenience subroutine that works for seqscans and other cases
+ * where all the given quals will be evaluated the hard way. It's not useful
+ * for cost_index(), for example, where the index machinery takes care of
+ * some of the quals. We assume baserestrictcost was previously set by
+ * set_baserel_size_estimates().
+ */
+static void
+get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info,
+ QualCost *qpqual_cost)
+{
+ if (param_info)
+ {
+ /* Include costs of pushed-down clauses */
+ cost_qual_eval(qpqual_cost, param_info->ppi_clauses, root);
+
+ qpqual_cost->startup += baserel->baserestrictcost.startup;
+ qpqual_cost->per_tuple += baserel->baserestrictcost.per_tuple;
+ }
+ else
+ *qpqual_cost = baserel->baserestrictcost;
+}
+
+
+/*
+ * compute_semi_anti_join_factors
+ * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join
+ * can be expected to scan.
+ *
+ * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
+ * inner rows as soon as it finds a match to the current outer row.
+ * The same happens if we have detected the inner rel is unique.
+ * We should therefore adjust some of the cost components for this effect.
+ * This function computes some estimates needed for these adjustments.
+ * These estimates will be the same regardless of the particular paths used
+ * for the outer and inner relation, so we compute these once and then pass
+ * them to all the join cost estimation functions.
+ *
+ * Input parameters:
+ * joinrel: join relation under consideration
+ * outerrel: outer relation under consideration
+ * innerrel: inner relation under consideration
+ * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique
+ * sjinfo: SpecialJoinInfo relevant to this join
+ * restrictlist: join quals
+ * Output parameters:
+ * *semifactors is filled in (see pathnodes.h for field definitions)
+ */
+void
+compute_semi_anti_join_factors(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist,
+ SemiAntiJoinFactors *semifactors)
+{
+ Selectivity jselec;
+ Selectivity nselec;
+ Selectivity avgmatch;
+ SpecialJoinInfo norm_sjinfo;
+ List *joinquals;
+ ListCell *l;
+
+ /*
+ * In an ANTI join, we must ignore clauses that are "pushed down", since
+ * those won't affect the match logic. In a SEMI join, we do not
+ * distinguish joinquals from "pushed down" quals, so just use the whole
+ * restrictinfo list. For other outer join types, we should consider only
+ * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check.
+ */
+ if (IS_OUTER_JOIN(jointype))
+ {
+ joinquals = NIL;
+ foreach(l, restrictlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
+ joinquals = lappend(joinquals, rinfo);
+ }
+ }
+ else
+ joinquals = restrictlist;
+
+ /*
+ * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
+ */
+ jselec = clauselist_selectivity(root,
+ joinquals,
+ 0,
+ (jointype == JOIN_ANTI) ? JOIN_ANTI : JOIN_SEMI,
+ sjinfo);
+
+ /*
+ * Also get the normal inner-join selectivity of the join clauses.
+ */
+ norm_sjinfo.type = T_SpecialJoinInfo;
+ norm_sjinfo.min_lefthand = outerrel->relids;
+ norm_sjinfo.min_righthand = innerrel->relids;
+ norm_sjinfo.syn_lefthand = outerrel->relids;
+ norm_sjinfo.syn_righthand = innerrel->relids;
+ norm_sjinfo.jointype = JOIN_INNER;
+ /* we don't bother trying to make the remaining fields valid */
+ norm_sjinfo.lhs_strict = false;
+ norm_sjinfo.delay_upper_joins = false;
+ norm_sjinfo.semi_can_btree = false;
+ norm_sjinfo.semi_can_hash = false;
+ norm_sjinfo.semi_operators = NIL;
+ norm_sjinfo.semi_rhs_exprs = NIL;
+
+ nselec = clauselist_selectivity(root,
+ joinquals,
+ 0,
+ JOIN_INNER,
+ &norm_sjinfo);
+
+ /* Avoid leaking a lot of ListCells */
+ if (IS_OUTER_JOIN(jointype))
+ list_free(joinquals);
+
+ /*
+ * jselec can be interpreted as the fraction of outer-rel rows that have
+ * any matches (this is true for both SEMI and ANTI cases). And nselec is
+ * the fraction of the Cartesian product that matches. So, the average
+ * number of matches for each outer-rel row that has at least one match is
+ * nselec * inner_rows / jselec.
+ *
+ * Note: it is correct to use the inner rel's "rows" count here, even
+ * though we might later be considering a parameterized inner path with
+ * fewer rows. This is because we have included all the join clauses in
+ * the selectivity estimate.
+ */
+ if (jselec > 0) /* protect against zero divide */
+ {
+ avgmatch = nselec * innerrel->rows / jselec;
+ /* Clamp to sane range */
+ avgmatch = Max(1.0, avgmatch);
+ }
+ else
+ avgmatch = 1.0;
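+
+ /*
+ * Worked example (hypothetical numbers): if jselec = 0.1 (10% of outer
+ * rows have at least one match), nselec = 0.001 and the inner rel has
+ * 1000 rows, then avgmatch = 0.001 * 1000 / 0.1 = 10, i.e. each matched
+ * outer row is expected to find about 10 inner matches.
+ */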
+
+ semifactors->outer_match_frac = jselec;
+ semifactors->match_count = avgmatch;
+}
+
+/*
+ * has_indexed_join_quals
+ * Check whether all the joinquals of a nestloop join are used as
+ * inner index quals.
+ *
+ * If the inner path of a SEMI/ANTI join is an indexscan (including bitmap
+ * indexscan) that uses all the joinquals as indexquals, we can assume that an
+ * unmatched outer tuple is cheap to process, whereas otherwise it's probably
+ * expensive.
+ */
+static bool
+has_indexed_join_quals(NestPath *path)
+{
+ JoinPath *joinpath = &path->jpath;
+ Relids joinrelids = joinpath->path.parent->relids;
+ Path *innerpath = joinpath->innerjoinpath;
+ List *indexclauses;
+ bool found_one;
+ ListCell *lc;
+
+ /* If join still has quals to evaluate, it's not fast */
+ if (joinpath->joinrestrictinfo != NIL)
+ return false;
+ /* Nor if the inner path isn't parameterized at all */
+ if (innerpath->param_info == NULL)
+ return false;
+
+ /* Find the indexclauses list for the inner scan */
+ switch (innerpath->pathtype)
+ {
+ case T_IndexScan:
+ case T_IndexOnlyScan:
+ indexclauses = ((IndexPath *) innerpath)->indexclauses;
+ break;
+ case T_BitmapHeapScan:
+ {
+ /* Accept only a simple bitmap scan, not AND/OR cases */
+ Path *bmqual = ((BitmapHeapPath *) innerpath)->bitmapqual;
+
+ if (IsA(bmqual, IndexPath))
+ indexclauses = ((IndexPath *) bmqual)->indexclauses;
+ else
+ return false;
+ break;
+ }
+ default:
+
+ /*
+ * If it's not a simple indexscan, it probably doesn't run quickly
+ * for zero rows out, even if it's a parameterized path using all
+ * the joinquals.
+ */
+ return false;
+ }
+
+ /*
+ * Examine the inner path's param clauses. Any that are from the outer
+ * path must be found in the indexclauses list, either exactly or in an
+ * equivalent form generated by equivclass.c. Also, we must find at least
+ * one such clause, else it's a clauseless join which isn't fast.
+ */
+ found_one = false;
+ foreach(lc, innerpath->param_info->ppi_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (join_clause_is_movable_into(rinfo,
+ innerpath->parent->relids,
+ joinrelids))
+ {
+ if (!is_redundant_with_indexclauses(rinfo, indexclauses))
+ return false;
+ found_one = true;
+ }
+ }
+ return found_one;
+}
+
+
+/*
+ * approx_tuple_count
+ * Quick-and-dirty estimation of the number of join rows passing
+ * a set of qual conditions.
+ *
+ * The quals can be either an implicitly-ANDed list of boolean expressions,
+ * or a list of RestrictInfo nodes (typically the latter).
+ *
+ * We intentionally compute the selectivity under JOIN_INNER rules, even
+ * if it's some type of outer join. This is appropriate because we are
+ * trying to figure out how many tuples pass the initial merge or hash
+ * join step.
+ *
+ * This is quick-and-dirty because we bypass clauselist_selectivity, and
+ * simply multiply the independent clause selectivities together. Now
+ * clauselist_selectivity often can't do any better than that anyhow, but
+ * for some situations (such as range constraints) it is smarter. However,
+ * we can't effectively cache the results of clauselist_selectivity, whereas
+ * the individual clause selectivities can be and are cached.
+ *
+ * Since we are only using the results to estimate how many potential
+ * output tuples are generated and passed through qpqual checking, it
+ * seems OK to live with the approximation.
+ */
+static double
+approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
+{
+ double tuples;
+ double outer_tuples = path->outerjoinpath->rows;
+ double inner_tuples = path->innerjoinpath->rows;
+ SpecialJoinInfo sjinfo;
+ Selectivity selec = 1.0;
+ ListCell *l;
+
+ /*
+ * Make up a SpecialJoinInfo for JOIN_INNER semantics.
+ */
+ sjinfo.type = T_SpecialJoinInfo;
+ sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
+ sjinfo.min_righthand = path->innerjoinpath->parent->relids;
+ sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
+ sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+ sjinfo.jointype = JOIN_INNER;
+ /* we don't bother trying to make the remaining fields valid */
+ sjinfo.lhs_strict = false;
+ sjinfo.delay_upper_joins = false;
+ sjinfo.semi_can_btree = false;
+ sjinfo.semi_can_hash = false;
+ sjinfo.semi_operators = NIL;
+ sjinfo.semi_rhs_exprs = NIL;
+
+ /* Get the approximate selectivity */
+ foreach(l, quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ /* Note that clause_selectivity will be able to cache its result */
+ selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
+ }
+
+ /* Apply it to the input relation sizes */
+ tuples = selec * outer_tuples * inner_tuples;
+
+ return clamp_row_est(tuples);
+}
+
+
+/*
+ * set_baserel_size_estimates
+ * Set the size estimates for the given base relation.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already, and rel->tuples must be set.
+ *
+ * We set the following fields of the rel node:
+ * rows: the estimated number of output tuples (after applying
+ * restriction clauses).
+ * width: the estimated average output tuple width in bytes.
+ * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
+ */
+void
+set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ double nrows;
+
+ /* Should only be applied to base relations */
+ Assert(rel->relid > 0);
+
+ nrows = rel->tuples *
+ clauselist_selectivity(root,
+ rel->baserestrictinfo,
+ 0,
+ JOIN_INNER,
+ NULL);
+
+ rel->rows = clamp_row_est(nrows);
+
+ cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
+
+ set_rel_width(root, rel);
+}
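+
+/*
+ * Example of the estimate above (hypothetical numbers): a relation with
+ * rel->tuples = 1,000,000 whose baserestrictinfo clauses have a combined
+ * selectivity of 0.002 gets rel->rows = clamp_row_est(2000) = 2000.
+ * clamp_row_est also guarantees the estimate never drops below one row.
+ */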
+
+/*
+ * get_parameterized_baserel_size
+ * Make a size estimate for a parameterized scan of a base relation.
+ *
+ * 'param_clauses' lists the additional join clauses to be used.
+ *
+ * set_baserel_size_estimates must have been applied already.
+ */
+double
+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
+ List *param_clauses)
+{
+ List *allclauses;
+ double nrows;
+
+ /*
+ * Estimate the number of rows returned by the parameterized scan, knowing
+ * that it will apply all the extra join clauses as well as the rel's own
+ * restriction clauses. Note that we force the clauses to be treated as
+ * non-join clauses during selectivity estimation.
+ */
+ allclauses = list_concat_copy(param_clauses, rel->baserestrictinfo);
+ nrows = rel->tuples *
+ clauselist_selectivity(root,
+ allclauses,
+ rel->relid, /* do not use 0! */
+ JOIN_INNER,
+ NULL);
+ nrows = clamp_row_est(nrows);
+ /* For safety, make sure result is not more than the base estimate */
+ if (nrows > rel->rows)
+ nrows = rel->rows;
+ return nrows;
+}
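+
+/*
+ * Hypothetical example: if the rel's own quals gave rel->rows = 500 and the
+ * pushed-down join clauses (together with those quals) are estimated to pass
+ * 80 rows, the parameterized scan is costed for 80 rows.  Had the combined
+ * estimate come out above 500, it would be clamped back to 500, since a scan
+ * with extra clauses cannot return more rows than one without them.
+ */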
+
+/*
+ * set_joinrel_size_estimates
+ * Set the size estimates for the given join relation.
+ *
+ * The rel's targetlist must have been constructed already, and a
+ * restriction clause list that matches the given component rels must
+ * be provided.
+ *
+ * Since there is more than one way to make a joinrel for more than two
+ * base relations, the results we get here could depend on which component
+ * rel pair is provided. In theory we should get the same answers no matter
+ * which pair is provided; in practice, since the selectivity estimation
+ * routines don't handle all cases equally well, we might not. But there's
+ * not much to be done about it. (Would it make sense to repeat the
+ * calculations for each pair of input rels that's encountered, and somehow
+ * average the results? Probably way more trouble than it's worth, and
+ * anyway we must keep the rowcount estimate the same for all paths for the
+ * joinrel.)
+ *
+ * We set only the rows field here. The reltarget field was already set by
+ * build_joinrel_tlist, and baserestrictcost is not used for join rels.
+ */
+void
+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
+{
+ rel->rows = calc_joinrel_size_estimate(root,
+ rel,
+ outer_rel,
+ inner_rel,
+ outer_rel->rows,
+ inner_rel->rows,
+ sjinfo,
+ restrictlist);
+}
+
+/*
+ * get_parameterized_joinrel_size
+ * Make a size estimate for a parameterized scan of a join relation.
+ *
+ * 'rel' is the joinrel under consideration.
+ * 'outer_path', 'inner_path' are (probably also parameterized) Paths that
+ * produce the relations being joined.
+ * 'sjinfo' is any SpecialJoinInfo relevant to this join.
+ * 'restrict_clauses' lists the join clauses that need to be applied at the
+ * join node (including any movable clauses that were moved down to this join,
+ * and not including any movable clauses that were pushed down into the
+ * child paths).
+ *
+ * set_joinrel_size_estimates must have been applied already.
+ */
+double
+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
+ Path *outer_path,
+ Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ List *restrict_clauses)
+{
+ double nrows;
+
+ /*
+ * Estimate the number of rows returned by the parameterized join as the
+ * sizes of the input paths times the selectivity of the clauses that have
+ * ended up at this join node.
+ *
+ * As with set_joinrel_size_estimates, the rowcount estimate could depend
+ * on the pair of input paths provided, though ideally we'd get the same
+ * estimate for any pair with the same parameterization.
+ */
+ nrows = calc_joinrel_size_estimate(root,
+ rel,
+ outer_path->parent,
+ inner_path->parent,
+ outer_path->rows,
+ inner_path->rows,
+ sjinfo,
+ restrict_clauses);
+ /* For safety, make sure result is not more than the base estimate */
+ if (nrows > rel->rows)
+ nrows = rel->rows;
+ return nrows;
+}
+
+/*
+ * calc_joinrel_size_estimate
+ * Workhorse for set_joinrel_size_estimates and
+ * get_parameterized_joinrel_size.
+ *
+ * outer_rel/inner_rel are the relations being joined, but they should be
+ * assumed to have sizes outer_rows/inner_rows; those numbers might be less
+ * than what rel->rows says, when we are considering parameterized paths.
+ */
+static double
+calc_joinrel_size_estimate(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist_in)
+{
+ /* This apparently-useless variable dodges a compiler bug in VS2013: */
+ List *restrictlist = restrictlist_in;
+ JoinType jointype = sjinfo->jointype;
+ Selectivity fkselec;
+ Selectivity jselec;
+ Selectivity pselec;
+ double nrows;
+
+ /*
+ * Compute joinclause selectivity. Note that we are only considering
+ * clauses that become restriction clauses at this join level; we are not
+ * double-counting them because they were not considered in estimating the
+ * sizes of the component rels.
+ *
+ * First, see whether any of the joinclauses can be matched to known FK
+ * constraints. If so, drop those clauses from the restrictlist, and
+ * instead estimate their selectivity using FK semantics. (We do this
+ * without regard to whether said clauses are local or "pushed down".
+ * Probably, an FK-matching clause could never be seen as pushed down at
+ * an outer join, since it would be strict and hence would be grounds for
+ * join strength reduction.) fkselec gets the net selectivity for
+ * FK-matching clauses, or 1.0 if there are none.
+ */
+ fkselec = get_foreign_key_join_selectivity(root,
+ outer_rel->relids,
+ inner_rel->relids,
+ sjinfo,
+ &restrictlist);
+
+ /*
+ * For an outer join, we have to distinguish the selectivity of the join's
+ * own clauses (JOIN/ON conditions) from any clauses that were "pushed
+ * down". For inner joins we just count them all as joinclauses.
+ */
+ if (IS_OUTER_JOIN(jointype))
+ {
+ List *joinquals = NIL;
+ List *pushedquals = NIL;
+ ListCell *l;
+
+ /* Grovel through the clauses to separate into two lists */
+ foreach(l, restrictlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
+ pushedquals = lappend(pushedquals, rinfo);
+ else
+ joinquals = lappend(joinquals, rinfo);
+ }
+
+ /* Get the separate selectivities */
+ jselec = clauselist_selectivity(root,
+ joinquals,
+ 0,
+ jointype,
+ sjinfo);
+ pselec = clauselist_selectivity(root,
+ pushedquals,
+ 0,
+ jointype,
+ sjinfo);
+
+ /* Avoid leaking a lot of ListCells */
+ list_free(joinquals);
+ list_free(pushedquals);
+ }
+ else
+ {
+ jselec = clauselist_selectivity(root,
+ restrictlist,
+ 0,
+ jointype,
+ sjinfo);
+ pselec = 0.0; /* not used, keep compiler quiet */
+ }
+
+ /*
+ * Basically, we multiply size of Cartesian product by selectivity.
+ *
+ * If we are doing an outer join, take that into account: the joinqual
+ * selectivity has to be clamped using the knowledge that the output must
+ * be at least as large as the non-nullable input. However, any
+ * pushed-down quals are applied after the outer join, so their
+ * selectivity applies fully.
+ *
+ * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
+ * of LHS rows that have matches, and we apply that straightforwardly.
+ */
+ switch (jointype)
+ {
+ case JOIN_INNER:
+ nrows = outer_rows * inner_rows * fkselec * jselec;
+ /* pselec not used */
+ break;
+ case JOIN_LEFT:
+ nrows = outer_rows * inner_rows * fkselec * jselec;
+ if (nrows < outer_rows)
+ nrows = outer_rows;
+ nrows *= pselec;
+ break;
+ case JOIN_FULL:
+ nrows = outer_rows * inner_rows * fkselec * jselec;
+ if (nrows < outer_rows)
+ nrows = outer_rows;
+ if (nrows < inner_rows)
+ nrows = inner_rows;
+ nrows *= pselec;
+ break;
+ case JOIN_SEMI:
+ nrows = outer_rows * fkselec * jselec;
+ /* pselec not used */
+ break;
+ case JOIN_ANTI:
+ nrows = outer_rows * (1.0 - fkselec * jselec);
+ nrows *= pselec;
+ break;
+ default:
+ /* other values not expected here */
+ elog(ERROR, "unrecognized join type: %d", (int) jointype);
+ nrows = 0; /* keep compiler quiet */
+ break;
+ }
+
+ return clamp_row_est(nrows);
+}
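+
+/*
+ * Worked example for the JOIN_LEFT branch (hypothetical numbers): with
+ * outer_rows = 1000, inner_rows = 100, fkselec = 1.0, jselec = 0.001 and a
+ * pushed-down qual with pselec = 0.5, the raw product is
+ * 1000 * 100 * 0.001 = 100, which is clamped up to the 1000 outer rows
+ * (every outer row appears at least once, possibly null-extended), then
+ * multiplied by 0.5 to give a final estimate of 500 rows.
+ */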
+
+/*
+ * get_foreign_key_join_selectivity
+ * Estimate join selectivity for foreign-key-related clauses.
+ *
+ * Remove any clauses that can be matched to FK constraints from *restrictlist,
+ * and return a substitute estimate of their selectivity. 1.0 is returned
+ * when there are no such clauses.
+ *
+ * The reason for treating such clauses specially is that we can get better
+ * estimates this way than by relying on clauselist_selectivity(), especially
+ * for multi-column FKs where that function's assumption that the clauses are
+ * independent falls down badly. But even with single-column FKs, we may be
+ * able to get a better answer when the pg_statistic stats are missing or out
+ * of date.
+ */
+static Selectivity
+get_foreign_key_join_selectivity(PlannerInfo *root,
+ Relids outer_relids,
+ Relids inner_relids,
+ SpecialJoinInfo *sjinfo,
+ List **restrictlist)
+{
+ Selectivity fkselec = 1.0;
+ JoinType jointype = sjinfo->jointype;
+ List *worklist = *restrictlist;
+ ListCell *lc;
+
+ /* Consider each FK constraint that is known to match the query */
+ foreach(lc, root->fkey_list)
+ {
+ ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
+ bool ref_is_outer;
+ List *removedlist;
+ ListCell *cell;
+
+ /*
+ * This FK is not relevant unless it connects a baserel on one side of
+ * this join to a baserel on the other side.
+ */
+ if (bms_is_member(fkinfo->con_relid, outer_relids) &&
+ bms_is_member(fkinfo->ref_relid, inner_relids))
+ ref_is_outer = false;
+ else if (bms_is_member(fkinfo->ref_relid, outer_relids) &&
+ bms_is_member(fkinfo->con_relid, inner_relids))
+ ref_is_outer = true;
+ else
+ continue;
+
+ /*
+ * If we're dealing with a semi/anti join, and the FK's referenced
+ * relation is on the outside, then knowledge of the FK doesn't help
+ * us figure out what we need to know (which is the fraction of outer
+ * rows that have matches). On the other hand, if the referenced rel
+ * is on the inside, then all outer rows must have matches in the
+ * referenced table (ignoring nulls). But any restriction or join
+ * clauses that filter that table will reduce the fraction of matches.
+ * We can account for restriction clauses, but it's too hard to guess
+ * how many table rows would get through a join that's inside the RHS.
+ * Hence, if either case applies, punt and ignore the FK.
+ */
+ if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+ (ref_is_outer || bms_membership(inner_relids) != BMS_SINGLETON))
+ continue;
+
+ /*
+ * Modify the restrictlist by removing clauses that match the FK (and
+ * putting them into removedlist instead). It seems unsafe to modify
+ * the originally-passed List structure, so we make a shallow copy the
+ * first time through.
+ */
+ if (worklist == *restrictlist)
+ worklist = list_copy(worklist);
+
+ removedlist = NIL;
+ foreach(cell, worklist)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+ bool remove_it = false;
+ int i;
+
+ /* Drop this clause if it matches any column of the FK */
+ for (i = 0; i < fkinfo->nkeys; i++)
+ {
+ if (rinfo->parent_ec)
+ {
+ /*
+ * EC-derived clauses can only match by EC. It is okay to
+ * consider any clause derived from the same EC as
+ * matching the FK: even if equivclass.c chose to generate
+ * a clause equating some other pair of Vars, it could
+ * have generated one equating the FK's Vars. So for
+ * purposes of estimation, we can act as though it did so.
+ *
+ * Note: checking parent_ec is a bit of a cheat because
+ * there are EC-derived clauses that don't have parent_ec
+ * set; but such clauses must compare expressions that
+ * aren't just Vars, so they cannot match the FK anyway.
+ */
+ if (fkinfo->eclass[i] == rinfo->parent_ec)
+ {
+ remove_it = true;
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * Otherwise, see if rinfo was previously matched to FK as
+ * a "loose" clause.
+ */
+ if (list_member_ptr(fkinfo->rinfos[i], rinfo))
+ {
+ remove_it = true;
+ break;
+ }
+ }
+ }
+ if (remove_it)
+ {
+ worklist = foreach_delete_current(worklist, cell);
+ removedlist = lappend(removedlist, rinfo);
+ }
+ }
+
+ /*
+ * If we failed to remove all the matching clauses we expected to
+ * find, chicken out and ignore this FK; applying its selectivity
+ * might result in double-counting. Put any clauses we did manage to
+ * remove back into the worklist.
+ *
+ * Since the matching clauses are known not outerjoin-delayed, they
+ * would normally have appeared in the initial joinclause list. If we
+ * didn't find them, there are two possibilities:
+ *
+ * 1. If the FK match is based on an EC that is ec_has_const, it won't
+ * have generated any join clauses at all. We discount such ECs while
+ * checking to see if we have "all" the clauses. (Below, we'll adjust
+ * the selectivity estimate for this case.)
+ *
+ * 2. The clauses were matched to some other FK in a previous
+ * iteration of this loop, and thus removed from worklist. (A likely
+ * case is that two FKs are matched to the same EC; there will be only
+ * one EC-derived clause in the initial list, so the first FK will
+ * consume it.) Applying both FKs' selectivity independently risks
+ * underestimating the join size; in particular, this would undo one
+ * of the main things that ECs were invented for, namely to avoid
+ * double-counting the selectivity of redundant equality conditions.
+ * Later we might think of a reasonable way to combine the estimates,
+ * but for now, just punt, since this is a fairly uncommon situation.
+ */
+ if (removedlist == NIL ||
+ list_length(removedlist) !=
+ (fkinfo->nmatched_ec - fkinfo->nconst_ec + fkinfo->nmatched_ri))
+ {
+ worklist = list_concat(worklist, removedlist);
+ continue;
+ }
+
+ /*
+ * Finally we get to the payoff: estimate selectivity using the
+ * knowledge that each referencing row will match exactly one row in
+ * the referenced table.
+ *
+ * XXX that's not true in the presence of nulls in the referencing
+ * column(s), so in principle we should derate the estimate for those.
+ * However (1) if there are any strict restriction clauses for the
+ * referencing column(s) elsewhere in the query, derating here would
+ * be double-counting the null fraction, and (2) it's not very clear
+ * how to combine null fractions for multiple referencing columns. So
+ * we do nothing for now about correcting for nulls.
+ *
+ * XXX another point here is that if either side of an FK constraint
+ * is an inheritance parent, we estimate as though the constraint
+ * covers all its children as well. This is not an unreasonable
+ * assumption for a referencing table, ie the user probably applied
+ * identical constraints to all child tables (though perhaps we ought
+ * to check that). But it's not possible to have done that for a
+ * referenced table. Fortunately, precisely because that doesn't
+ * work, it is uncommon in practice to have an FK referencing a parent
+ * table. So, at least for now, disregard inheritance here.
+ */
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+ {
+ /*
+ * For JOIN_SEMI and JOIN_ANTI, we only get here when the FK's
+ * referenced table is exactly the inside of the join. The join
+ * selectivity is defined as the fraction of LHS rows that have
+ * matches. The FK implies that every LHS row has a match *in the
+ * referenced table*; but any restriction clauses on it will
+ * reduce the number of matches. Hence we take the join
+ * selectivity as equal to the selectivity of the table's
+ * restriction clauses, which is rows / tuples; but we must guard
+ * against tuples == 0.
+ */
+ RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
+ double ref_tuples = Max(ref_rel->tuples, 1.0);
+
+ fkselec *= ref_rel->rows / ref_tuples;
+ }
+ else
+ {
+ /*
+ * Otherwise, selectivity is exactly 1/referenced-table-size; but
+ * guard against tuples == 0. Note we should use the raw table
+ * tuple count, not any estimate of its filtered or joined size.
+ */
+ RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
+ double ref_tuples = Max(ref_rel->tuples, 1.0);
+
+ fkselec *= 1.0 / ref_tuples;
+ }
+
+ /*
+ * If any of the FK columns participated in ec_has_const ECs, then
+ * equivclass.c will have generated "var = const" restrictions for
+ * each side of the join, thus reducing the sizes of both input
+ * relations. Taking the fkselec at face value would amount to
+ * double-counting the selectivity of the constant restriction for the
+ * referencing Var. Hence, look for the restriction clause(s) that
+ * were applied to the referencing Var(s), and divide out their
+ * selectivity to correct for this.
+ */
+ if (fkinfo->nconst_ec > 0)
+ {
+ for (int i = 0; i < fkinfo->nkeys; i++)
+ {
+ EquivalenceClass *ec = fkinfo->eclass[i];
+
+ if (ec && ec->ec_has_const)
+ {
+ EquivalenceMember *em = fkinfo->fk_eclass_member[i];
+ RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec,
+ em);
+
+ if (rinfo)
+ {
+ Selectivity s0;
+
+ s0 = clause_selectivity(root,
+ (Node *) rinfo,
+ 0,
+ jointype,
+ sjinfo);
+ if (s0 > 0)
+ fkselec /= s0;
+ }
+ }
+ }
+ }
+ }
+
+ *restrictlist = worklist;
+ CLAMP_PROBABILITY(fkselec);
+ return fkselec;
+}
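+
+/*
+ * Hypothetical example of the FK payoff above: joining an orders table to a
+ * customers table on orders.customer_id = customers.id, where customers has
+ * 10,000 rows, replaces the matched equality clause with fkselec = 1/10000.
+ * For a plain inner join with no other quals, calc_joinrel_size_estimate
+ * then yields roughly one output row per referencing row, regardless of how
+ * stale the column statistics happen to be.
+ */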
+
+/*
+ * set_subquery_size_estimates
+ * Set the size estimates for a base relation that is a subquery.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already, and the Paths for the subquery must have been completed.
+ * We look at the subquery's PlannerInfo to extract data.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ PlannerInfo *subroot = rel->subroot;
+ RelOptInfo *sub_final_rel;
+ ListCell *lc;
+
+ /* Should only be applied to base relations that are subqueries */
+ Assert(rel->relid > 0);
+ Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_SUBQUERY);
+
+ /*
+ * Copy raw number of output rows from subquery. All of its paths should
+ * have the same output rowcount, so just look at cheapest-total.
+ */
+ sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
+ rel->tuples = sub_final_rel->cheapest_total_path->rows;
+
+ /*
+ * Compute per-output-column width estimates by examining the subquery's
+ * targetlist. For any output that is a plain Var, get the width estimate
+ * that was made while planning the subquery. Otherwise, we leave it to
+ * set_rel_width to fill in a datatype-based default estimate.
+ */
+ foreach(lc, subroot->parse->targetList)
+ {
+ TargetEntry *te = lfirst_node(TargetEntry, lc);
+ Node *texpr = (Node *) te->expr;
+ int32 item_width = 0;
+
+ /* junk columns aren't visible to upper query */
+ if (te->resjunk)
+ continue;
+
+ /*
+ * The subquery could be an expansion of a view that's had columns
+ * added to it since the current query was parsed, so that there are
+ * non-junk tlist columns in it that don't correspond to any column
+ * visible at our query level. Ignore such columns.
+ */
+ if (te->resno < rel->min_attr || te->resno > rel->max_attr)
+ continue;
+
+ /*
+ * XXX This currently doesn't work for subqueries containing set
+ * operations, because the Vars in their tlists are bogus references
+ * to the first leaf subquery, which wouldn't give the right answer
+ * even if we could still get to its PlannerInfo.
+ *
+ * Also, the subquery could be an appendrel for which all branches are
+ * known empty due to constraint exclusion, in which case
+ * set_append_rel_pathlist will have left the attr_widths set to zero.
+ *
+ * In either case, we just leave the width estimate zero until
+ * set_rel_width fixes it.
+ */
+ if (IsA(texpr, Var) &&
+ subroot->parse->setOperations == NULL)
+ {
+ Var *var = (Var *) texpr;
+ RelOptInfo *subrel = find_base_rel(subroot, var->varno);
+
+ item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
+ }
+ rel->attr_widths[te->resno - rel->min_attr] = item_width;
+ }
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_function_size_estimates
+ * Set the size estimates for a base relation that is a function call.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ RangeTblEntry *rte;
+ ListCell *lc;
+
+ /* Should only be applied to base relations that are functions */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_FUNCTION);
+
+ /*
+ * Estimate number of rows the functions will return. The rowcount of the
+ * node is that of the largest function result.
+ */
+ rel->tuples = 0;
+ foreach(lc, rte->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+ double ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
+
+ if (ntup > rel->tuples)
+ rel->tuples = ntup;
+ }
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_tablefunc_size_estimates
+ * Set the size estimates for a base relation that is a table function call.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.

+ */
+void
+set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ /* Should only be applied to base relations that are table functions */
+ Assert(rel->relid > 0);
+ Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_TABLEFUNC);
+
+ rel->tuples = 100;
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_values_size_estimates
+ * Set the size estimates for a base relation that is a values list.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ RangeTblEntry *rte;
+
+ /* Should only be applied to base relations that are values lists */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_VALUES);
+
+ /*
+ * Estimate number of rows the values list will return. We know this
+ * precisely based on the list length (well, barring set-returning
+ * functions in list items, but that's a refinement not catered for
+ * anywhere else either).
+ */
+ rel->tuples = list_length(rte->values_lists);
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_cte_size_estimates
+ * Set the size estimates for a base relation that is a CTE reference.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already, and we need an estimate of the number of rows returned by the CTE
+ * (if a regular CTE) or the non-recursive term (if a self-reference).
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, double cte_rows)
+{
+ RangeTblEntry *rte;
+
+ /* Should only be applied to base relations that are CTE references */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_CTE);
+
+ if (rte->self_reference)
+ {
+ /*
+ * In a self-reference, we assume the average worktable size is a
+ * multiple of the nonrecursive term's size. The best multiplier will
+ * vary depending on query "fan-out", so make its value adjustable.
+ */
+ rel->tuples = clamp_row_est(recursive_worktable_factor * cte_rows);
+ }
+ else
+ {
+ /* Otherwise just believe the CTE's rowcount estimate */
+ rel->tuples = cte_rows;
+ }
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
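+
+/*
+ * Illustrative numbers: for a self-reference whose nonrecursive term is
+ * estimated at 50 rows, the worktable is costed at
+ * recursive_worktable_factor * 50 rows, e.g. 500 rows when
+ * recursive_worktable_factor is 10, reflecting the assumption that the
+ * recursion iterates several times before converging.
+ */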
+
+/*
+ * set_namedtuplestore_size_estimates
+ * Set the size estimates for a base relation that is a tuplestore reference.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_namedtuplestore_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ RangeTblEntry *rte;
+
+ /* Should only be applied to base relations that are tuplestore references */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_NAMEDTUPLESTORE);
+
+ /*
+ * Use the estimate provided by the code which is generating the named
+ * tuplestore. In some cases, the actual number might be available; in
+ * others the same plan will be re-used, so a "typical" value might be
+ * estimated and used.
+ */
+ rel->tuples = rte->enrtuples;
+ if (rel->tuples < 0)
+ rel->tuples = 1000;
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_result_size_estimates
+ * Set the size estimates for an RTE_RESULT base relation
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_result_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ /* Should only be applied to RTE_RESULT base relations */
+ Assert(rel->relid > 0);
+ Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_RESULT);
+
+ /* RTE_RESULT always generates a single row, natively */
+ rel->tuples = 1;
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_foreign_size_estimates
+ * Set the size estimates for a base relation that is a foreign table.
+ *
+ * There is not a whole lot that we can do here; the foreign-data wrapper
+ * is responsible for producing useful estimates. We can do a decent job
+ * of estimating baserestrictcost, so we set that, and we also set up width
+ * using what will be purely datatype-driven estimates from the targetlist.
+ * There is no way to do anything sane with the rows value, so we just put
+ * a default estimate and hope that the wrapper can improve on it. The
+ * wrapper's GetForeignRelSize function will be called momentarily.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already.
+ */
+void
+set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel)
+{
+ /* Should only be applied to base relations */
+ Assert(rel->relid > 0);
+
+ rel->rows = 1000; /* entirely bogus default estimate */
+
+ cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
+
+ set_rel_width(root, rel);
+}
+
+
+/*
+ * set_rel_width
+ * Set the estimated output width of a base relation.
+ *
+ * The estimated output width is the sum of the per-attribute width estimates
+ * for the actually-referenced columns, plus any PHVs or other expressions
+ * that have to be calculated at this relation. This is the amount of data
+ * we'd need to pass upwards in case of a sort, hash, etc.
+ *
+ * This function also sets reltarget->cost, so it's a bit misnamed now.
+ *
+ * NB: this works best on plain relations because it prefers to look at
+ * real Vars. For subqueries, set_subquery_size_estimates will already have
+ * copied up whatever per-column estimates were made within the subquery,
+ * and for other types of rels there isn't much we can do anyway. We fall
+ * back on (fairly stupid) datatype-based width estimates if we can't get
+ * any better number.
+ *
+ * The per-attribute width estimates are cached for possible re-use while
+ * building join relations or post-scan/join pathtargets.
+ */
+static void
+set_rel_width(PlannerInfo *root, RelOptInfo *rel)
+{
+ Oid reloid = planner_rt_fetch(rel->relid, root)->relid;
+ int32 tuple_width = 0;
+ bool have_wholerow_var = false;
+ ListCell *lc;
+
+ /* Vars are assumed to have cost zero, but other exprs do not */
+ rel->reltarget->cost.startup = 0;
+ rel->reltarget->cost.per_tuple = 0;
+
+ foreach(lc, rel->reltarget->exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ /*
+ * Ordinarily, a Var in a rel's targetlist must belong to that rel;
+ * but there are corner cases involving LATERAL references where that
+ * isn't so. If the Var has the wrong varno, fall through to the
+ * generic case (it doesn't seem worth the trouble to be any smarter).
+ */
+ if (IsA(node, Var) &&
+ ((Var *) node)->varno == rel->relid)
+ {
+ Var *var = (Var *) node;
+ int ndx;
+ int32 item_width;
+
+ Assert(var->varattno >= rel->min_attr);
+ Assert(var->varattno <= rel->max_attr);
+
+ ndx = var->varattno - rel->min_attr;
+
+ /*
+ * If it's a whole-row Var, we'll deal with it below after we have
+ * already cached as many attr widths as possible.
+ */
+ if (var->varattno == 0)
+ {
+ have_wholerow_var = true;
+ continue;
+ }
+
+ /*
+ * The width may have been cached already (especially if it's a
+ * subquery), so don't duplicate effort.
+ */
+ if (rel->attr_widths[ndx] > 0)
+ {
+ tuple_width += rel->attr_widths[ndx];
+ continue;
+ }
+
+ /* Try to get column width from statistics */
+ if (reloid != InvalidOid && var->varattno > 0)
+ {
+ item_width = get_attavgwidth(reloid, var->varattno);
+ if (item_width > 0)
+ {
+ rel->attr_widths[ndx] = item_width;
+ tuple_width += item_width;
+ continue;
+ }
+ }
+
+ /*
+ * Not a plain relation, or can't find statistics for it. Estimate
+ * using just the type info.
+ */
+ item_width = get_typavgwidth(var->vartype, var->vartypmod);
+ Assert(item_width > 0);
+ rel->attr_widths[ndx] = item_width;
+ tuple_width += item_width;
+ }
+ else if (IsA(node, PlaceHolderVar))
+ {
+ /*
+ * We will need to evaluate the PHV's contained expression while
+ * scanning this rel, so be sure to include it in reltarget->cost.
+ */
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+ PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, false);
+ QualCost cost;
+
+ tuple_width += phinfo->ph_width;
+ cost_qual_eval_node(&cost, (Node *) phv->phexpr, root);
+ rel->reltarget->cost.startup += cost.startup;
+ rel->reltarget->cost.per_tuple += cost.per_tuple;
+ }
+ else
+ {
+ /*
+ * We could be looking at an expression pulled up from a subquery,
+ * or a ROW() representing a whole-row child Var, etc. Do what we
+ * can using the expression type information.
+ */
+ int32 item_width;
+ QualCost cost;
+
+ item_width = get_typavgwidth(exprType(node), exprTypmod(node));
+ Assert(item_width > 0);
+ tuple_width += item_width;
+ /* Not entirely clear if we need to account for cost, but do so */
+ cost_qual_eval_node(&cost, node, root);
+ rel->reltarget->cost.startup += cost.startup;
+ rel->reltarget->cost.per_tuple += cost.per_tuple;
+ }
+ }
+
+ /*
+ * If we have a whole-row reference, estimate its width as the sum of
+ * per-column widths plus heap tuple header overhead.
+ */
+ if (have_wholerow_var)
+ {
+ int32 wholerow_width = MAXALIGN(SizeofHeapTupleHeader);
+
+ if (reloid != InvalidOid)
+ {
+ /* Real relation, so estimate true tuple width */
+ wholerow_width += get_relation_data_width(reloid,
+ rel->attr_widths - rel->min_attr);
+ }
+ else
+ {
+ /* Do what we can with info for a phony rel */
+ AttrNumber i;
+
+ for (i = 1; i <= rel->max_attr; i++)
+ wholerow_width += rel->attr_widths[i - rel->min_attr];
+ }
+
+ rel->attr_widths[0 - rel->min_attr] = wholerow_width;
+
+ /*
+ * Include the whole-row Var as part of the output tuple. Yes, that
+ * really is what happens at runtime.
+ */
+ tuple_width += wholerow_width;
+ }
+
+ Assert(tuple_width >= 0);
+ rel->reltarget->width = tuple_width;
+}
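+
+/*
+ * Worked width example (hypothetical columns): a rel referencing an int4
+ * (4 bytes), a bigint (8 bytes) and a text column whose pg_statistic average
+ * width is 20 bytes gets reltarget->width = 32.  If a whole-row Var were
+ * also referenced, MAXALIGN(SizeofHeapTupleHeader) plus the summed column
+ * widths would be added on top, since the composite datum really is
+ * materialized at runtime.
+ */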
+
+/*
+ * set_pathtarget_cost_width
+ * Set the estimated eval cost and output width of a PathTarget tlist.
+ *
+ * As a notational convenience, returns the same PathTarget pointer passed in.
+ *
+ * Most, though not quite all, uses of this function occur after we've run
+ * set_rel_width() for base relations; so we can usually obtain cached width
+ * estimates for Vars. If we can't, fall back on datatype-based width
+ * estimates. Present early-planning uses of PathTargets don't need accurate
+ * widths badly enough to justify going to the catalogs for better data.
+ */
+PathTarget *
+set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
+{
+ int32 tuple_width = 0;
+ ListCell *lc;
+
+ /* Vars are assumed to have cost zero, but other exprs do not */
+ target->cost.startup = 0;
+ target->cost.per_tuple = 0;
+
+ foreach(lc, target->exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+ int32 item_width;
+
+ /* We should not see any upper-level Vars here */
+ Assert(var->varlevelsup == 0);
+
+ /* Try to get data from RelOptInfo cache */
+ if (!IS_SPECIAL_VARNO(var->varno) &&
+ var->varno < root->simple_rel_array_size)
+ {
+ RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+ if (rel != NULL &&
+ var->varattno >= rel->min_attr &&
+ var->varattno <= rel->max_attr)
+ {
+ int ndx = var->varattno - rel->min_attr;
+
+ if (rel->attr_widths[ndx] > 0)
+ {
+ tuple_width += rel->attr_widths[ndx];
+ continue;
+ }
+ }
+ }
+
+ /*
+ * No cached data available, so estimate using just the type info.
+ */
+ item_width = get_typavgwidth(var->vartype, var->vartypmod);
+ Assert(item_width > 0);
+ tuple_width += item_width;
+ }
+ else
+ {
+ /*
+ * Handle general expressions using type info.
+ */
+ int32 item_width;
+ QualCost cost;
+
+ item_width = get_typavgwidth(exprType(node), exprTypmod(node));
+ Assert(item_width > 0);
+ tuple_width += item_width;
+
+ /* Account for cost, too */
+ cost_qual_eval_node(&cost, node, root);
+ target->cost.startup += cost.startup;
+ target->cost.per_tuple += cost.per_tuple;
+ }
+ }
+
+ Assert(tuple_width >= 0);
+ target->width = tuple_width;
+
+ return target;
+}
+
+/*
+ * relation_byte_size
+ * Estimate the storage space in bytes for a given number of tuples
+ * of a given width (size in bytes).
+ */
+static double
+relation_byte_size(double tuples, int width)
+{
+ return tuples * (MAXALIGN(width) + MAXALIGN(SizeofHeapTupleHeader));
+}
+
+/*
+ * page_size
+ * Returns an estimate of the number of pages covered by a given
+ * number of tuples of a given width (size in bytes).
+ */
+static double
+page_size(double tuples, int width)
+{
+ return ceil(relation_byte_size(tuples, width) / BLCKSZ);
+}
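+
+/*
+ * Example, assuming 8-byte MAXALIGN, a 23-byte heap tuple header and the
+ * default 8192-byte BLCKSZ: 1000 tuples of width 44 occupy about
+ * 1000 * (MAXALIGN(44) + MAXALIGN(23)) = 1000 * (48 + 24) = 72000 bytes,
+ * so page_size() reports ceil(72000 / 8192) = 9 pages.
+ */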
+
+/*
+ * Estimate the fraction of the work that each worker will do given the
+ * number of workers budgeted for the path.
+ */
+static double
+get_parallel_divisor(Path *path)
+{
+ double parallel_divisor = path->parallel_workers;
+
+ /*
+ * Early experience with parallel query suggests that when there is only
+ * one worker, the leader often makes a very substantial contribution to
+ * executing the parallel portion of the plan, but as more workers are
+ * added, it does less and less, because it's busy reading tuples from the
+ * workers and doing whatever non-parallel post-processing is needed. By
+ * the time we reach 4 workers, the leader no longer makes a meaningful
+ * contribution. Thus, for now, estimate that the leader spends 30% of
+ * its time servicing each worker, and the remainder executing the
+ * parallel plan.
+ */
+ if (parallel_leader_participation)
+ {
+ double leader_contribution;
+
+ leader_contribution = 1.0 - (0.3 * path->parallel_workers);
+ if (leader_contribution > 0)
+ parallel_divisor += leader_contribution;
+ }
+
+ return parallel_divisor;
+}
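+
+/*
+ * Examples of the divisor: with parallel_leader_participation enabled and 2
+ * workers, leader_contribution = 1.0 - 0.3 * 2 = 0.4, so the divisor is 2.4;
+ * with 4 or more workers the contribution term is no longer positive and the
+ * divisor is simply the worker count.
+ */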
+
+/*
+ * compute_bitmap_pages
+ *
+ * compute number of pages fetched from heap in bitmap heap scan.
+ */
+double
+compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
+ int loop_count, Cost *cost, double *tuple)
+{
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+ double T;
+ double pages_fetched;
+ double tuples_fetched;
+ double heap_pages;
+ long maxentries;
+
+ /*
+ * Fetch total cost of obtaining the bitmap, as well as its total
+ * selectivity.
+ */
+ cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+
+ /*
+ * Estimate number of main-table pages fetched.
+ */
+ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+
+ T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+
+ /*
+ * For a single scan, the number of heap pages that need to be fetched is
+ * the same as the Mackert and Lohman formula for the case T <= b (ie, no
+ * re-reads needed).
+ */
+ pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+
+ /*
+ * Calculate the number of pages fetched from the heap. Then based on
+ * current work_mem estimate get the estimated maxentries in the bitmap.
+ * (Note that we always do this calculation based on the number of pages
+ * that would be fetched in a single iteration, even if loop_count > 1.
+ * That's correct, because only that number of entries will be stored in
+ * the bitmap at one time.)
+ */
+ heap_pages = Min(pages_fetched, baserel->pages);
+ maxentries = tbm_calculate_entries(work_mem * 1024L);
+
+ if (loop_count > 1)
+ {
+ /*
+ * For repeated bitmap scans, scale up the number of tuples fetched in
+ * the Mackert and Lohman formula by the number of scans, so that we
+ * estimate the number of pages fetched by all the scans. Then
+ * pro-rate for one scan.
+ */
+ pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
+ baserel->pages,
+ get_indexpath_pages(bitmapqual),
+ root);
+ pages_fetched /= loop_count;
+ }
+
+ if (pages_fetched >= T)
+ pages_fetched = T;
+ else
+ pages_fetched = ceil(pages_fetched);
+
+ if (maxentries < heap_pages)
+ {
+ double exact_pages;
+ double lossy_pages;
+
+ /*
+ * Crude approximation of the number of lossy pages. Because of the
+ * way tbm_lossify() is coded, the number of lossy pages increases
+ * very sharply as soon as we run short of memory; this formula has
+ * that property and seems to perform adequately in testing, but it's
+ * possible we could do better somehow.
+ */
+ lossy_pages = Max(0, heap_pages - maxentries / 2);
+ exact_pages = heap_pages - lossy_pages;
+
+ /*
+ * If there are lossy pages then recompute the number of tuples
+ * processed by the bitmap heap node. We assume here that the chance
+ * of a given tuple coming from an exact page is the same as the
+ * chance that a given page is exact. This might not be true, but
+ * it's not clear how we can do any better.
+ */
+ if (lossy_pages > 0)
+ tuples_fetched =
+ clamp_row_est(indexSelectivity *
+ (exact_pages / heap_pages) * baserel->tuples +
+ (lossy_pages / heap_pages) * baserel->tuples);
+ }
+
+ if (cost)
+ *cost = indexTotalCost;
+ if (tuple)
+ *tuple = tuples_fetched;
+
+ return pages_fetched;
+}
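+
+/*
+ * Worked example for the single-scan case (hypothetical numbers): with
+ * T = 1000 heap pages and tuples_fetched = 500, the Mackert and Lohman
+ * formula gives pages_fetched = (2 * 1000 * 500) / (2 * 1000 + 500) = 400.
+ * If work_mem allowed only 150 bitmap entries, lossy_pages would be
+ * estimated as Max(0, 400 - 150/2) = 325, and tuples_fetched would be scaled
+ * up accordingly, since every tuple on a lossy page must be rechecked.
+ */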
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
new file mode 100644
index 0000000..9f39f46
--- /dev/null
+++ b/src/backend/optimizer/path/equivclass.c
@@ -0,0 +1,3226 @@
+/*-------------------------------------------------------------------------
+ *
+ * equivclass.c
+ * Routines for managing EquivalenceClasses
+ *
+ * See src/backend/optimizer/README for discussion of EquivalenceClasses.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/equivclass.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/stratnum.h"
+#include "catalog/pg_type.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/appendinfo.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
+#include "utils/lsyscache.h"
+
+
+static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
+ Expr *expr, Relids relids, Relids nullable_relids,
+ bool is_child, Oid datatype);
+static bool is_exprlist_member(Expr *node, List *exprs);
+static void generate_base_implied_equalities_const(PlannerInfo *root,
+ EquivalenceClass *ec);
+static void generate_base_implied_equalities_no_const(PlannerInfo *root,
+ EquivalenceClass *ec);
+static void generate_base_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec);
+static List *generate_join_implied_equalities_normal(PlannerInfo *root,
+ EquivalenceClass *ec,
+ Relids join_relids,
+ Relids outer_relids,
+ Relids inner_relids);
+static List *generate_join_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec,
+ Relids nominal_join_relids,
+ Relids outer_relids,
+ Relids nominal_inner_relids,
+ RelOptInfo *inner_rel);
+static Oid select_equality_operator(EquivalenceClass *ec,
+ Oid lefttype, Oid righttype);
+static RestrictInfo *create_join_clause(PlannerInfo *root,
+ EquivalenceClass *ec, Oid opno,
+ EquivalenceMember *leftem,
+ EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec);
+static bool reconsider_outer_join_clause(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ bool outer_on_left);
+static bool reconsider_full_join_clause(PlannerInfo *root,
+ RestrictInfo *rinfo);
+static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root,
+ Relids relids);
+static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1,
+ Relids relids2);
+
+
+/*
+ * process_equivalence
+ * The given clause has a mergejoinable operator and can be applied without
+ * any delay by an outer join, so its two sides can be considered equal
+ * anywhere they are both computable; moreover that equality can be
+ * extended transitively. Record this knowledge in the EquivalenceClass
+ * data structure, if applicable. Returns true if successful, false if not
+ * (in which case caller should treat the clause as ordinary, not an
+ * equivalence).
+ *
+ * In some cases, although we cannot convert a clause into EquivalenceClass
+ * knowledge, we can still modify it to a more useful form than the original.
+ * Then, *p_restrictinfo will be replaced by a new RestrictInfo, which is what
+ * the caller should use for further processing.
+ *
+ * If below_outer_join is true, then the clause was found below the nullable
+ * side of an outer join, so its sides might validly be both NULL rather than
+ * strictly equal. We can still deduce equalities in such cases, but we take
+ * care to mark an EquivalenceClass if it came from any such clauses. Also,
+ * we have to check that both sides are either pseudo-constants or strict
+ * functions of Vars, else they might not both go to NULL above the outer
+ * join. (This is the main reason why we need a failure return. It's more
+ * convenient to check this case here than at the call sites...)
+ *
+ * We also reject proposed equivalence clauses if they contain leaky functions
+ * and have security_level above zero. The EC evaluation rules require us to
+ * apply certain tests at certain joining levels, and we can't tolerate
+ * delaying any test on security_level grounds. By rejecting candidate clauses
+ * that might require security delays, we ensure it's safe to apply an EC
+ * clause as soon as it's supposed to be applied.
+ *
+ * On success return, we have also initialized the clause's left_ec/right_ec
+ * fields to point to the EquivalenceClass representing it. This saves lookup
+ * effort later.
+ *
+ * Note: constructing merged EquivalenceClasses is a standard UNION-FIND
+ * problem, for which there exist better data structures than simple lists.
+ * If this code ever proves to be a bottleneck then it could be sped up ---
+ * but for now, simple is beautiful.
+ *
+ * Note: this is only called during planner startup, not during GEQO
+ * exploration, so we need not worry about whether we're in the right
+ * memory context.
+ */
+bool
+process_equivalence(PlannerInfo *root,
+ RestrictInfo **p_restrictinfo,
+ bool below_outer_join)
+{
+ RestrictInfo *restrictinfo = *p_restrictinfo;
+ Expr *clause = restrictinfo->clause;
+ Oid opno,
+ collation,
+ item1_type,
+ item2_type;
+ Expr *item1;
+ Expr *item2;
+ Relids item1_relids,
+ item2_relids,
+ item1_nullable_relids,
+ item2_nullable_relids;
+ List *opfamilies;
+ EquivalenceClass *ec1,
+ *ec2;
+ EquivalenceMember *em1,
+ *em2;
+ ListCell *lc1;
+ int ec2_idx;
+
+ /* Should not already be marked as having generated an eclass */
+ Assert(restrictinfo->left_ec == NULL);
+ Assert(restrictinfo->right_ec == NULL);
+
+ /* Reject if it is potentially postponable by security considerations */
+ if (restrictinfo->security_level > 0 && !restrictinfo->leakproof)
+ return false;
+
+ /* Extract info from given clause */
+ Assert(is_opclause(clause));
+ opno = ((OpExpr *) clause)->opno;
+ collation = ((OpExpr *) clause)->inputcollid;
+ item1 = (Expr *) get_leftop(clause);
+ item2 = (Expr *) get_rightop(clause);
+ item1_relids = restrictinfo->left_relids;
+ item2_relids = restrictinfo->right_relids;
+
+ /*
+ * Ensure both input expressions expose the desired collation (their types
+ * should be OK already); see comments for canonicalize_ec_expression.
+ */
+ item1 = canonicalize_ec_expression(item1,
+ exprType((Node *) item1),
+ collation);
+ item2 = canonicalize_ec_expression(item2,
+ exprType((Node *) item2),
+ collation);
+
+ /*
+ * Clauses of the form X=X cannot be translated into EquivalenceClasses.
+ * We'd either end up with a single-entry EC, losing the knowledge that
+ * the clause was present at all, or else make an EC with duplicate
+ * entries, causing other issues.
+ */
+ if (equal(item1, item2))
+ {
+ /*
+ * If the operator is strict, then the clause can be treated as just
+ * "X IS NOT NULL". (Since we know we are considering a top-level
+ * qual, we can ignore the difference between FALSE and NULL results.)
+ * It's worth making the conversion because we'll typically get a much
+ * better selectivity estimate than we would for X=X.
+ *
+ * If the operator is not strict, we can't be sure what it will do
+ * with NULLs, so don't attempt to optimize it.
+ */
+ set_opfuncid((OpExpr *) clause);
+ if (func_strict(((OpExpr *) clause)->opfuncid))
+ {
+ NullTest *ntest = makeNode(NullTest);
+
+ ntest->arg = item1;
+ ntest->nulltesttype = IS_NOT_NULL;
+ ntest->argisrow = false; /* correct even if composite arg */
+ ntest->location = -1;
+
+ *p_restrictinfo =
+ make_restrictinfo(root,
+ (Expr *) ntest,
+ restrictinfo->is_pushed_down,
+ restrictinfo->outerjoin_delayed,
+ restrictinfo->pseudoconstant,
+ restrictinfo->security_level,
+ NULL,
+ restrictinfo->outer_relids,
+ restrictinfo->nullable_relids);
+ }
+ return false;
+ }
+
+ /*
+ * If below outer join, check for strictness, else reject.
+ */
+ if (below_outer_join)
+ {
+ if (!bms_is_empty(item1_relids) &&
+ contain_nonstrict_functions((Node *) item1))
+ return false; /* LHS is non-strict but not constant */
+ if (!bms_is_empty(item2_relids) &&
+ contain_nonstrict_functions((Node *) item2))
+ return false; /* RHS is non-strict but not constant */
+ }
+
+ /* Calculate nullable-relid sets for each side of the clause */
+ item1_nullable_relids = bms_intersect(item1_relids,
+ restrictinfo->nullable_relids);
+ item2_nullable_relids = bms_intersect(item2_relids,
+ restrictinfo->nullable_relids);
+
+ /*
+ * We use the declared input types of the operator, not exprType() of the
+ * inputs, as the nominal datatypes for opfamily lookup. This presumes
+ * that btree operators are always registered with amoplefttype and
+ * amoprighttype equal to their declared input types. We will need this
+ * info anyway to build EquivalenceMember nodes, and by extracting it now
+ * we can use type comparisons to short-circuit some equal() tests.
+ */
+ op_input_types(opno, &item1_type, &item2_type);
+
+ opfamilies = restrictinfo->mergeopfamilies;
+
+ /*
+ * Sweep through the existing EquivalenceClasses looking for matches to
+ * item1 and item2. These are the possible outcomes:
+ *
+ * 1. We find both in the same EC. The equivalence is already known, so
+ * there's nothing to do.
+ *
+ * 2. We find both in different ECs. Merge the two ECs together.
+ *
+ * 3. We find just one. Add the other to its EC.
+ *
+ * 4. We find neither. Make a new, two-entry EC.
+ *
+ * Note: since all ECs are built through this process or the similar
+ * search in get_eclass_for_sort_expr(), it's impossible that we'd match
+ * an item in more than one existing nonvolatile EC. So it's okay to stop
+ * at the first match.
+ */
+ ec1 = ec2 = NULL;
+ em1 = em2 = NULL;
+ ec2_idx = -1;
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /* Never match to a volatile EC */
+ if (cur_ec->ec_has_volatile)
+ continue;
+
+ /*
+ * The collation has to match; check this first since it's cheaper
+ * than the opfamily comparison.
+ */
+ if (collation != cur_ec->ec_collation)
+ continue;
+
+ /*
+ * A "match" requires matching sets of btree opfamilies. Use of
+ * equal() for this test has implications discussed in the comments
+ * for get_mergejoin_opfamilies().
+ */
+ if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+
+ /*
+ * If below an outer join, don't match constants: they're not as
+ * constant as they look.
+ */
+ if ((below_outer_join || cur_ec->ec_below_outer_join) &&
+ cur_em->em_is_const)
+ continue;
+
+ if (!ec1 &&
+ item1_type == cur_em->em_datatype &&
+ equal(item1, cur_em->em_expr))
+ {
+ ec1 = cur_ec;
+ em1 = cur_em;
+ if (ec2)
+ break;
+ }
+
+ if (!ec2 &&
+ item2_type == cur_em->em_datatype &&
+ equal(item2, cur_em->em_expr))
+ {
+ ec2 = cur_ec;
+ ec2_idx = foreach_current_index(lc1);
+ em2 = cur_em;
+ if (ec1)
+ break;
+ }
+ }
+
+ if (ec1 && ec2)
+ break;
+ }
+
+ /* Sweep finished, what did we find? */
+
+ if (ec1 && ec2)
+ {
+ /* If case 1, nothing to do, except add to sources */
+ if (ec1 == ec2)
+ {
+ ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
+ ec1->ec_below_outer_join |= below_outer_join;
+ ec1->ec_min_security = Min(ec1->ec_min_security,
+ restrictinfo->security_level);
+ ec1->ec_max_security = Max(ec1->ec_max_security,
+ restrictinfo->security_level);
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec1;
+ restrictinfo->right_ec = ec1;
+ /* mark the RI as usable with this pair of EMs */
+ restrictinfo->left_em = em1;
+ restrictinfo->right_em = em2;
+ return true;
+ }
+
+ /*
+ * Case 2: need to merge ec1 and ec2. This should never happen after
+ * the ECs have reached canonical state; otherwise, pathkeys could be
+ * rendered non-canonical by the merge, and relation eclass indexes
+ * would get broken by removal of an eq_classes list entry.
+ */
+ if (root->ec_merging_done)
+ elog(ERROR, "too late to merge equivalence classes");
+
+ /*
+ * We add ec2's items to ec1, then set ec2's ec_merged link to point
+ * to ec1 and remove ec2 from the eq_classes list. We cannot simply
+ * delete ec2 because that could leave dangling pointers in existing
+ * PathKeys. We leave it behind with a link so that the merged EC can
+ * be found.
+ */
+ ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
+ ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
+ ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives);
+ ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
+ ec1->ec_has_const |= ec2->ec_has_const;
+ /* can't need to set has_volatile */
+ ec1->ec_below_outer_join |= ec2->ec_below_outer_join;
+ ec1->ec_min_security = Min(ec1->ec_min_security,
+ ec2->ec_min_security);
+ ec1->ec_max_security = Max(ec1->ec_max_security,
+ ec2->ec_max_security);
+ ec2->ec_merged = ec1;
+ root->eq_classes = list_delete_nth_cell(root->eq_classes, ec2_idx);
+ /* just to avoid debugging confusion w/ dangling pointers: */
+ ec2->ec_members = NIL;
+ ec2->ec_sources = NIL;
+ ec2->ec_derives = NIL;
+ ec2->ec_relids = NULL;
+ ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
+ ec1->ec_below_outer_join |= below_outer_join;
+ ec1->ec_min_security = Min(ec1->ec_min_security,
+ restrictinfo->security_level);
+ ec1->ec_max_security = Max(ec1->ec_max_security,
+ restrictinfo->security_level);
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec1;
+ restrictinfo->right_ec = ec1;
+ /* mark the RI as usable with this pair of EMs */
+ restrictinfo->left_em = em1;
+ restrictinfo->right_em = em2;
+ }
+ else if (ec1)
+ {
+ /* Case 3: add item2 to ec1 */
+ em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids,
+ false, item2_type);
+ ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
+ ec1->ec_below_outer_join |= below_outer_join;
+ ec1->ec_min_security = Min(ec1->ec_min_security,
+ restrictinfo->security_level);
+ ec1->ec_max_security = Max(ec1->ec_max_security,
+ restrictinfo->security_level);
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec1;
+ restrictinfo->right_ec = ec1;
+ /* mark the RI as usable with this pair of EMs */
+ restrictinfo->left_em = em1;
+ restrictinfo->right_em = em2;
+ }
+ else if (ec2)
+ {
+ /* Case 3: add item1 to ec2 */
+ em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids,
+ false, item1_type);
+ ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
+ ec2->ec_below_outer_join |= below_outer_join;
+ ec2->ec_min_security = Min(ec2->ec_min_security,
+ restrictinfo->security_level);
+ ec2->ec_max_security = Max(ec2->ec_max_security,
+ restrictinfo->security_level);
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec2;
+ restrictinfo->right_ec = ec2;
+ /* mark the RI as usable with this pair of EMs */
+ restrictinfo->left_em = em1;
+ restrictinfo->right_em = em2;
+ }
+ else
+ {
+ /* Case 4: make a new, two-entry EC */
+ EquivalenceClass *ec = makeNode(EquivalenceClass);
+
+ ec->ec_opfamilies = opfamilies;
+ ec->ec_collation = collation;
+ ec->ec_members = NIL;
+ ec->ec_sources = list_make1(restrictinfo);
+ ec->ec_derives = NIL;
+ ec->ec_relids = NULL;
+ ec->ec_has_const = false;
+ ec->ec_has_volatile = false;
+ ec->ec_below_outer_join = below_outer_join;
+ ec->ec_broken = false;
+ ec->ec_sortref = 0;
+ ec->ec_min_security = restrictinfo->security_level;
+ ec->ec_max_security = restrictinfo->security_level;
+ ec->ec_merged = NULL;
+ em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
+ false, item1_type);
+ em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
+ false, item2_type);
+
+ root->eq_classes = lappend(root->eq_classes, ec);
+
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec;
+ restrictinfo->right_ec = ec;
+ /* mark the RI as usable with this pair of EMs */
+ restrictinfo->left_em = em1;
+ restrictinfo->right_em = em2;
+ }
+
+ return true;
+}
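+
+/*
+ * Example of the outcomes above (hypothetical quals): processing a.x = b.y
+ * creates the two-member EC {a.x, b.y}; a later b.y = c.z falls under case 3
+ * and extends it to {a.x, b.y, c.z}; if a.x = c.z then arrives, both sides
+ * are found in the same EC (case 1) and only the source list grows.  Had
+ * {a.x, b.y} and {c.z, d.w} been built separately, a.x = c.z would trigger
+ * the case-2 merge into a single four-member class.
+ */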
+
+/*
+ * canonicalize_ec_expression
+ *
+ * This function ensures that the expression exposes the expected type and
+ * collation, so that it will be equal() to other equivalence-class expressions
+ * that it ought to be equal() to.
+ *
+ * The rule for datatypes is that the exposed type should match what it would
+ * be for an input to an operator of the EC's opfamilies; which is usually
+ * the declared input type of the operator, but in the case of polymorphic
+ * operators no relabeling is wanted (compare the behavior of parse_coerce.c).
+ * Expressions coming in from quals will generally have the right type
+ * already, but expressions coming from indexkeys may not (because they are
+ * represented without any explicit relabel in pg_index), and the same problem
+ * occurs for sort expressions (because the parser is likewise cavalier about
+ * putting relabels on them). Such cases will be binary-compatible with the
+ * real operators, so adding a RelabelType is sufficient.
+ *
+ * Also, the expression's exposed collation must match the EC's collation.
+ * This is important because in comparisons like "foo < bar COLLATE baz",
+ * only one of the expressions has the correct exposed collation as we receive
+ * it from the parser. Forcing both of them to have it ensures that all
+ * variant spellings of such a construct behave the same. Again, we can
+ * stick on a RelabelType to force the right exposed collation. (It might
+ * work to not label the collation at all in EC members, but this is risky
+ * since some parts of the system expect exprCollation() to deliver the
+ * right answer for a sort key.)
+ */
+Expr *
+canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
+{
+ Oid expr_type = exprType((Node *) expr);
+
+ /*
+ * For a polymorphic-input-type opclass, just keep the same exposed type.
+ * RECORD opclasses work like polymorphic-type ones for this purpose.
+ */
+ if (IsPolymorphicType(req_type) || req_type == RECORDOID)
+ req_type = expr_type;
+
+ /*
+ * No work if the expression exposes the right type/collation already.
+ */
+ if (expr_type != req_type ||
+ exprCollation((Node *) expr) != req_collation)
+ {
+ /*
+ * If we have to change the type of the expression, set typmod to -1,
+ * since the new type may not have the same typmod interpretation.
+ * When we only have to change collation, preserve the exposed typmod.
+ */
+ int32 req_typmod;
+
+ if (expr_type != req_type)
+ req_typmod = -1;
+ else
+ req_typmod = exprTypmod((Node *) expr);
+
+ /*
+ * Use applyRelabelType so that we preserve const-flatness. This is
+ * important since eval_const_expressions has already been applied.
+ */
+ expr = (Expr *) applyRelabelType((Node *) expr,
+ req_type, req_typmod, req_collation,
+ COERCE_IMPLICIT_CAST, -1, false);
+ }
+
+ return expr;
+}
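+
+/*
+ * For illustration: an index key on a varchar column is stored in pg_index
+ * as a bare Var of type varchar, while a text btree opfamily expects text.
+ * Since varchar is binary-compatible with text, the function above wraps the
+ * Var in a RelabelType (typmod -1, the EC's collation) so that it will be
+ * equal() to the already-relabeled form appearing in quals on that column.
+ */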
+
+/*
+ * add_eq_member - build a new EquivalenceMember and add it to an EC
+ */
+static EquivalenceMember *
+add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
+ Relids nullable_relids, bool is_child, Oid datatype)
+{
+ EquivalenceMember *em = makeNode(EquivalenceMember);
+
+ em->em_expr = expr;
+ em->em_relids = relids;
+ em->em_nullable_relids = nullable_relids;
+ em->em_is_const = false;
+ em->em_is_child = is_child;
+ em->em_datatype = datatype;
+
+ if (bms_is_empty(relids))
+ {
+ /*
+ * No Vars, assume it's a pseudoconstant. This is correct for entries
+ * generated from process_equivalence(), because a WHERE clause can't
+ * contain aggregates or SRFs, and non-volatility was checked before
+ * process_equivalence() ever got called. But
+ * get_eclass_for_sort_expr() has to work harder. We put the tests
+ * there not here to save cycles in the equivalence case.
+ */
+ Assert(!is_child);
+ em->em_is_const = true;
+ ec->ec_has_const = true;
+ /* it can't affect ec_relids */
+ }
+ else if (!is_child) /* child members don't add to ec_relids */
+ {
+ ec->ec_relids = bms_add_members(ec->ec_relids, relids);
+ }
+ ec->ec_members = lappend(ec->ec_members, em);
+
+ return em;
+}
+
+
+/*
+ * get_eclass_for_sort_expr
+ * Given an expression and opfamily/collation info, find an existing
+ * equivalence class it is a member of; if none, optionally build a new
+ * single-member EquivalenceClass for it.
+ *
+ * expr is the expression, and nullable_relids is the set of base relids
+ * that are potentially nullable below it. We actually only care about
+ * the set of such relids that are used in the expression; but for caller
+ * convenience, we perform that intersection step here. The caller need
+ * only be sure that nullable_relids doesn't omit any nullable rels that
+ * might appear in the expr.
+ *
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
+ * or zero if not. (It should never be zero if the expression is volatile!)
+ *
+ * If rel is not NULL, it identifies a specific relation we're considering
+ * a path for, and indicates that child EC members for that relation can be
+ * considered. Otherwise child members are ignored. (Note: since child EC
+ * members aren't guaranteed unique, a non-NULL value means that there could
+ * be more than one EC that matches the expression; if so it's order-dependent
+ * which one you get. This is annoying but it only happens in corner cases,
+ * so for now we live with just reporting the first match. See also
+ * generate_implied_equalities_for_column and match_pathkeys_to_index.)
+ *
+ * If create_it is true, we'll build a new EquivalenceClass when there is no
+ * match. If create_it is false, we just return NULL when no match.
+ *
+ * This can be used safely both before and after EquivalenceClass merging;
+ * since it never causes merging it does not invalidate any existing ECs
+ * or PathKeys. However, ECs added after path generation has begun are
+ * of limited usefulness, so usually it's best to create them beforehand.
+ *
+ * Note: opfamilies must be chosen consistently with the way
+ * process_equivalence() would do; that is, generated from a mergejoinable
+ * equality operator. Else we might fail to detect valid equivalences,
+ * generating poor (but not incorrect) plans.
+ */
+EquivalenceClass *
+get_eclass_for_sort_expr(PlannerInfo *root,
+ Expr *expr,
+ Relids nullable_relids,
+ List *opfamilies,
+ Oid opcintype,
+ Oid collation,
+ Index sortref,
+ Relids rel,
+ bool create_it)
+{
+ Relids expr_relids;
+ EquivalenceClass *newec;
+ EquivalenceMember *newem;
+ ListCell *lc1;
+ MemoryContext oldcontext;
+
+ /*
+ * Ensure the expression exposes the correct type and collation.
+ */
+ expr = canonicalize_ec_expression(expr, opcintype, collation);
+
+ /*
+ * Scan through the existing EquivalenceClasses for a match
+ */
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /*
+ * Never match to a volatile EC, except when we are looking at another
+ * reference to the same volatile SortGroupClause.
+ */
+ if (cur_ec->ec_has_volatile &&
+ (sortref == 0 || sortref != cur_ec->ec_sortref))
+ continue;
+
+ if (collation != cur_ec->ec_collation)
+ continue;
+ if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ /*
+ * Ignore child members unless they match the request.
+ */
+ if (cur_em->em_is_child &&
+ !bms_equal(cur_em->em_relids, rel))
+ continue;
+
+ /*
+ * If below an outer join, don't match constants: they're not as
+ * constant as they look.
+ */
+ if (cur_ec->ec_below_outer_join &&
+ cur_em->em_is_const)
+ continue;
+
+ if (opcintype == cur_em->em_datatype &&
+ equal(expr, cur_em->em_expr))
+ return cur_ec; /* Match! */
+ }
+ }
+
+ /* No match; does caller want a NULL result? */
+ if (!create_it)
+ return NULL;
+
+ /*
+ * OK, build a new single-member EC
+ *
+ * Here, we must be sure that we construct the EC in the right context.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ newec = makeNode(EquivalenceClass);
+ newec->ec_opfamilies = list_copy(opfamilies);
+ newec->ec_collation = collation;
+ newec->ec_members = NIL;
+ newec->ec_sources = NIL;
+ newec->ec_derives = NIL;
+ newec->ec_relids = NULL;
+ newec->ec_has_const = false;
+ newec->ec_has_volatile = contain_volatile_functions((Node *) expr);
+ newec->ec_below_outer_join = false;
+ newec->ec_broken = false;
+ newec->ec_sortref = sortref;
+ newec->ec_min_security = UINT_MAX;
+ newec->ec_max_security = 0;
+ newec->ec_merged = NULL;
+
+ if (newec->ec_has_volatile && sortref == 0) /* should not happen */
+ elog(ERROR, "volatile EquivalenceClass has no sortref");
+
+ /*
+ * Get the precise set of nullable relids appearing in the expression.
+ */
+ expr_relids = pull_varnos(root, (Node *) expr);
+ nullable_relids = bms_intersect(nullable_relids, expr_relids);
+
+ newem = add_eq_member(newec, copyObject(expr), expr_relids,
+ nullable_relids, false, opcintype);
+
+ /*
+ * add_eq_member doesn't check for volatile functions, set-returning
+ * functions, aggregates, or window functions, but such could appear in
+ * sort expressions; so we have to check whether its const-marking was
+ * correct.
+ */
+ if (newec->ec_has_const)
+ {
+ if (newec->ec_has_volatile ||
+ expression_returns_set((Node *) expr) ||
+ contain_agg_clause((Node *) expr) ||
+ contain_window_function((Node *) expr))
+ {
+ newec->ec_has_const = false;
+ newem->em_is_const = false;
+ }
+ }
+
+ root->eq_classes = lappend(root->eq_classes, newec);
+
+ /*
+ * If EC merging is already complete, we have to mop up by adding the new
+ * EC to the eclass_indexes of the relation(s) mentioned in it.
+ */
+ if (root->ec_merging_done)
+ {
+ int ec_index = list_length(root->eq_classes) - 1;
+ int i = -1;
+
+ while ((i = bms_next_member(newec->ec_relids, i)) > 0)
+ {
+ RelOptInfo *rel = root->simple_rel_array[i];
+
+ Assert(rel->reloptkind == RELOPT_BASEREL ||
+ rel->reloptkind == RELOPT_DEADREL);
+
+ rel->eclass_indexes = bms_add_member(rel->eclass_indexes,
+ ec_index);
+ }
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return newec;
+}
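+
+/*
+ * For illustration: a sort expression such as lower(a.name) that appears in
+ * no equality qual will normally match none of the existing ECs scanned
+ * above; when called with create_it = true while building pathkeys for the
+ * sort, it therefore gets a brand-new single-member EC.  Later lookups of
+ * the same expression find and return that EC, so pathkeys built from it
+ * can be recognized as matching.
+ */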
+
+/*
+ * find_ec_member_matching_expr
+ * Locate an EquivalenceClass member matching the given expr, if any;
+ * return NULL if no match.
+ *
+ * "Matching" is defined as "equal after stripping RelabelTypes".
+ * This is used for identifying sort expressions, and we need to allow
+ * binary-compatible relabeling for some cases involving binary-compatible
+ * sort operators.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ */
+EquivalenceMember *
+find_ec_member_matching_expr(EquivalenceClass *ec,
+ Expr *expr,
+ Relids relids)
+{
+ ListCell *lc;
+
+ /* We ignore binary-compatible relabeling on both ends */
+ while (expr && IsA(expr, RelabelType))
+ expr = ((RelabelType *) expr)->arg;
+
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+ Expr *emexpr;
+
+ /*
+ * We shouldn't be trying to sort by an equivalence class that
+ * contains a constant, so no need to consider such cases any further.
+ */
+ if (em->em_is_const)
+ continue;
+
+ /*
+ * Ignore child members unless they belong to the requested rel.
+ */
+ if (em->em_is_child &&
+ !bms_is_subset(em->em_relids, relids))
+ continue;
+
+ /*
+ * Match if same expression (after stripping relabel).
+ */
+ emexpr = em->em_expr;
+ while (emexpr && IsA(emexpr, RelabelType))
+ emexpr = ((RelabelType *) emexpr)->arg;
+
+ if (equal(emexpr, expr))
+ return em;
+ }
+
+ return NULL;
+}
+
+/*
+ * find_computable_ec_member
+ * Locate an EquivalenceClass member that can be computed from the
+ * expressions appearing in "exprs"; return NULL if no match.
+ *
+ * "exprs" can be either a list of bare expression trees, or a list of
+ * TargetEntry nodes. Either way, it should contain Vars and possibly
+ * Aggrefs and WindowFuncs, which are matched to the corresponding elements
+ * of the EquivalenceClass's expressions.
+ *
+ * Unlike find_ec_member_matching_expr, there's no special provision here
+ * for binary-compatible relabeling. This is intentional: if we have to
+ * compute an expression in this way, setrefs.c is going to insist on exact
+ * matches of Vars to the source tlist.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ * Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'.
+ *
+ * Note: some callers pass root == NULL for notational reasons. This is OK
+ * when require_parallel_safe is false.
+ */
+EquivalenceMember *
+find_computable_ec_member(PlannerInfo *root,
+ EquivalenceClass *ec,
+ List *exprs,
+ Relids relids,
+ bool require_parallel_safe)
+{
+ ListCell *lc;
+
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+ List *exprvars;
+ ListCell *lc2;
+
+ /*
+ * We shouldn't be trying to sort by an equivalence class that
+ * contains a constant, so no need to consider such cases any further.
+ */
+ if (em->em_is_const)
+ continue;
+
+ /*
+ * Ignore child members unless they belong to the requested rel.
+ */
+ if (em->em_is_child &&
+ !bms_is_subset(em->em_relids, relids))
+ continue;
+
+ /*
+ * Match if all Vars and quasi-Vars are available in "exprs".
+ */
+ exprvars = pull_var_clause((Node *) em->em_expr,
+ PVC_INCLUDE_AGGREGATES |
+ PVC_INCLUDE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+ foreach(lc2, exprvars)
+ {
+ if (!is_exprlist_member(lfirst(lc2), exprs))
+ break;
+ }
+ list_free(exprvars);
+ if (lc2)
+ continue; /* we hit a non-available Var */
+
+ /*
+ * If requested, reject expressions that are not parallel-safe. We
+ * check this last because it's a rather expensive test.
+ */
+ if (require_parallel_safe &&
+ !is_parallel_safe(root, (Node *) em->em_expr))
+ continue;
+
+ return em; /* found usable expression */
+ }
+
+ return NULL;
+}
+
+/*
+ * is_exprlist_member
+ * Subroutine for find_computable_ec_member: is "node" in "exprs"?
+ *
+ * Per the requirements of that function, "exprs" might or might not have
+ * TargetEntry superstructure.
+ */
+static bool
+is_exprlist_member(Expr *node, List *exprs)
+{
+ ListCell *lc;
+
+ foreach(lc, exprs)
+ {
+ Expr *expr = (Expr *) lfirst(lc);
+
+ if (expr && IsA(expr, TargetEntry))
+ expr = ((TargetEntry *) expr)->expr;
+
+ if (equal(node, expr))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * relation_can_be_sorted_early
+ * Can this relation be sorted on this EC before the final output step?
+ *
+ * To succeed, we must find an EC member that prepare_sort_from_pathkeys knows
+ * how to sort on, given the rel's reltarget as input. There are also a few
+ * additional constraints based on the fact that the desired sort will be done
+ * "early", within the scan/join part of the plan. Also, non-parallel-safe
+ * expressions are ignored if 'require_parallel_safe'.
+ *
+ * At some point we might want to return the identified EquivalenceMember,
+ * but for now, callers only want to know if there is one.
+ */
+bool
+relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel,
+ EquivalenceClass *ec, bool require_parallel_safe)
+{
+ PathTarget *target = rel->reltarget;
+ EquivalenceMember *em;
+ ListCell *lc;
+
+ /*
+ * Reject volatile ECs immediately; such sorts must always be postponed.
+ */
+ if (ec->ec_has_volatile)
+ return false;
+
+ /*
+ * Try to find an EM directly matching some reltarget member.
+ */
+ foreach(lc, target->exprs)
+ {
+ Expr *targetexpr = (Expr *) lfirst(lc);
+
+ em = find_ec_member_matching_expr(ec, targetexpr, rel->relids);
+ if (!em)
+ continue;
+
+ /*
+ * Reject expressions involving set-returning functions, as those
+ * can't be computed early either. (Note: this test and the following
+ * one are effectively checking properties of targetexpr, so there's
+ * no point in asking whether some other EC member would be better.)
+ */
+ if (expression_returns_set((Node *) em->em_expr))
+ continue;
+
+ /*
+ * If requested, reject expressions that are not parallel-safe. We
+ * check this last because it's a rather expensive test.
+ */
+ if (require_parallel_safe &&
+ !is_parallel_safe(root, (Node *) em->em_expr))
+ continue;
+
+ return true;
+ }
+
+ /*
+ * Try to find an expression computable from the reltarget.
+ */
+ em = find_computable_ec_member(root, ec, target->exprs, rel->relids,
+ require_parallel_safe);
+ if (!em)
+ return false;
+
+ /*
+ * Reject expressions involving set-returning functions, as those can't be
+ * computed early either. (There's no point in looking for another EC
+ * member in this case; since SRFs can't appear in WHERE, they cannot
+ * belong to multi-member ECs.)
+ */
+ if (expression_returns_set((Node *) em->em_expr))
+ return false;
+
+ return true;
+}
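+
+/*
+ * For illustration: if the query sorts on a.x and rel "a" exposes a.x in its
+ * reltarget, the first loop above finds a directly-matching member, so "a"
+ * can be sorted on this EC before any joins.  An EC member such as a.x + b.y,
+ * by contrast, is not computable from "a" alone and would not qualify.
+ */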
+
+/*
+ * generate_base_implied_equalities
+ * Generate any restriction clauses that we can deduce from equivalence
+ * classes.
+ *
+ * When an EC contains pseudoconstants, our strategy is to generate
+ * "member = const1" clauses where const1 is the first constant member, for
+ * every other member (including other constants). If we are able to do this
+ * then we don't need any "var = var" comparisons because we've successfully
+ * constrained all the vars at their points of creation. If we fail to
+ * generate any of these clauses due to lack of cross-type operators, we fall
+ * back to the "ec_broken" strategy described below. (XXX if there are
+ * multiple constants of different types, it's possible that we might succeed
+ * in forming all the required clauses if we started from a different const
+ * member; but this seems a sufficiently hokey corner case to not be worth
+ * spending lots of cycles on.)
+ *
+ * For ECs that contain no pseudoconstants, we generate derived clauses
+ * "member1 = member2" for each pair of members belonging to the same base
+ * relation (actually, if there are more than two for the same base relation,
+ * we only need enough clauses to link each to each other). This provides
+ * the base case for the recursion: each row emitted by a base relation scan
+ * will constrain all computable members of the EC to be equal. As each
+ * join path is formed, we'll add additional derived clauses on-the-fly
+ * to maintain this invariant (see generate_join_implied_equalities).
+ *
+ * If the opfamilies used by the EC do not provide complete sets of cross-type
+ * equality operators, it is possible that we will fail to generate a clause
+ * that must be generated to maintain the invariant. (An example: given
+ * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot
+ * generate "a.x = a.z" as a restriction clause for A.) In this case we mark
+ * the EC "ec_broken" and fall back to regurgitating its original source
+ * RestrictInfos at appropriate times. We do not try to retract any derived
+ * clauses already generated from the broken EC, so the resulting plan could
+ * be poor due to bad selectivity estimates caused by redundant clauses. But
+ * the correct solution to that is to fix the opfamilies ...
+ *
+ * Equality clauses derived by this function are passed off to
+ * process_implied_equality (in plan/initsplan.c) to be inserted into the
+ * restrictinfo datastructures. Note that this must be called after initial
+ * scanning of the quals and before Path construction begins.
+ *
+ * We make no attempt to avoid generating duplicate RestrictInfos here: we
+ * don't search ec_sources or ec_derives for matches. It doesn't really
+ * seem worth the trouble to do so.
+ */
+void
+generate_base_implied_equalities(PlannerInfo *root)
+{
+ int ec_index;
+ ListCell *lc;
+
+ /*
+ * At this point, we're done absorbing knowledge of equivalences in the
+ * query, so no further EC merging should happen, and ECs remaining in the
+ * eq_classes list can be considered canonical. (But note that it's still
+ * possible for new single-member ECs to be added through
+ * get_eclass_for_sort_expr().)
+ */
+ root->ec_merging_done = true;
+
+ ec_index = 0;
+ foreach(lc, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
+ bool can_generate_joinclause = false;
+ int i;
+
+ Assert(ec->ec_merged == NULL); /* else shouldn't be in list */
+ Assert(!ec->ec_broken); /* not yet anyway... */
+
+ /*
+ * Generate implied equalities that are restriction clauses.
+ * Single-member ECs won't generate any deductions, either here or at
+ * the join level.
+ */
+ if (list_length(ec->ec_members) > 1)
+ {
+ if (ec->ec_has_const)
+ generate_base_implied_equalities_const(root, ec);
+ else
+ generate_base_implied_equalities_no_const(root, ec);
+
+ /* Recover if we failed to generate required derived clauses */
+ if (ec->ec_broken)
+ generate_base_implied_equalities_broken(root, ec);
+
+ /* Detect whether this EC might generate join clauses */
+ can_generate_joinclause =
+ (bms_membership(ec->ec_relids) == BMS_MULTIPLE);
+ }
+
+ /*
+ * Mark the base rels cited in each eclass (which should all exist by
+ * now) with the eq_classes indexes of all eclasses mentioning them.
+ * This will let us avoid searching in subsequent lookups. While
+ * we're at it, we can mark base rels that have pending eclass joins;
+ * this is a cheap version of has_relevant_eclass_joinclause().
+ */
+ i = -1;
+ while ((i = bms_next_member(ec->ec_relids, i)) > 0)
+ {
+ RelOptInfo *rel = root->simple_rel_array[i];
+
+ Assert(rel->reloptkind == RELOPT_BASEREL);
+
+ rel->eclass_indexes = bms_add_member(rel->eclass_indexes,
+ ec_index);
+
+ if (can_generate_joinclause)
+ rel->has_eclass_joins = true;
+ }
+
+ ec_index++;
+ }
+}
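+
+/*
+ * For illustration: given WHERE a.x = b.y AND b.y = 42, the resulting EC
+ * {a.x, b.y, 42} contains a constant, so the "const" path derives a.x = 42
+ * and b.y = 42 as restriction clauses for the two base rels.  No a.x = b.y
+ * join clause is needed, since both sides are constrained at scan level, and
+ * the EC is then ignored by generate_join_implied_equalities.
+ */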
+
+/*
+ * generate_base_implied_equalities when EC contains pseudoconstant(s)
+ */
+static void
+generate_base_implied_equalities_const(PlannerInfo *root,
+ EquivalenceClass *ec)
+{
+ EquivalenceMember *const_em = NULL;
+ ListCell *lc;
+
+ /*
+ * In the trivial case where we just had one "var = const" clause, push
+ * the original clause back into the main planner machinery. There is
+ * nothing to be gained by doing it differently, and we save the effort to
+ * re-build and re-analyze an equality clause that will be exactly
+ * equivalent to the old one.
+ */
+ if (list_length(ec->ec_members) == 2 &&
+ list_length(ec->ec_sources) == 1)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) linitial(ec->ec_sources);
+
+ if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
+ {
+ distribute_restrictinfo_to_rels(root, restrictinfo);
+ return;
+ }
+ }
+
+ /*
+ * Find the constant member to use. We prefer an actual constant to
+ * pseudo-constants (such as Params), because the constraint exclusion
+ * machinery might be able to exclude relations on the basis of generated
+ * "var = const" equalities, but "var = param" won't work for that.
+ */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+
+ if (cur_em->em_is_const)
+ {
+ const_em = cur_em;
+ if (IsA(cur_em->em_expr, Const))
+ break;
+ }
+ }
+ Assert(const_em != NULL);
+
+ /* Generate a derived equality against each other member */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+ Oid eq_op;
+ RestrictInfo *rinfo;
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+ if (cur_em == const_em)
+ continue;
+ eq_op = select_equality_operator(ec,
+ cur_em->em_datatype,
+ const_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ break;
+ }
+ rinfo = process_implied_equality(root, eq_op, ec->ec_collation,
+ cur_em->em_expr, const_em->em_expr,
+ bms_copy(ec->ec_relids),
+ bms_union(cur_em->em_nullable_relids,
+ const_em->em_nullable_relids),
+ ec->ec_min_security,
+ ec->ec_below_outer_join,
+ cur_em->em_is_const);
+
+ /*
+ * If the clause didn't degenerate to a constant, fill in the correct
+ * markings for a mergejoinable clause, and save it in ec_derives. (We
+ * will not re-use such clauses directly, but selectivity estimation
+ * may consult the list later. Note that this use of ec_derives does
+ * not overlap with its use for join clauses, since we never generate
+ * join clauses from an ec_has_const eclass.)
+ */
+ if (rinfo && rinfo->mergeopfamilies)
+ {
+ /* it's not redundant, so don't set parent_ec */
+ rinfo->left_ec = rinfo->right_ec = ec;
+ rinfo->left_em = cur_em;
+ rinfo->right_em = const_em;
+ ec->ec_derives = lappend(ec->ec_derives, rinfo);
+ }
+ }
+}
+
+/*
+ * generate_base_implied_equalities when EC contains no pseudoconstants
+ */
+static void
+generate_base_implied_equalities_no_const(PlannerInfo *root,
+ EquivalenceClass *ec)
+{
+ EquivalenceMember **prev_ems;
+ ListCell *lc;
+
+ /*
+ * We scan the EC members once and track the last-seen member for each
+ * base relation. When we see another member of the same base relation,
+ * we generate "prev_em = cur_em". This results in the minimum number of
+ * derived clauses, but it's possible that it will fail when a different
+ * ordering would succeed. XXX FIXME: use a UNION-FIND algorithm similar
+ * to the way we build merged ECs. (Use a list-of-lists for each rel.)
+ */
+ prev_ems = (EquivalenceMember **)
+ palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *));
+
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+ int relid;
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+ if (!bms_get_singleton_member(cur_em->em_relids, &relid))
+ continue;
+ Assert(relid < root->simple_rel_array_size);
+
+ if (prev_ems[relid] != NULL)
+ {
+ EquivalenceMember *prev_em = prev_ems[relid];
+ Oid eq_op;
+ RestrictInfo *rinfo;
+
+ eq_op = select_equality_operator(ec,
+ prev_em->em_datatype,
+ cur_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ break;
+ }
+ rinfo = process_implied_equality(root, eq_op, ec->ec_collation,
+ prev_em->em_expr, cur_em->em_expr,
+ bms_copy(ec->ec_relids),
+ bms_union(prev_em->em_nullable_relids,
+ cur_em->em_nullable_relids),
+ ec->ec_min_security,
+ ec->ec_below_outer_join,
+ false);
+
+ /*
+ * If the clause didn't degenerate to a constant, fill in the
+ * correct markings for a mergejoinable clause. We don't put it
+ * in ec_derives however; we don't currently need to re-find such
+ * clauses, and we don't want to clutter that list with non-join
+ * clauses.
+ */
+ if (rinfo && rinfo->mergeopfamilies)
+ {
+ /* it's not redundant, so don't set parent_ec */
+ rinfo->left_ec = rinfo->right_ec = ec;
+ rinfo->left_em = prev_em;
+ rinfo->right_em = cur_em;
+ }
+ }
+ prev_ems[relid] = cur_em;
+ }
+
+ pfree(prev_ems);
+
+ /*
+ * We also have to make sure that all the Vars used in the member clauses
+ * will be available at any join node we might try to reference them at.
+ * For the moment we force all the Vars to be available at all join nodes
+ * for this eclass. Perhaps this could be improved by doing some
+ * pre-analysis of which members we prefer to join, but it's no worse than
+ * what happened in the pre-8.3 code.
+ */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+ List *vars = pull_var_clause((Node *) cur_em->em_expr,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ add_vars_to_targetlist(root, vars, ec->ec_relids, false);
+ list_free(vars);
+ }
+}
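+
+/*
+ * For illustration: for WHERE a.x = b.y AND a.z = b.y, the EC {a.x, b.y, a.z}
+ * has no constant.  Rel "a" contributes two single-rel members, so one clause
+ * linking them (say a.x = a.z) is emitted as a restriction on "a"; the
+ * equalities involving b.y are left to be generated at join time.
+ */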
+
+/*
+ * generate_base_implied_equalities cleanup after failure
+ *
+ * What we must do here is push any zero- or one-relation source RestrictInfos
+ * of the EC back into the main restrictinfo datastructures. Multi-relation
+ * clauses will be regurgitated later by generate_join_implied_equalities().
+ * (We do it this way to maintain continuity with the case that ec_broken
+ * becomes set only after we've gone up a join level or two.) However, for
+ * an EC that contains constants, we can adopt a simpler strategy and just
+ * throw back all the source RestrictInfos immediately; that works because
+ * we know that such an EC can't become broken later. (This rule justifies
+ * ignoring ec_has_const ECs in generate_join_implied_equalities, even when
+ * they are broken.)
+ */
+static void
+generate_base_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec)
+{
+ ListCell *lc;
+
+ foreach(lc, ec->ec_sources)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+ if (ec->ec_has_const ||
+ bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
+ distribute_restrictinfo_to_rels(root, restrictinfo);
+ }
+}
+
+
+/*
+ * generate_join_implied_equalities
+ * Generate any join clauses that we can deduce from equivalence classes.
+ *
+ * At a join node, we must enforce restriction clauses sufficient to ensure
+ * that all equivalence-class members computable at that node are equal.
+ * Since the set of clauses to enforce can vary depending on which subset
+ * relations are the inputs, we have to compute this afresh for each join
+ * relation pair. Hence a fresh List of RestrictInfo nodes is built and
+ * passed back on each call.
+ *
+ * In addition to its use at join nodes, this can be applied to generate
+ * eclass-based join clauses for use in a parameterized scan of a base rel.
+ * The reason for the asymmetry of specifying the inner rel as a RelOptInfo
+ * and the outer rel by Relids is that this usage occurs before we have
+ * built any join RelOptInfos.
+ *
+ * An annoying special case for parameterized scans is that the inner rel can
+ * be an appendrel child (an "other rel"). In this case we must generate
+ * appropriate clauses using child EC members. add_child_rel_equivalences
+ * must already have been done for the child rel.
+ *
+ * The results are sufficient for use in merge, hash, and plain nestloop join
+ * methods. We do not worry here about selecting clauses that are optimal
+ * for use in a parameterized indexscan. indxpath.c makes its own selections
+ * of clauses to use, and if the ones we pick here are redundant with those,
+ * the extras will be eliminated at createplan time, using the parent_ec
+ * markers that we provide (see is_redundant_derived_clause()).
+ *
+ * Because the same join clauses are likely to be needed multiple times as
+ * we consider different join paths, we avoid generating multiple copies:
+ * whenever we select a particular pair of EquivalenceMembers to join,
+ * we check to see if the pair matches any original clause (in ec_sources)
+ * or previously-built clause (in ec_derives). This saves memory and allows
+ * re-use of information cached in RestrictInfos.
+ *
+ * join_relids should always equal bms_union(outer_relids, inner_rel->relids).
+ * We could simplify this function's API by computing it internally, but in
+ * most current uses, the caller has the value at hand anyway.
+ */
+List *
+generate_join_implied_equalities(PlannerInfo *root,
+ Relids join_relids,
+ Relids outer_relids,
+ RelOptInfo *inner_rel)
+{
+ List *result = NIL;
+ Relids inner_relids = inner_rel->relids;
+ Relids nominal_inner_relids;
+ Relids nominal_join_relids;
+ Bitmapset *matching_ecs;
+ int i;
+
+ /* If inner rel is a child, extra setup work is needed */
+ if (IS_OTHER_REL(inner_rel))
+ {
+ Assert(!bms_is_empty(inner_rel->top_parent_relids));
+
+ /* Fetch relid set for the topmost parent rel */
+ nominal_inner_relids = inner_rel->top_parent_relids;
+ /* ECs will be marked with the parent's relid, not the child's */
+ nominal_join_relids = bms_union(outer_relids, nominal_inner_relids);
+ }
+ else
+ {
+ nominal_inner_relids = inner_relids;
+ nominal_join_relids = join_relids;
+ }
+
+ /*
+ * Get all eclasses that mention both inner and outer sides of the join
+ */
+ matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids,
+ outer_relids);
+
+ i = -1;
+ while ((i = bms_next_member(matching_ecs, i)) >= 0)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+ List *sublist = NIL;
+
+ /* ECs containing consts do not need any further enforcement */
+ if (ec->ec_has_const)
+ continue;
+
+ /* Single-member ECs won't generate any deductions */
+ if (list_length(ec->ec_members) <= 1)
+ continue;
+
+ /* Sanity check that this eclass overlaps the join */
+ Assert(bms_overlap(ec->ec_relids, nominal_join_relids));
+
+ if (!ec->ec_broken)
+ sublist = generate_join_implied_equalities_normal(root,
+ ec,
+ join_relids,
+ outer_relids,
+ inner_relids);
+
+ /* Recover if we failed to generate required derived clauses */
+ if (ec->ec_broken)
+ sublist = generate_join_implied_equalities_broken(root,
+ ec,
+ nominal_join_relids,
+ outer_relids,
+ nominal_inner_relids,
+ inner_rel);
+
+ result = list_concat(result, sublist);
+ }
+
+ return result;
+}
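+
+/*
+ * For illustration: with an EC {a.x, b.y, c.z} and no constants, joining
+ * {B, C} first produces b.y = c.z at that join; when the result is later
+ * joined to A, this function need only emit one clause tying a.x to either
+ * b.y or c.z, since the other equality is already enforced below.  Which
+ * pair gets used is decided in generate_join_implied_equalities_normal.
+ */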
+
+/*
+ * generate_join_implied_equalities_for_ecs
+ * As above, but consider only the listed ECs.
+ */
+List *
+generate_join_implied_equalities_for_ecs(PlannerInfo *root,
+ List *eclasses,
+ Relids join_relids,
+ Relids outer_relids,
+ RelOptInfo *inner_rel)
+{
+ List *result = NIL;
+ Relids inner_relids = inner_rel->relids;
+ Relids nominal_inner_relids;
+ Relids nominal_join_relids;
+ ListCell *lc;
+
+ /* If inner rel is a child, extra setup work is needed */
+ if (IS_OTHER_REL(inner_rel))
+ {
+ Assert(!bms_is_empty(inner_rel->top_parent_relids));
+
+ /* Fetch relid set for the topmost parent rel */
+ nominal_inner_relids = inner_rel->top_parent_relids;
+ /* ECs will be marked with the parent's relid, not the child's */
+ nominal_join_relids = bms_union(outer_relids, nominal_inner_relids);
+ }
+ else
+ {
+ nominal_inner_relids = inner_relids;
+ nominal_join_relids = join_relids;
+ }
+
+ foreach(lc, eclasses)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
+ List *sublist = NIL;
+
+ /* ECs containing consts do not need any further enforcement */
+ if (ec->ec_has_const)
+ continue;
+
+ /* Single-member ECs won't generate any deductions */
+ if (list_length(ec->ec_members) <= 1)
+ continue;
+
+ /* We can quickly ignore any that don't overlap the join, too */
+ if (!bms_overlap(ec->ec_relids, nominal_join_relids))
+ continue;
+
+ if (!ec->ec_broken)
+ sublist = generate_join_implied_equalities_normal(root,
+ ec,
+ join_relids,
+ outer_relids,
+ inner_relids);
+
+ /* Recover if we failed to generate required derived clauses */
+ if (ec->ec_broken)
+ sublist = generate_join_implied_equalities_broken(root,
+ ec,
+ nominal_join_relids,
+ outer_relids,
+ nominal_inner_relids,
+ inner_rel);
+
+ result = list_concat(result, sublist);
+ }
+
+ return result;
+}
+
+/*
+ * generate_join_implied_equalities for a still-valid EC
+ */
+static List *
+generate_join_implied_equalities_normal(PlannerInfo *root,
+ EquivalenceClass *ec,
+ Relids join_relids,
+ Relids outer_relids,
+ Relids inner_relids)
+{
+ List *result = NIL;
+ List *new_members = NIL;
+ List *outer_members = NIL;
+ List *inner_members = NIL;
+ ListCell *lc1;
+
+ /*
+ * First, scan the EC to identify member values that are computable at the
+ * outer rel, at the inner rel, or at this relation but not in either
+ * input rel. The outer-rel members should already be enforced equal,
+ * likewise for the inner-rel members. We'll need to create clauses to
+ * enforce that any newly computable members are all equal to each other
+ * as well as to at least one input member, plus enforce at least one
+ * outer-rel member equal to at least one inner-rel member.
+ */
+ foreach(lc1, ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);
+
+ /*
+ * We don't need to check explicitly for child EC members. This test
+ * against join_relids will cause them to be ignored except when
+ * considering a child inner rel, which is what we want.
+ */
+ if (!bms_is_subset(cur_em->em_relids, join_relids))
+ continue; /* not computable yet, or wrong child */
+
+ if (bms_is_subset(cur_em->em_relids, outer_relids))
+ outer_members = lappend(outer_members, cur_em);
+ else if (bms_is_subset(cur_em->em_relids, inner_relids))
+ inner_members = lappend(inner_members, cur_em);
+ else
+ new_members = lappend(new_members, cur_em);
+ }
+
+ /*
+ * Next, select the joinclause if needed. We can equate any one outer
+ * member to any one inner member, but we have to find a datatype
+ * combination for which an opfamily member operator exists. If we have
+ * choices, we prefer simple Var members (possibly with RelabelType) since
+ * these are (a) cheapest to compute at runtime and (b) most likely to
+ * have useful statistics. Also, prefer operators that are also
+ * hashjoinable.
+ */
+ if (outer_members && inner_members)
+ {
+ EquivalenceMember *best_outer_em = NULL;
+ EquivalenceMember *best_inner_em = NULL;
+ Oid best_eq_op = InvalidOid;
+ int best_score = -1;
+ RestrictInfo *rinfo;
+
+ foreach(lc1, outer_members)
+ {
+ EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1);
+ ListCell *lc2;
+
+ foreach(lc2, inner_members)
+ {
+ EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ int score;
+
+ eq_op = select_equality_operator(ec,
+ outer_em->em_datatype,
+ inner_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue;
+ score = 0;
+ if (IsA(outer_em->em_expr, Var) ||
+ (IsA(outer_em->em_expr, RelabelType) &&
+ IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
+ score++;
+ if (IsA(inner_em->em_expr, Var) ||
+ (IsA(inner_em->em_expr, RelabelType) &&
+ IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
+ score++;
+ if (op_hashjoinable(eq_op,
+ exprType((Node *) outer_em->em_expr)))
+ score++;
+ if (score > best_score)
+ {
+ best_outer_em = outer_em;
+ best_inner_em = inner_em;
+ best_eq_op = eq_op;
+ best_score = score;
+ if (best_score == 3)
+ break; /* no need to look further */
+ }
+ }
+ if (best_score == 3)
+ break; /* no need to look further */
+ }
+ if (best_score < 0)
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ return NIL;
+ }
+
+ /*
+ * Create clause, setting parent_ec to mark it as redundant with other
+ * joinclauses
+ */
+ rinfo = create_join_clause(root, ec, best_eq_op,
+ best_outer_em, best_inner_em,
+ ec);
+
+ result = lappend(result, rinfo);
+ }
+
+ /*
+ * Now deal with building restrictions for any expressions that involve
+ * Vars from both sides of the join. We have to equate all of these to
+ * each other as well as to at least one old member (if any).
+ *
+ * XXX as in generate_base_implied_equalities_no_const, we could be a lot
+ * smarter here to avoid unnecessary failures in cross-type situations.
+ * For now, use the same left-to-right method used there.
+ */
+ if (new_members)
+ {
+ List *old_members = list_concat(outer_members, inner_members);
+ EquivalenceMember *prev_em = NULL;
+ RestrictInfo *rinfo;
+
+ /* For now, arbitrarily take the first old_member as the one to use */
+ if (old_members)
+ new_members = lappend(new_members, linitial(old_members));
+
+ foreach(lc1, new_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);
+
+ if (prev_em != NULL)
+ {
+ Oid eq_op;
+
+ eq_op = select_equality_operator(ec,
+ prev_em->em_datatype,
+ cur_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ {
+ /* failed... */
+ ec->ec_broken = true;
+ return NIL;
+ }
+ /* do NOT set parent_ec, this qual is not redundant! */
+ rinfo = create_join_clause(root, ec, eq_op,
+ prev_em, cur_em,
+ NULL);
+
+ result = lappend(result, rinfo);
+ }
+ prev_em = cur_em;
+ }
+ }
+
+ return result;
+}
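+
+/*
+ * For illustration of the scoring above: with outer members {a.x, a.z + 1}
+ * and inner member b.y, the pairing a.x = b.y scores 3 (two plain Vars plus,
+ * for the common integer types, a hashjoinable operator), while
+ * (a.z + 1) = b.y scores at most 2, so the plain-Var clause is the one
+ * generated.
+ */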
+
+/*
+ * generate_join_implied_equalities cleanup after failure
+ *
+ * Return any original RestrictInfos that are enforceable at this join.
+ *
+ * In the case of a child inner relation, we have to translate the
+ * original RestrictInfos from parent to child Vars.
+ */
+static List *
+generate_join_implied_equalities_broken(PlannerInfo *root,
+ EquivalenceClass *ec,
+ Relids nominal_join_relids,
+ Relids outer_relids,
+ Relids nominal_inner_relids,
+ RelOptInfo *inner_rel)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, ec->ec_sources)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+ Relids clause_relids = restrictinfo->required_relids;
+
+ if (bms_is_subset(clause_relids, nominal_join_relids) &&
+ !bms_is_subset(clause_relids, outer_relids) &&
+ !bms_is_subset(clause_relids, nominal_inner_relids))
+ result = lappend(result, restrictinfo);
+ }
+
+ /*
+ * If we have to translate, just brute-force apply adjust_appendrel_attrs
+ * to all the RestrictInfos at once. This will result in returning
+ * RestrictInfos that are not listed in ec_derives, but there shouldn't be
+ * any duplication, and it's a sufficiently narrow corner case that we
+ * shouldn't sweat too much over it anyway.
+ *
+ * Since inner_rel might be an indirect descendant of the baserel
+ * mentioned in the ec_sources clauses, we have to be prepared to apply
+ * multiple levels of Var translation.
+ */
+ if (IS_OTHER_REL(inner_rel) && result != NIL)
+ result = (List *) adjust_appendrel_attrs_multilevel(root,
+ (Node *) result,
+ inner_rel->relids,
+ inner_rel->top_parent_relids);
+
+ return result;
+}
+
+
+/*
+ * select_equality_operator
+ * Select a suitable equality operator for comparing two EC members
+ *
+ * Returns InvalidOid if no operator can be found for this datatype combination
+ */
+static Oid
+select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype)
+{
+ ListCell *lc;
+
+ foreach(lc, ec->ec_opfamilies)
+ {
+ Oid opfamily = lfirst_oid(lc);
+ Oid opno;
+
+ opno = get_opfamily_member(opfamily, lefttype, righttype,
+ BTEqualStrategyNumber);
+ if (!OidIsValid(opno))
+ continue;
+ /* If no barrier quals in query, don't worry about leaky operators */
+ if (ec->ec_max_security == 0)
+ return opno;
+ /* Otherwise, insist that selected operators be leakproof */
+ if (get_func_leakproof(get_opcode(opno)))
+ return opno;
+ }
+ return InvalidOid;
+}
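+
+/*
+ * For illustration: comparing an int4 member against an int8 member under
+ * the integer btree opfamily, get_opfamily_member finds the cross-type
+ * int4 = int8 operator at BTEqualStrategyNumber, and that operator is what
+ * gets returned.  When the query has security-barrier quals
+ * (ec_max_security > 0), only leakproof operators are accepted; a failing
+ * candidate is skipped in favor of a later opfamily, or InvalidOid results
+ * if none qualifies.
+ */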
+
+
+/*
+ * create_join_clause
+ * Find or make a RestrictInfo comparing the two given EC members
+ * with the given operator.
+ *
+ * parent_ec is either equal to ec (if the clause is a potentially-redundant
+ * join clause) or NULL (if not). We have to treat this as part of the
+ * match requirements --- it's possible that a clause comparing the same two
+ * EMs is a join clause in one join path and a restriction clause in another.
+ */
+static RestrictInfo *
+create_join_clause(PlannerInfo *root,
+ EquivalenceClass *ec, Oid opno,
+ EquivalenceMember *leftem,
+ EquivalenceMember *rightem,
+ EquivalenceClass *parent_ec)
+{
+ RestrictInfo *rinfo;
+ ListCell *lc;
+ MemoryContext oldcontext;
+
+ /*
+ * Search to see if we already built a RestrictInfo for this pair of
+ * EquivalenceMembers. We can use either original source clauses or
+ * previously-derived clauses. The check on opno is probably redundant,
+ * but be safe ...
+ */
+ foreach(lc, ec->ec_sources)
+ {
+ rinfo = (RestrictInfo *) lfirst(lc);
+ if (rinfo->left_em == leftem &&
+ rinfo->right_em == rightem &&
+ rinfo->parent_ec == parent_ec &&
+ opno == ((OpExpr *) rinfo->clause)->opno)
+ return rinfo;
+ }
+
+ foreach(lc, ec->ec_derives)
+ {
+ rinfo = (RestrictInfo *) lfirst(lc);
+ if (rinfo->left_em == leftem &&
+ rinfo->right_em == rightem &&
+ rinfo->parent_ec == parent_ec &&
+ opno == ((OpExpr *) rinfo->clause)->opno)
+ return rinfo;
+ }
+
+ /*
+ * Not there, so build it, in planner context so we can re-use it. (Not
+ * important in normal planning, but definitely so in GEQO.)
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ rinfo = build_implied_join_equality(root,
+ opno,
+ ec->ec_collation,
+ leftem->em_expr,
+ rightem->em_expr,
+ bms_union(leftem->em_relids,
+ rightem->em_relids),
+ bms_union(leftem->em_nullable_relids,
+ rightem->em_nullable_relids),
+ ec->ec_min_security);
+
+ /* Mark the clause as redundant, or not */
+ rinfo->parent_ec = parent_ec;
+
+ /*
+ * We know the correct values for left_ec/right_ec, ie this particular EC,
+ * so we can just set them directly instead of forcing another lookup.
+ */
+ rinfo->left_ec = ec;
+ rinfo->right_ec = ec;
+
+ /* Mark it as usable with these EMs */
+ rinfo->left_em = leftem;
+ rinfo->right_em = rightem;
+ /* and save it for possible re-use */
+ ec->ec_derives = lappend(ec->ec_derives, rinfo);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return rinfo;
+}
+
+
+/*
+ * reconsider_outer_join_clauses
+ * Re-examine any outer-join clauses that were set aside by
+ * distribute_qual_to_rels(), and see if we can derive any
+ * EquivalenceClasses from them. Then, if they were not made
+ * redundant, push them out into the regular join-clause lists.
+ *
+ * When we have mergejoinable clauses A = B that are outer-join clauses,
+ * we can't blindly combine them with other clauses A = C to deduce B = C,
+ * since in fact the "equality" A = B won't necessarily hold above the
+ * outer join (one of the variables might be NULL instead). Nonetheless
+ * there are cases where we can add qual clauses using transitivity.
+ *
+ * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR
+ * for which there is also an equivalence clause OUTERVAR = CONSTANT.
+ * It is safe and useful to push a clause INNERVAR = CONSTANT into the
+ * evaluation of the inner (nullable) relation, because any inner rows not
+ * meeting this condition will not contribute to the outer-join result anyway.
+ * (Any outer rows they could join to will be eliminated by the pushed-down
+ * equivalence clause.)
+ *
+ * Note that the above rule does not work for full outer joins; nor is it
+ * very interesting to consider cases where the generated equivalence clause
+ * would involve relations outside the outer join, since such clauses couldn't
+ * be pushed into the inner side's scan anyway. So the restriction to
+ * outervar = pseudoconstant is not really giving up anything.
+ *
+ * For full-join cases, we can only do something useful if it's a FULL JOIN
+ * USING and a merged column has an equivalence MERGEDVAR = CONSTANT.
+ * By the time it gets here, the merged column will look like
+ * COALESCE(LEFTVAR, RIGHTVAR)
+ * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match
+ * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT
+ * and RIGHTVAR = CONSTANT into the input relations, since any rows not
+ * meeting these conditions cannot contribute to the join result.
+ *
+ * Again, there isn't any traction to be gained by trying to deal with
+ * clauses comparing a mergedvar to a non-pseudoconstant. So we can make
+ * use of the EquivalenceClasses to search for matching variables that were
+ * equivalenced to constants. The interesting outer-join clauses were
+ * accumulated for us by distribute_qual_to_rels.
+ *
+ * When we find one of these cases, we implement the changes we want by
+ * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc)
+ * and pushing it into the EquivalenceClass structures. This is because we
+ * may already know that INNERVAR is equivalenced to some other var(s), and
+ * we'd like the constant to propagate to them too. Note that it would be
+ * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC ---
+ * that could result in propagating constant restrictions from
+ * INNERVAR to OUTERVAR, which would be very wrong.
+ *
+ * It's possible that the INNERVAR is also an OUTERVAR for some other
+ * outer-join clause, in which case the process can be repeated. So we repeat
+ * looping over the lists of clauses until no further deductions can be made.
+ * Whenever we do make a deduction, we remove the generating clause from the
+ * lists, since we don't want to make the same deduction twice.
+ *
+ * If we don't find any match for a set-aside outer join clause, we must
+ * throw it back into the regular joinclause processing by passing it to
+ * distribute_restrictinfo_to_rels(). If we do generate a derived clause,
+ * however, the outer-join clause is redundant. We still throw it back,
+ * because otherwise the join will be seen as a clauseless join and avoided
+ * during join order searching; but we mark it as redundant to keep from
+ * messing up the joinrel's size estimate. (This behavior means that the
+ * API for this routine is uselessly complex: we could have just put all
+ * the clauses into the regular processing initially. We keep it because
+ * someday we might want to do something else, such as inserting "dummy"
+ * joinclauses instead of real ones.)
+ *
+ * Outer join clauses that are marked outerjoin_delayed are special: this
+ * condition means that one or both VARs might go to null due to a lower
+ * outer join. We can still push a constant through the clause, but only
+ * if its operator is strict; and we *have to* throw the clause back into
+ * regular joinclause processing. By keeping the strict join clause,
+ * we ensure that any null-extended rows that are mistakenly generated due
+ * to suppressing rows not matching the constant will be rejected at the
+ * upper outer join. (This doesn't work for full-join clauses.)
+ */
+void
+reconsider_outer_join_clauses(PlannerInfo *root)
+{
+ bool found;
+ ListCell *cell;
+
+ /* Outer loop repeats until we find no more deductions */
+ do
+ {
+ found = false;
+
+ /* Process the LEFT JOIN clauses */
+ foreach(cell, root->left_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ if (reconsider_outer_join_clause(root, rinfo, true))
+ {
+ found = true;
+ /* remove it from the list */
+ root->left_join_clauses =
+ foreach_delete_current(root->left_join_clauses, cell);
+ /* we throw it back anyway (see notes above) */
+ /* but the thrown-back clause has no extra selectivity */
+ rinfo->norm_selec = 2.0;
+ rinfo->outer_selec = 1.0;
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ }
+
+ /* Process the RIGHT JOIN clauses */
+ foreach(cell, root->right_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ if (reconsider_outer_join_clause(root, rinfo, false))
+ {
+ found = true;
+ /* remove it from the list */
+ root->right_join_clauses =
+ foreach_delete_current(root->right_join_clauses, cell);
+ /* we throw it back anyway (see notes above) */
+ /* but the thrown-back clause has no extra selectivity */
+ rinfo->norm_selec = 2.0;
+ rinfo->outer_selec = 1.0;
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ }
+
+ /* Process the FULL JOIN clauses */
+ foreach(cell, root->full_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ if (reconsider_full_join_clause(root, rinfo))
+ {
+ found = true;
+ /* remove it from the list */
+ root->full_join_clauses =
+ foreach_delete_current(root->full_join_clauses, cell);
+ /* we throw it back anyway (see notes above) */
+ /* but the thrown-back clause has no extra selectivity */
+ rinfo->norm_selec = 2.0;
+ rinfo->outer_selec = 1.0;
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ }
+ } while (found);
+
+ /* Now, any remaining clauses have to be thrown back */
+ foreach(cell, root->left_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ foreach(cell, root->right_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ foreach(cell, root->full_join_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+}
+
+/*
+ * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause
+ *
+ * Returns true if we were able to propagate a constant through the clause.
+ */
+static bool
+reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
+ bool outer_on_left)
+{
+ Expr *outervar,
+ *innervar;
+ Oid opno,
+ collation,
+ left_type,
+ right_type,
+ inner_datatype;
+ Relids inner_relids,
+ inner_nullable_relids;
+ ListCell *lc1;
+
+ Assert(is_opclause(rinfo->clause));
+ opno = ((OpExpr *) rinfo->clause)->opno;
+ collation = ((OpExpr *) rinfo->clause)->inputcollid;
+
+ /* If clause is outerjoin_delayed, operator must be strict */
+ if (rinfo->outerjoin_delayed && !op_strict(opno))
+ return false;
+
+ /* Extract needed info from the clause */
+ op_input_types(opno, &left_type, &right_type);
+ if (outer_on_left)
+ {
+ outervar = (Expr *) get_leftop(rinfo->clause);
+ innervar = (Expr *) get_rightop(rinfo->clause);
+ inner_datatype = right_type;
+ inner_relids = rinfo->right_relids;
+ }
+ else
+ {
+ outervar = (Expr *) get_rightop(rinfo->clause);
+ innervar = (Expr *) get_leftop(rinfo->clause);
+ inner_datatype = left_type;
+ inner_relids = rinfo->left_relids;
+ }
+ inner_nullable_relids = bms_intersect(inner_relids,
+ rinfo->nullable_relids);
+
+ /* Scan EquivalenceClasses for a match to outervar */
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ bool match;
+ ListCell *lc2;
+
+ /* Ignore EC unless it contains pseudoconstants */
+ if (!cur_ec->ec_has_const)
+ continue;
+ /* Never match to a volatile EC */
+ if (cur_ec->ec_has_volatile)
+ continue;
+ /* It has to match the outer-join clause as to semantics, too */
+ if (collation != cur_ec->ec_collation)
+ continue;
+ if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
+ continue;
+ /* Does it contain a match to outervar? */
+ match = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ Assert(!cur_em->em_is_child); /* no children yet */
+ if (equal(outervar, cur_em->em_expr))
+ {
+ match = true;
+ break;
+ }
+ }
+ if (!match)
+ continue; /* no match, so ignore this EC */
+
+ /*
+ * Yes it does! Try to generate a clause INNERVAR = CONSTANT for each
+ * CONSTANT in the EC. Note that we must succeed with at least one
+ * constant before we can decide to throw away the outer-join clause.
+ */
+ match = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *newrinfo;
+
+ if (!cur_em->em_is_const)
+ continue; /* ignore non-const members */
+ eq_op = select_equality_operator(cur_ec,
+ inner_datatype,
+ cur_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue; /* can't generate equality */
+ newrinfo = build_implied_join_equality(root,
+ eq_op,
+ cur_ec->ec_collation,
+ innervar,
+ cur_em->em_expr,
+ bms_copy(inner_relids),
+ bms_copy(inner_nullable_relids),
+ cur_ec->ec_min_security);
+ if (process_equivalence(root, &newrinfo, true))
+ match = true;
+ }
+
+ /*
+ * If we were able to equate INNERVAR to any constant, report success.
+ * Otherwise, fall out of the search loop, since we know the OUTERVAR
+ * appears in at most one EC.
+ */
+ if (match)
+ return true;
+ else
+ break;
+ }
+
+ return false; /* failed to make any deduction */
+}
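+
+/*
+ * For illustration: given "a LEFT JOIN b ON a.x = b.y" plus "WHERE a.x = 42",
+ * the EC {a.x, 42} matches OUTERVAR a.x, so a new clause b.y = 42 is built
+ * and fed through process_equivalence, letting it be enforced at b's scan.
+ * The original join clause is still distributed by the caller, so the join
+ * isn't treated as clauseless, but it is marked as adding no extra
+ * selectivity.
+ */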
+
+/*
+ * reconsider_outer_join_clauses for a single FULL JOIN clause
+ *
+ * Returns true if we were able to propagate a constant through the clause.
+ */
+static bool
+reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
+{
+ Expr *leftvar;
+ Expr *rightvar;
+ Oid opno,
+ collation,
+ left_type,
+ right_type;
+ Relids left_relids,
+ right_relids,
+ left_nullable_relids,
+ right_nullable_relids;
+ ListCell *lc1;
+
+ /* Can't use an outerjoin_delayed clause here */
+ if (rinfo->outerjoin_delayed)
+ return false;
+
+ /* Extract needed info from the clause */
+ Assert(is_opclause(rinfo->clause));
+ opno = ((OpExpr *) rinfo->clause)->opno;
+ collation = ((OpExpr *) rinfo->clause)->inputcollid;
+ op_input_types(opno, &left_type, &right_type);
+ leftvar = (Expr *) get_leftop(rinfo->clause);
+ rightvar = (Expr *) get_rightop(rinfo->clause);
+ left_relids = rinfo->left_relids;
+ right_relids = rinfo->right_relids;
+ left_nullable_relids = bms_intersect(left_relids,
+ rinfo->nullable_relids);
+ right_nullable_relids = bms_intersect(right_relids,
+ rinfo->nullable_relids);
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ EquivalenceMember *coal_em = NULL;
+ bool match;
+ bool matchleft;
+ bool matchright;
+ ListCell *lc2;
+ int coal_idx = -1;
+
+ /* Ignore EC unless it contains pseudoconstants */
+ if (!cur_ec->ec_has_const)
+ continue;
+ /* Never match to a volatile EC */
+ if (cur_ec->ec_has_volatile)
+ continue;
+ /* It has to match the outer-join clause as to semantics, too */
+ if (collation != cur_ec->ec_collation)
+ continue;
+ if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ /*
+ * Does it contain a COALESCE(leftvar, rightvar) construct?
+ *
+ * We can assume the COALESCE() inputs are in the same order as the
+ * join clause, since both were automatically generated in the cases
+ * we care about.
+ *
+ * XXX currently this may fail to match in cross-type cases because
+ * the COALESCE will contain typecast operations while the join clause
+ * may not (if there is a cross-type mergejoin operator available for
+ * the two column types). Is it OK to strip implicit coercions from
+ * the COALESCE arguments?
+ */
+ match = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ coal_em = (EquivalenceMember *) lfirst(lc2);
+ Assert(!coal_em->em_is_child); /* no children yet */
+ if (IsA(coal_em->em_expr, CoalesceExpr))
+ {
+ CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr;
+ Node *cfirst;
+ Node *csecond;
+
+ if (list_length(cexpr->args) != 2)
+ continue;
+ cfirst = (Node *) linitial(cexpr->args);
+ csecond = (Node *) lsecond(cexpr->args);
+
+ if (equal(leftvar, cfirst) && equal(rightvar, csecond))
+ {
+ coal_idx = foreach_current_index(lc2);
+ match = true;
+ break;
+ }
+ }
+ }
+ if (!match)
+ continue; /* no match, so ignore this EC */
+
+ /*
+ * Yes it does! Try to generate clauses LEFTVAR = CONSTANT and
+ * RIGHTVAR = CONSTANT for each CONSTANT in the EC. Note that we must
+ * succeed with at least one constant for each var before we can
+ * decide to throw away the outer-join clause.
+ */
+ matchleft = matchright = false;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *newrinfo;
+
+ if (!cur_em->em_is_const)
+ continue; /* ignore non-const members */
+ eq_op = select_equality_operator(cur_ec,
+ left_type,
+ cur_em->em_datatype);
+ if (OidIsValid(eq_op))
+ {
+ newrinfo = build_implied_join_equality(root,
+ eq_op,
+ cur_ec->ec_collation,
+ leftvar,
+ cur_em->em_expr,
+ bms_copy(left_relids),
+ bms_copy(left_nullable_relids),
+ cur_ec->ec_min_security);
+ if (process_equivalence(root, &newrinfo, true))
+ matchleft = true;
+ }
+ eq_op = select_equality_operator(cur_ec,
+ right_type,
+ cur_em->em_datatype);
+ if (OidIsValid(eq_op))
+ {
+ newrinfo = build_implied_join_equality(root,
+ eq_op,
+ cur_ec->ec_collation,
+ rightvar,
+ cur_em->em_expr,
+ bms_copy(right_relids),
+ bms_copy(right_nullable_relids),
+ cur_ec->ec_min_security);
+ if (process_equivalence(root, &newrinfo, true))
+ matchright = true;
+ }
+ }
+
+ /*
+ * If we were able to equate both vars to constants, we're done, and
+ * we can throw away the full-join clause as redundant. Moreover, we
+ * can remove the COALESCE entry from the EC, since the added
+ * restrictions ensure it will always have the expected value. (We
+ * don't bother trying to update ec_relids or ec_sources.)
+ */
+ if (matchleft && matchright)
+ {
+ cur_ec->ec_members = list_delete_nth_cell(cur_ec->ec_members, coal_idx);
+ return true;
+ }
+
+ /*
+ * Otherwise, fall out of the search loop, since we know the COALESCE
+ * appears in at most one EC (XXX might stop being true if we allow
+ * stripping of coercions above?)
+ */
+ break;
+ }
+
+ return false; /* failed to make any deduction */
+}
+
+
+/*
+ * exprs_known_equal
+ * Detect whether two expressions are known equal due to equivalence
+ * relationships.
+ *
+ * Actually, this only shows that the expressions are equal according
+ * to some opfamily's notion of equality --- but we only use it for
+ * selectivity estimation, so a fuzzy idea of equality is OK.
+ *
+ * Note: does not bother to check for "equal(item1, item2)"; caller must
+ * check that case if it's possible to pass identical items.
+ */
+bool
+exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
+{
+ ListCell *lc1;
+
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
+ bool item1member = false;
+ bool item2member = false;
+ ListCell *lc2;
+
+ /* Never match to a volatile EC */
+ if (ec->ec_has_volatile)
+ continue;
+
+ foreach(lc2, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+
+ if (em->em_is_child)
+ continue; /* ignore children here */
+ if (equal(item1, em->em_expr))
+ item1member = true;
+ else if (equal(item2, em->em_expr))
+ item2member = true;
+ /* Exit as soon as equality is proven */
+ if (item1member && item2member)
+ return true;
+ }
+ }
+ return false;
+}
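
A minimal caller sketch, not taken from the PostgreSQL sources, showing how a selectivity routine could consult exprs_known_equal(); the helper name is hypothetical, and DEFAULT_EQ_SEL assumes utils/selfuncs.h is available (this file does not include it).

/* Hypothetical caller sketch; assumes utils/selfuncs.h for DEFAULT_EQ_SEL. */
static double
example_equal_inputs_selectivity(PlannerInfo *root, Node *item1, Node *item2)
{
    /* exprs_known_equal() deliberately skips the plain equal() test */
    if (equal(item1, item2) || exprs_known_equal(root, item1, item2))
        return 1.0;             /* provably the same value */

    return DEFAULT_EQ_SEL;      /* otherwise fall back to a generic guess */
}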
+
+
+/*
+ * match_eclasses_to_foreign_key_col
+ * See whether a foreign key column match is proven by any eclass.
+ *
+ * If the referenced and referencing Vars of the fkey's colno'th column are
+ * known equal due to any eclass, return that eclass; otherwise return NULL.
+ * (In principle there might be more than one matching eclass if multiple
+ * collations are involved, but since collation doesn't matter for equality,
+ * we ignore that fine point here.) This is much like exprs_known_equal,
+ * except that we insist on the comparison operator matching the eclass, so
+ * that the result is definite, not approximate.
+ *
+ * On success, we also set fkinfo->eclass[colno] to the matching eclass,
+ * and set fkinfo->fk_eclass_member[colno] to the eclass member for the
+ * referencing Var.
+ */
+EquivalenceClass *
+match_eclasses_to_foreign_key_col(PlannerInfo *root,
+ ForeignKeyOptInfo *fkinfo,
+ int colno)
+{
+ Index var1varno = fkinfo->con_relid;
+ AttrNumber var1attno = fkinfo->conkey[colno];
+ Index var2varno = fkinfo->ref_relid;
+ AttrNumber var2attno = fkinfo->confkey[colno];
+ Oid eqop = fkinfo->conpfeqop[colno];
+ RelOptInfo *rel1 = root->simple_rel_array[var1varno];
+ RelOptInfo *rel2 = root->simple_rel_array[var2varno];
+ List *opfamilies = NIL; /* compute only if needed */
+ Bitmapset *matching_ecs;
+ int i;
+
+ /* Consider only eclasses mentioning both relations */
+ Assert(root->ec_merging_done);
+ Assert(IS_SIMPLE_REL(rel1));
+ Assert(IS_SIMPLE_REL(rel2));
+ matching_ecs = bms_intersect(rel1->eclass_indexes,
+ rel2->eclass_indexes);
+
+ i = -1;
+ while ((i = bms_next_member(matching_ecs, i)) >= 0)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes,
+ i);
+ EquivalenceMember *item1_em = NULL;
+ EquivalenceMember *item2_em = NULL;
+ ListCell *lc2;
+
+ /* Never match to a volatile EC */
+ if (ec->ec_has_volatile)
+ continue;
+ /* Note: it seems okay to match to "broken" eclasses here */
+
+ foreach(lc2, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+ Var *var;
+
+ if (em->em_is_child)
+ continue; /* ignore children here */
+
+ /* EM must be a Var, possibly with RelabelType */
+ var = (Var *) em->em_expr;
+ while (var && IsA(var, RelabelType))
+ var = (Var *) ((RelabelType *) var)->arg;
+ if (!(var && IsA(var, Var)))
+ continue;
+
+ /* Match? */
+ if (var->varno == var1varno && var->varattno == var1attno)
+ item1_em = em;
+ else if (var->varno == var2varno && var->varattno == var2attno)
+ item2_em = em;
+
+ /* Have we found both PK and FK column in this EC? */
+ if (item1_em && item2_em)
+ {
+ /*
+ * Succeed if eqop matches EC's opfamilies. We could test
+ * this before scanning the members, but it's probably cheaper
+ * to test for member matches first.
+ */
+ if (opfamilies == NIL) /* compute if we didn't already */
+ opfamilies = get_mergejoin_opfamilies(eqop);
+ if (equal(opfamilies, ec->ec_opfamilies))
+ {
+ fkinfo->eclass[colno] = ec;
+ fkinfo->fk_eclass_member[colno] = item2_em;
+ return ec;
+ }
+ /* Otherwise, done with this EC, move on to the next */
+ break;
+ }
+ }
+ }
+ return NULL;
+}
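
A short sketch, not taken from the PostgreSQL sources, of the per-column calling pattern this function supports; the counting helper below is hypothetical, while nkeys and eclass[] are the real ForeignKeyOptInfo fields.

/* Hypothetical helper: how many FK columns are proven by some eclass? */
static int
example_count_fk_eclass_matches(PlannerInfo *root, ForeignKeyOptInfo *fkinfo)
{
    int         nmatched = 0;

    for (int colno = 0; colno < fkinfo->nkeys; colno++)
    {
        /* on success this also fills fkinfo->eclass[colno] and friends */
        if (match_eclasses_to_foreign_key_col(root, fkinfo, colno) != NULL)
            nmatched++;
    }
    return nmatched;
}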
+
+/*
+ * find_derived_clause_for_ec_member
+ * Search for a previously-derived clause mentioning the given EM.
+ *
+ * The eclass should be an ec_has_const EC, of which the EM is a non-const
+ * member. This should ensure there is just one derived clause mentioning
+ * the EM (and equating it to a constant).
+ * Returns NULL if no such clause can be found.
+ */
+RestrictInfo *
+find_derived_clause_for_ec_member(EquivalenceClass *ec,
+ EquivalenceMember *em)
+{
+ ListCell *lc;
+
+ Assert(ec->ec_has_const);
+ Assert(!em->em_is_const);
+ foreach(lc, ec->ec_derives)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /*
+ * generate_base_implied_equalities_const will have put non-const
+ * members on the left side of derived clauses.
+ */
+ if (rinfo->left_em == em)
+ return rinfo;
+ }
+ return NULL;
+}
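
A small illustrative wrapper, not part of the upstream file, assuming a caller that wants the derived clause's expression rather than the RestrictInfo; the wrapper name is hypothetical.

/* Hypothetical wrapper: return the derived clause's expression, or NULL. */
static Expr *
example_derived_const_clause(EquivalenceClass *ec, EquivalenceMember *em)
{
    RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec, em);

    return rinfo ? rinfo->clause : NULL;
}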
+
+
+/*
+ * add_child_rel_equivalences
+ * Search for EC members that reference the root parent of child_rel, and
+ * add transformed members referencing the child_rel.
+ *
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
+ *
+ * parent_rel and child_rel could be derived from appinfo, but since the
+ * caller has already computed them, we might as well just pass them in.
+ *
+ * The passed-in AppendRelInfo is not used when the parent_rel is not a
+ * top-level baserel, since it shows the mapping from the parent_rel but
+ * we need to translate EC expressions that refer to the top-level parent.
+ * Using it is faster than using adjust_appendrel_attrs_multilevel(), though,
+ * so we prefer it when we can.
+ */
+void
+add_child_rel_equivalences(PlannerInfo *root,
+ AppendRelInfo *appinfo,
+ RelOptInfo *parent_rel,
+ RelOptInfo *child_rel)
+{
+ Relids top_parent_relids = child_rel->top_parent_relids;
+ Relids child_relids = child_rel->relids;
+ int i;
+
+ /*
+ * EC merging should be complete already, so we can use the parent rel's
+ * eclass_indexes to avoid searching all of root->eq_classes.
+ */
+ Assert(root->ec_merging_done);
+ Assert(IS_SIMPLE_REL(parent_rel));
+
+ i = -1;
+ while ((i = bms_next_member(parent_rel->eclass_indexes, i)) >= 0)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+ int num_members;
+
+ /*
+ * If this EC contains a volatile expression, then generating child
+ * EMs would be downright dangerous, so skip it. We rely on a
+ * volatile EC having only one EM.
+ */
+ if (cur_ec->ec_has_volatile)
+ continue;
+
+ /* Sanity check eclass_indexes only contain ECs for parent_rel */
+ Assert(bms_is_subset(top_parent_relids, cur_ec->ec_relids));
+
+ /*
+ * We don't use foreach() here because there's no point in scanning
+ * newly-added child members, so we can stop after the last
+ * pre-existing EC member.
+ */
+ num_members = list_length(cur_ec->ec_members);
+ for (int pos = 0; pos < num_members; pos++)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos);
+
+ if (cur_em->em_is_const)
+ continue; /* ignore consts here */
+
+ /*
+ * We consider only original EC members here, not
+ * already-transformed child members. Otherwise, if some original
+ * member expression references more than one appendrel, we'd get
+ * an O(N^2) explosion of useless derived expressions for
+ * combinations of children. (But add_child_join_rel_equivalences
+ * may add targeted combinations for partitionwise-join purposes.)
+ */
+ if (cur_em->em_is_child)
+ continue; /* ignore children here */
+
+ /* Does this member reference child's topmost parent rel? */
+ if (bms_overlap(cur_em->em_relids, top_parent_relids))
+ {
+ /* Yes, generate transformed child version */
+ Expr *child_expr;
+ Relids new_relids;
+ Relids new_nullable_relids;
+
+ if (parent_rel->reloptkind == RELOPT_BASEREL)
+ {
+ /* Simple single-level transformation */
+ child_expr = (Expr *)
+ adjust_appendrel_attrs(root,
+ (Node *) cur_em->em_expr,
+ 1, &appinfo);
+ }
+ else
+ {
+ /* Must do multi-level transformation */
+ child_expr = (Expr *)
+ adjust_appendrel_attrs_multilevel(root,
+ (Node *) cur_em->em_expr,
+ child_relids,
+ top_parent_relids);
+ }
+
+ /*
+ * Transform em_relids to match. Note we do *not* do
+ * pull_varnos(child_expr) here, as for example the
+ * transformation might have substituted a constant, but we
+ * don't want the child member to be marked as constant.
+ */
+ new_relids = bms_difference(cur_em->em_relids,
+ top_parent_relids);
+ new_relids = bms_add_members(new_relids, child_relids);
+
+ /*
+ * And likewise for nullable_relids. Note this code assumes
+ * parent and child relids are singletons.
+ */
+ new_nullable_relids = cur_em->em_nullable_relids;
+ if (bms_overlap(new_nullable_relids, top_parent_relids))
+ {
+ new_nullable_relids = bms_difference(new_nullable_relids,
+ top_parent_relids);
+ new_nullable_relids = bms_add_members(new_nullable_relids,
+ child_relids);
+ }
+
+ (void) add_eq_member(cur_ec, child_expr,
+ new_relids, new_nullable_relids,
+ true, cur_em->em_datatype);
+
+ /* Record this EC index for the child rel */
+ child_rel->eclass_indexes = bms_add_member(child_rel->eclass_indexes, i);
+ }
+ }
+ }
+}
+
+/*
+ * add_child_join_rel_equivalences
+ * Like add_child_rel_equivalences(), but for joinrels
+ *
+ * Here we find the ECs relevant to the top parent joinrel and add transformed
+ * member expressions that refer to this child joinrel.
+ *
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
+ */
+void
+add_child_join_rel_equivalences(PlannerInfo *root,
+ int nappinfos, AppendRelInfo **appinfos,
+ RelOptInfo *parent_joinrel,
+ RelOptInfo *child_joinrel)
+{
+ Relids top_parent_relids = child_joinrel->top_parent_relids;
+ Relids child_relids = child_joinrel->relids;
+ Bitmapset *matching_ecs;
+ MemoryContext oldcontext;
+ int i;
+
+ Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel));
+
+ /* We need only consider ECs that mention the parent joinrel */
+ matching_ecs = get_eclass_indexes_for_relids(root, top_parent_relids);
+
+ /*
+ * If we're being called during GEQO join planning, we still have to
+ * create any new EC members in the main planner context, to avoid having
+ * a corrupt EC data structure after the GEQO context is reset. This is
+ * problematic since we'll leak memory across repeated GEQO cycles. For
+ * now, though, bloat is better than crash. If it becomes a real issue
+ * we'll have to do something to avoid generating duplicate EC members.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ i = -1;
+ while ((i = bms_next_member(matching_ecs, i)) >= 0)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+ int num_members;
+
+ /*
+ * If this EC contains a volatile expression, then generating child
+ * EMs would be downright dangerous, so skip it. We rely on a
+ * volatile EC having only one EM.
+ */
+ if (cur_ec->ec_has_volatile)
+ continue;
+
+ /* Sanity check on get_eclass_indexes_for_relids result */
+ Assert(bms_overlap(top_parent_relids, cur_ec->ec_relids));
+
+ /*
+ * We don't use foreach() here because there's no point in scanning
+ * newly-added child members, so we can stop after the last
+ * pre-existing EC member.
+ */
+ num_members = list_length(cur_ec->ec_members);
+ for (int pos = 0; pos < num_members; pos++)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos);
+
+ if (cur_em->em_is_const)
+ continue; /* ignore consts here */
+
+ /*
+ * We consider only original EC members here, not
+ * already-transformed child members.
+ */
+ if (cur_em->em_is_child)
+ continue; /* ignore children here */
+
+ /*
+ * We may ignore expressions that reference a single baserel,
+ * because add_child_rel_equivalences should have handled them.
+ */
+ if (bms_membership(cur_em->em_relids) != BMS_MULTIPLE)
+ continue;
+
+ /* Does this member reference child's topmost parent rel? */
+ if (bms_overlap(cur_em->em_relids, top_parent_relids))
+ {
+ /* Yes, generate transformed child version */
+ Expr *child_expr;
+ Relids new_relids;
+ Relids new_nullable_relids;
+
+ if (parent_joinrel->reloptkind == RELOPT_JOINREL)
+ {
+ /* Simple single-level transformation */
+ child_expr = (Expr *)
+ adjust_appendrel_attrs(root,
+ (Node *) cur_em->em_expr,
+ nappinfos, appinfos);
+ }
+ else
+ {
+ /* Must do multi-level transformation */
+ Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL);
+ child_expr = (Expr *)
+ adjust_appendrel_attrs_multilevel(root,
+ (Node *) cur_em->em_expr,
+ child_relids,
+ top_parent_relids);
+ }
+
+ /*
+ * Transform em_relids to match. Note we do *not* do
+ * pull_varnos(child_expr) here, as for example the
+ * transformation might have substituted a constant, but we
+ * don't want the child member to be marked as constant.
+ */
+ new_relids = bms_difference(cur_em->em_relids,
+ top_parent_relids);
+ new_relids = bms_add_members(new_relids, child_relids);
+
+ /*
+ * For nullable_relids, we must selectively replace parent
+ * nullable relids with child ones.
+ */
+ new_nullable_relids = cur_em->em_nullable_relids;
+ if (bms_overlap(new_nullable_relids, top_parent_relids))
+ new_nullable_relids =
+ adjust_child_relids_multilevel(root,
+ new_nullable_relids,
+ child_relids,
+ top_parent_relids);
+
+ (void) add_eq_member(cur_ec, child_expr,
+ new_relids, new_nullable_relids,
+ true, cur_em->em_datatype);
+ }
+ }
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * generate_implied_equalities_for_column
+ * Create EC-derived joinclauses usable with a specific column.
+ *
+ * This is used by indxpath.c to extract potentially indexable joinclauses
+ * from ECs, and can be used by foreign data wrappers for similar purposes.
+ * We assume that only expressions in Vars of a single table are of interest,
+ * but the caller provides a callback function to identify exactly which
+ * such expressions it would like to know about.
+ *
+ * We assume that any given table/index column could appear in only one EC.
+ * (This should be true in all but the most pathological cases, and if it
+ * isn't, we stop on the first match anyway.) Therefore, what we return
+ * is a redundant list of clauses equating the table/index column to each of
+ * the other-relation values it is known to be equal to. Any one of
+ * these clauses can be used to create a parameterized path, and there
+ * is no value in using more than one. (But it *is* worthwhile to create
+ * a separate parameterized path for each one, since that leads to different
+ * join orders.)
+ *
+ * The caller can pass a Relids set of rels we aren't interested in joining
+ * to, so as to save the work of creating useless clauses.
+ */
+List *
+generate_implied_equalities_for_column(PlannerInfo *root,
+ RelOptInfo *rel,
+ ec_matches_callback_type callback,
+ void *callback_arg,
+ Relids prohibited_rels)
+{
+ List *result = NIL;
+ bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ Relids parent_relids;
+ int i;
+
+ /* Should be OK to rely on eclass_indexes */
+ Assert(root->ec_merging_done);
+
+ /* Indexes are available only on base or "other" member relations. */
+ Assert(IS_SIMPLE_REL(rel));
+
+ /* If it's a child rel, we'll need to know what its parent(s) are */
+ if (is_child_rel)
+ parent_relids = find_childrel_parents(root, rel);
+ else
+ parent_relids = NULL; /* not used, but keep compiler quiet */
+
+ i = -1;
+ while ((i = bms_next_member(rel->eclass_indexes, i)) >= 0)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+ EquivalenceMember *cur_em;
+ ListCell *lc2;
+
+ /* Sanity check eclass_indexes only contain ECs for rel */
+ Assert(is_child_rel || bms_is_subset(rel->relids, cur_ec->ec_relids));
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter
+ * test covers the volatile case too)
+ */
+ if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * Scan members, looking for a match to the target column. Note that
+ * child EC members are considered, but only when they belong to the
+ * target relation. (Unlike regular members, the same expression
+ * could be a child member of more than one EC. Therefore, it's
+ * potentially order-dependent which EC a child relation's target
+ * column gets matched to. This is annoying but it only happens in
+ * corner cases, so for now we live with just reporting the first
+ * match. See also get_eclass_for_sort_expr.)
+ */
+ cur_em = NULL;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ cur_em = (EquivalenceMember *) lfirst(lc2);
+ if (bms_equal(cur_em->em_relids, rel->relids) &&
+ callback(root, rel, cur_ec, cur_em, callback_arg))
+ break;
+ cur_em = NULL;
+ }
+
+ if (!cur_em)
+ continue;
+
+ /*
+ * Found our match. Scan the other EC members and attempt to generate
+ * joinclauses.
+ */
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *rinfo;
+
+ if (other_em->em_is_child)
+ continue; /* ignore children here */
+
+ /* Make sure it'll be a join to a different rel */
+ if (other_em == cur_em ||
+ bms_overlap(other_em->em_relids, rel->relids))
+ continue;
+
+ /* Forget it if caller doesn't want joins to this rel */
+ if (bms_overlap(other_em->em_relids, prohibited_rels))
+ continue;
+
+ /*
+ * Also, if this is a child rel, avoid generating a useless join
+ * to its parent rel(s).
+ */
+ if (is_child_rel &&
+ bms_overlap(parent_relids, other_em->em_relids))
+ continue;
+
+ eq_op = select_equality_operator(cur_ec,
+ cur_em->em_datatype,
+ other_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue;
+
+ /* set parent_ec to mark as redundant with other joinclauses */
+ rinfo = create_join_clause(root, cur_ec, eq_op,
+ cur_em, other_em,
+ cur_ec);
+
+ result = lappend(result, rinfo);
+ }
+
+ /*
+ * If somehow we failed to create any join clauses, we might as well
+ * keep scanning the ECs for another match. But if we did make any,
+ * we're done, because we don't want to return non-redundant clauses.
+ */
+ if (result)
+ break;
+ }
+
+ return result;
+}
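
A sketch, not taken from the PostgreSQL sources, of the callback pattern this function expects, roughly what a foreign data wrapper might supply; the struct and function names are hypothetical, but the signature follows ec_matches_callback_type in optimizer/paths.h.

/* Hypothetical callback: accept only a plain Var of one target column. */
typedef struct
{
    AttrNumber  target_attno;   /* column we want joinclauses for */
} example_match_arg;

static bool
example_em_matches_column(PlannerInfo *root, RelOptInfo *rel,
                          EquivalenceClass *ec, EquivalenceMember *em,
                          void *arg)
{
    example_match_arg *marg = (example_match_arg *) arg;
    Var        *var = (Var *) em->em_expr;

    return IsA(var, Var) &&
        var->varno == rel->relid &&
        var->varattno == marg->target_attno;
}

/*
 * Typical call (hypothetical):
 *  example_match_arg arg = { .target_attno = attno };
 *  clauses = generate_implied_equalities_for_column(root, rel,
 *                              example_em_matches_column, &arg, NULL);
 */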
+
+/*
+ * have_relevant_eclass_joinclause
+ * Detect whether there is an EquivalenceClass that could produce
+ * a joinclause involving the two given relations.
+ *
+ * This is essentially a very cut-down version of
+ * generate_join_implied_equalities(). Note it's OK to occasionally say "yes"
+ * incorrectly. Hence we don't bother with details like whether the lack of a
+ * cross-type operator might prevent the clause from actually being generated.
+ */
+bool
+have_relevant_eclass_joinclause(PlannerInfo *root,
+ RelOptInfo *rel1, RelOptInfo *rel2)
+{
+ Bitmapset *matching_ecs;
+ int i;
+
+ /* Examine only eclasses mentioning both rel1 and rel2 */
+ matching_ecs = get_common_eclass_indexes(root, rel1->relids,
+ rel2->relids);
+
+ i = -1;
+ while ((i = bms_next_member(matching_ecs, i)) >= 0)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes,
+ i);
+
+ /*
+ * Sanity check that get_common_eclass_indexes gave only ECs
+ * containing both rels.
+ */
+ Assert(bms_overlap(rel1->relids, ec->ec_relids));
+ Assert(bms_overlap(rel2->relids, ec->ec_relids));
+
+ /*
+ * Won't generate joinclauses if single-member (this test covers the
+ * volatile case too)
+ */
+ if (list_length(ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * We do not need to examine the individual members of the EC, because
+ * all that we care about is whether each rel overlaps the relids of
+ * at least one member, and get_common_eclass_indexes() and the single
+ * member check above are sufficient to prove that. (As with
+ * have_relevant_joinclause(), it is not necessary that the EC be able
+ * to form a joinclause relating exactly the two given rels, only that
+ * it be able to form a joinclause mentioning both, and this will
+ * surely be true if both of them overlap ec_relids.)
+ *
+ * Note we don't test ec_broken; if we did, we'd need a separate code
+ * path to look through ec_sources. Checking the membership anyway is
+ * OK as a possibly-overoptimistic heuristic.
+ *
+ * We don't test ec_has_const either, even though a const eclass won't
+ * generate real join clauses. This is because if we had "WHERE a.x =
+ * b.y and a.x = 42", it is worth considering a join between a and b,
+ * since the join result is likely to be small even though it'll end
+ * up being an unqualified nestloop.
+ */
+
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * has_relevant_eclass_joinclause
+ * Detect whether there is an EquivalenceClass that could produce
+ * a joinclause involving the given relation and anything else.
+ *
+ * This is the same as have_relevant_eclass_joinclause with the other rel
+ * implicitly defined as "everything else in the query".
+ */
+bool
+has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
+{
+ Bitmapset *matched_ecs;
+ int i;
+
+ /* Examine only eclasses mentioning rel1 */
+ matched_ecs = get_eclass_indexes_for_relids(root, rel1->relids);
+
+ i = -1;
+ while ((i = bms_next_member(matched_ecs, i)) >= 0)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes,
+ i);
+
+ /*
+ * Won't generate joinclauses if single-member (this test covers the
+ * volatile case too)
+ */
+ if (list_length(ec->ec_members) <= 1)
+ continue;
+
+ /*
+ * Per the comment in have_relevant_eclass_joinclause, it's sufficient
+ * to find an EC that mentions both this rel and some other rel.
+ */
+ if (!bms_is_subset(ec->ec_relids, rel1->relids))
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * eclass_useful_for_merging
+ * Detect whether the EC could produce any mergejoinable join clauses
+ * against the specified relation.
+ *
+ * This is just a heuristic test and doesn't have to be exact; it's better
+ * to say "yes" incorrectly than "no". Hence we don't bother with details
+ * like whether the lack of a cross-type operator might prevent the clause
+ * from actually being generated.
+ */
+bool
+eclass_useful_for_merging(PlannerInfo *root,
+ EquivalenceClass *eclass,
+ RelOptInfo *rel)
+{
+ Relids relids;
+ ListCell *lc;
+
+ Assert(!eclass->ec_merged);
+
+ /*
+ * Won't generate joinclauses if const or single-member (the latter test
+ * covers the volatile case too)
+ */
+ if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1)
+ return false;
+
+ /*
+ * Note we don't test ec_broken; if we did, we'd need a separate code path
+ * to look through ec_sources. Checking the members anyway is OK as a
+ * possibly-overoptimistic heuristic.
+ */
+
+ /* If specified rel is a child, we must consider the topmost parent rel */
+ if (IS_OTHER_REL(rel))
+ {
+ Assert(!bms_is_empty(rel->top_parent_relids));
+ relids = rel->top_parent_relids;
+ }
+ else
+ relids = rel->relids;
+
+ /* If rel already includes all members of eclass, no point in searching */
+ if (bms_is_subset(eclass->ec_relids, relids))
+ return false;
+
+ /* To join, we need a member not in the given rel */
+ foreach(lc, eclass->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+
+ if (cur_em->em_is_child)
+ continue; /* ignore children here */
+
+ if (!bms_overlap(cur_em->em_relids, relids))
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * is_redundant_derived_clause
+ * Test whether rinfo is derived from same EC as any clause in clauselist;
+ * if so, it can be presumed to represent a condition that's redundant
+ * with that member of the list.
+ */
+bool
+is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist)
+{
+ EquivalenceClass *parent_ec = rinfo->parent_ec;
+ ListCell *lc;
+
+ /* Fail if it's not a potentially-redundant clause from some EC */
+ if (parent_ec == NULL)
+ return false;
+
+ foreach(lc, clauselist)
+ {
+ RestrictInfo *otherrinfo = (RestrictInfo *) lfirst(lc);
+
+ if (otherrinfo->parent_ec == parent_ec)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * is_redundant_with_indexclauses
+ * Test whether rinfo is redundant with any clause in the IndexClause
+ * list. Here, for convenience, we test both simple identity and
+ * whether it is derived from the same EC as any member of the list.
+ */
+bool
+is_redundant_with_indexclauses(RestrictInfo *rinfo, List *indexclauses)
+{
+ EquivalenceClass *parent_ec = rinfo->parent_ec;
+ ListCell *lc;
+
+ foreach(lc, indexclauses)
+ {
+ IndexClause *iclause = lfirst_node(IndexClause, lc);
+ RestrictInfo *otherrinfo = iclause->rinfo;
+
+ /* If indexclause is lossy, it won't enforce the condition exactly */
+ if (iclause->lossy)
+ continue;
+
+ /* Match if it's same clause (pointer equality should be enough) */
+ if (rinfo == otherrinfo)
+ return true;
+ /* Match if derived from same EC */
+ if (parent_ec && otherrinfo->parent_ec == parent_ec)
+ return true;
+
+ /*
+ * No need to look at the derived clauses in iclause->indexquals; they
+ * couldn't match if the parent clause didn't.
+ */
+ }
+
+ return false;
+}
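
A sketch, not part of the upstream file, of the filtering this test enables, in the spirit of dropping quals already enforced by the chosen index clauses; the helper name is hypothetical.

/* Hypothetical filter: keep only quals the index clauses don't enforce. */
static List *
example_nonredundant_quals(List *quals, List *indexclauses)
{
    List       *result = NIL;
    ListCell   *lc;

    foreach(lc, quals)
    {
        RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);

        if (!is_redundant_with_indexclauses(rinfo, indexclauses))
            result = lappend(result, rinfo);
    }
    return result;
}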
+
+/*
+ * get_eclass_indexes_for_relids
+ * Build and return a Bitmapset containing the indexes into root's
+ * eq_classes list for all eclasses that mention any of these relids
+ */
+static Bitmapset *
+get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids)
+{
+ Bitmapset *ec_indexes = NULL;
+ int i = -1;
+
+ /* Should be OK to rely on eclass_indexes */
+ Assert(root->ec_merging_done);
+
+ while ((i = bms_next_member(relids, i)) > 0)
+ {
+ RelOptInfo *rel = root->simple_rel_array[i];
+
+ ec_indexes = bms_add_members(ec_indexes, rel->eclass_indexes);
+ }
+ return ec_indexes;
+}
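
For orientation, a sketch (not upstream code) of the recurring access pattern that turns an eclass_indexes bitmap back into EquivalenceClass pointers; the walker name is hypothetical.

/* Hypothetical walker over the ECs named by an eclass_indexes bitmap. */
static void
example_walk_eclass_indexes(PlannerInfo *root, Bitmapset *ec_indexes)
{
    int         i = -1;

    while ((i = bms_next_member(ec_indexes, i)) >= 0)
    {
        EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);

        /* ... examine ec here ... */
        (void) ec;
    }
}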
+
+/*
+ * get_common_eclass_indexes
+ * Build and return a Bitmapset containing the indexes into root's
+ * eq_classes list for all eclasses that mention rels in both
+ * relids1 and relids2.
+ */
+static Bitmapset *
+get_common_eclass_indexes(PlannerInfo *root, Relids relids1, Relids relids2)
+{
+ Bitmapset *rel1ecs;
+ Bitmapset *rel2ecs;
+ int relid;
+
+ rel1ecs = get_eclass_indexes_for_relids(root, relids1);
+
+ /*
+ * We can get away with just using the relation's eclass_indexes directly
+ * when relids2 is a singleton set.
+ */
+ if (bms_get_singleton_member(relids2, &relid))
+ rel2ecs = root->simple_rel_array[relid]->eclass_indexes;
+ else
+ rel2ecs = get_eclass_indexes_for_relids(root, relids2);
+
+ /* Calculate and return the common EC indexes, recycling the left input. */
+ return bms_int_members(rel1ecs, rel2ecs);
+}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
new file mode 100644
index 0000000..3800f0c
--- /dev/null
+++ b/src/backend/optimizer/path/indxpath.c
@@ -0,0 +1,3817 @@
+/*-------------------------------------------------------------------------
+ *
+ * indxpath.c
+ * Routines to determine which indexes are usable for scanning a
+ * given relation, and create Paths accordingly.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/indxpath.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/stratnum.h"
+#include "access/sysattr.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/prep.h"
+#include "optimizer/restrictinfo.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+
+
+/* XXX see PartCollMatchesExprColl */
+#define IndexCollMatchesExprColl(idxcollation, exprcollation) \
+ ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation))
+
+/* Whether we are looking for plain indexscan, bitmap scan, or either */
+typedef enum
+{
+ ST_INDEXSCAN, /* must support amgettuple */
+ ST_BITMAPSCAN, /* must support amgetbitmap */
+ ST_ANYSCAN /* either is okay */
+} ScanTypeControl;
+
+/* Data structure for collecting qual clauses that match an index */
+typedef struct
+{
+ bool nonempty; /* True if lists are not all empty */
+ /* Lists of IndexClause nodes, one list per index column */
+ List *indexclauses[INDEX_MAX_KEYS];
+} IndexClauseSet;
+
+/* Per-path data used within choose_bitmap_and() */
+typedef struct
+{
+ Path *path; /* IndexPath, BitmapAndPath, or BitmapOrPath */
+ List *quals; /* the WHERE clauses it uses */
+ List *preds; /* predicates of its partial index(es) */
+ Bitmapset *clauseids; /* quals+preds represented as a bitmapset */
+ bool unclassifiable; /* has too many quals+preds to process? */
+} PathClauseUsage;
+
+/* Callback argument for ec_member_matches_indexcol */
+typedef struct
+{
+ IndexOptInfo *index; /* index we're considering */
+ int indexcol; /* index column we want to match to */
+} ec_member_matches_arg;
+
+
+static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths);
+static void consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths,
+ List *indexjoinclauses,
+ int considered_clauses,
+ List **considered_relids);
+static void get_join_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths,
+ Relids relids,
+ List **considered_relids);
+static bool eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids,
+ List *indexjoinclauses);
+static bool bms_equal_any(Relids relids, List *relids_list);
+static void get_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ List **bitindexpaths);
+static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ bool useful_predicate,
+ ScanTypeControl scantype,
+ bool *skip_nonnative_saop,
+ bool *skip_lower_saop);
+static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses);
+static List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses);
+static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
+ List *paths);
+static int path_usage_comparator(const void *a, const void *b);
+static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
+ Path *ipath);
+static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
+ List *paths);
+static PathClauseUsage *classify_index_clause_usage(Path *path,
+ List **clauselist);
+static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
+static int find_list_position(Node *node, List **nodelist);
+static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
+static double adjust_rowcount_for_semijoins(PlannerInfo *root,
+ Index cur_relid,
+ Index outer_relid,
+ double rowcount);
+static double approximate_joinrel_size(PlannerInfo *root, Relids relids);
+static void match_restriction_clauses_to_index(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
+static void match_join_clauses_to_index(PlannerInfo *root,
+ RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset,
+ List **joinorclauses);
+static void match_eclass_clauses_to_index(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
+static void match_clauses_to_index(PlannerInfo *root,
+ List *clauses,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
+static void match_clause_to_index(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
+static IndexClause *match_clause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index);
+static IndexClause *match_boolean_index_clause(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol, IndexOptInfo *index);
+static IndexClause *match_opclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index);
+static IndexClause *match_funcclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index);
+static IndexClause *get_index_clause_from_support(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ Oid funcid,
+ int indexarg,
+ int indexcol,
+ IndexOptInfo *index);
+static IndexClause *match_saopclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index);
+static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index);
+static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index,
+ Oid expr_op,
+ bool var_on_left);
+static void match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys,
+ List **orderby_clauses_p,
+ List **clause_columns_p);
+static Expr *match_clause_to_ordering_op(IndexOptInfo *index,
+ int indexcol, Expr *clause, Oid pk_opfamily);
+static bool ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
+ EquivalenceClass *ec, EquivalenceMember *em,
+ void *arg);
+
+
+/*
+ * create_index_paths()
+ * Generate all interesting index paths for the given relation.
+ * Candidate paths are added to the rel's pathlist (using add_path).
+ *
+ * To be considered for an index scan, an index must match one or more
+ * restriction clauses or join clauses from the query's qual condition,
+ * or match the query's ORDER BY condition, or have a predicate that
+ * matches the query's qual condition.
+ *
+ * There are two basic kinds of index scans. A "plain" index scan uses
+ * only restriction clauses (possibly none at all) in its indexqual,
+ * so it can be applied in any context. A "parameterized" index scan uses
+ * join clauses (plus restriction clauses, if available) in its indexqual.
+ * When joining such a scan to one of the relations supplying the other
+ * variables used in its indexqual, the parameterized scan must appear as
+ * the inner relation of a nestloop join; it can't be used on the outer side,
+ * nor in a merge or hash join. In that context, values for the other rels'
+ * attributes are available and fixed during any one scan of the indexpath.
+ *
+ * An IndexPath is generated and submitted to add_path() for each plain or
+ * parameterized index scan this routine deems potentially interesting for
+ * the current query.
+ *
+ * 'rel' is the relation for which we want to generate index paths
+ *
+ * Note: check_index_predicates() must have been run previously for this rel.
+ *
+ * Note: in cases involving LATERAL references in the relation's tlist, it's
+ * possible that rel->lateral_relids is nonempty. Currently, we include
+ * lateral_relids into the parameterization reported for each path, but don't
+ * take it into account otherwise. The fact that any such rels *must* be
+ * available as parameter sources perhaps should influence our choices of
+ * index quals ... but for now, it doesn't seem worth troubling over.
+ * In particular, comments below about "unparameterized" paths should be read
+ * as meaning "unparameterized so far as the indexquals are concerned".
+ */
+void
+create_index_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ List *indexpaths;
+ List *bitindexpaths;
+ List *bitjoinpaths;
+ List *joinorclauses;
+ IndexClauseSet rclauseset;
+ IndexClauseSet jclauseset;
+ IndexClauseSet eclauseset;
+ ListCell *lc;
+
+ /* Skip the whole mess if no indexes */
+ if (rel->indexlist == NIL)
+ return;
+
+ /* Bitmap paths are collected and then dealt with at the end */
+ bitindexpaths = bitjoinpaths = joinorclauses = NIL;
+
+ /* Examine each index in turn */
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+
+ /* Protect limited-size array in IndexClauseSets */
+ Assert(index->nkeycolumns <= INDEX_MAX_KEYS);
+
+ /*
+ * Ignore partial indexes that do not match the query.
+ * (generate_bitmap_or_paths() might be able to do something with
+ * them, but that's of no concern here.)
+ */
+ if (index->indpred != NIL && !index->predOK)
+ continue;
+
+ /*
+ * Identify the restriction clauses that can match the index.
+ */
+ MemSet(&rclauseset, 0, sizeof(rclauseset));
+ match_restriction_clauses_to_index(root, index, &rclauseset);
+
+ /*
+ * Build index paths from the restriction clauses. These will be
+ * non-parameterized paths. Plain paths go directly to add_path(),
+ * bitmap paths are added to bitindexpaths to be handled below.
+ */
+ get_index_paths(root, rel, index, &rclauseset,
+ &bitindexpaths);
+
+ /*
+ * Identify the join clauses that can match the index. For the moment
+ * we keep them separate from the restriction clauses. Note that this
+ * step finds only "loose" join clauses that have not been merged into
+ * EquivalenceClasses. Also, collect join OR clauses for later.
+ */
+ MemSet(&jclauseset, 0, sizeof(jclauseset));
+ match_join_clauses_to_index(root, rel, index,
+ &jclauseset, &joinorclauses);
+
+ /*
+ * Look for EquivalenceClasses that can generate joinclauses matching
+ * the index.
+ */
+ MemSet(&eclauseset, 0, sizeof(eclauseset));
+ match_eclass_clauses_to_index(root, index,
+ &eclauseset);
+
+ /*
+ * If we found any plain or eclass join clauses, build parameterized
+ * index paths using them.
+ */
+ if (jclauseset.nonempty || eclauseset.nonempty)
+ consider_index_join_clauses(root, rel, index,
+ &rclauseset,
+ &jclauseset,
+ &eclauseset,
+ &bitjoinpaths);
+ }
+
+ /*
+ * Generate BitmapOrPaths for any suitable OR-clauses present in the
+ * restriction list. Add these to bitindexpaths.
+ */
+ indexpaths = generate_bitmap_or_paths(root, rel,
+ rel->baserestrictinfo, NIL);
+ bitindexpaths = list_concat(bitindexpaths, indexpaths);
+
+ /*
+ * Likewise, generate BitmapOrPaths for any suitable OR-clauses present in
+ * the joinclause list. Add these to bitjoinpaths.
+ */
+ indexpaths = generate_bitmap_or_paths(root, rel,
+ joinorclauses, rel->baserestrictinfo);
+ bitjoinpaths = list_concat(bitjoinpaths, indexpaths);
+
+ /*
+ * If we found anything usable, generate a BitmapHeapPath for the most
+ * promising combination of restriction bitmap index paths. Note there
+ * will be only one such path no matter how many indexes exist. This
+ * should be sufficient since there's basically only one figure of merit
+ * (total cost) for such a path.
+ */
+ if (bitindexpaths != NIL)
+ {
+ Path *bitmapqual;
+ BitmapHeapPath *bpath;
+
+ bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
+ bpath = create_bitmap_heap_path(root, rel, bitmapqual,
+ rel->lateral_relids, 1.0, 0);
+ add_path(rel, (Path *) bpath);
+
+ /* create a partial bitmap heap path */
+ if (rel->consider_parallel && rel->lateral_relids == NULL)
+ create_partial_bitmap_paths(root, rel, bitmapqual);
+ }
+
+ /*
+ * Likewise, if we found anything usable, generate BitmapHeapPaths for the
+ * most promising combinations of join bitmap index paths. Our strategy
+ * is to generate one such path for each distinct parameterization seen
+ * among the available bitmap index paths. This may look pretty
+ * expensive, but usually there won't be very many distinct
+ * parameterizations. (This logic is quite similar to that in
+ * consider_index_join_clauses, but we're working with whole paths not
+ * individual clauses.)
+ */
+ if (bitjoinpaths != NIL)
+ {
+ List *all_path_outers;
+ ListCell *lc;
+
+ /* Identify each distinct parameterization seen in bitjoinpaths */
+ all_path_outers = NIL;
+ foreach(lc, bitjoinpaths)
+ {
+ Path *path = (Path *) lfirst(lc);
+ Relids required_outer = PATH_REQ_OUTER(path);
+
+ if (!bms_equal_any(required_outer, all_path_outers))
+ all_path_outers = lappend(all_path_outers, required_outer);
+ }
+
+ /* Now, for each distinct parameterization set ... */
+ foreach(lc, all_path_outers)
+ {
+ Relids max_outers = (Relids) lfirst(lc);
+ List *this_path_set;
+ Path *bitmapqual;
+ Relids required_outer;
+ double loop_count;
+ BitmapHeapPath *bpath;
+ ListCell *lcp;
+
+ /* Identify all the bitmap join paths needing no more than that */
+ this_path_set = NIL;
+ foreach(lcp, bitjoinpaths)
+ {
+ Path *path = (Path *) lfirst(lcp);
+
+ if (bms_is_subset(PATH_REQ_OUTER(path), max_outers))
+ this_path_set = lappend(this_path_set, path);
+ }
+
+ /*
+ * Add in restriction bitmap paths, since they can be used
+ * together with any join paths.
+ */
+ this_path_set = list_concat(this_path_set, bitindexpaths);
+
+ /* Select best AND combination for this parameterization */
+ bitmapqual = choose_bitmap_and(root, rel, this_path_set);
+
+ /* And push that path into the mix */
+ required_outer = PATH_REQ_OUTER(bitmapqual);
+ loop_count = get_loop_count(root, rel->relid, required_outer);
+ bpath = create_bitmap_heap_path(root, rel, bitmapqual,
+ required_outer, loop_count, 0);
+ add_path(rel, (Path *) bpath);
+ }
+ }
+}
+
+/*
+ * consider_index_join_clauses
+ * Given sets of join clauses for an index, decide which parameterized
+ * index paths to build.
+ *
+ * Plain indexpaths are sent directly to add_path, while potential
+ * bitmap indexpaths are added to *bitindexpaths for later processing.
+ *
+ * 'rel' is the index's heap relation
+ * 'index' is the index for which we want to generate paths
+ * 'rclauseset' is the collection of indexable restriction clauses
+ * 'jclauseset' is the collection of indexable simple join clauses
+ * 'eclauseset' is the collection of indexable clauses from EquivalenceClasses
+ * '*bitindexpaths' is the list to add bitmap paths to
+ */
+static void
+consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths)
+{
+ int considered_clauses = 0;
+ List *considered_relids = NIL;
+ int indexcol;
+
+ /*
+ * The strategy here is to identify every potentially useful set of outer
+ * rels that can provide indexable join clauses. For each such set,
+ * select all the join clauses available from those outer rels, add on all
+ * the indexable restriction clauses, and generate plain and/or bitmap
+ * index paths for that set of clauses. This is based on the assumption
+ * that it's always better to apply a clause as an indexqual than as a
+ * filter (qpqual); which is where an available clause would end up being
+ * applied if we omit it from the indexquals.
+ *
+ * This looks expensive, but in most practical cases there won't be very
+ * many distinct sets of outer rels to consider. As a safety valve when
+ * that's not true, we use a heuristic: limit the number of outer rel sets
+ * considered to a multiple of the number of clauses considered. (We'll
+ * always consider using each individual join clause, though.)
+ *
+ * For simplicity in selecting relevant clauses, we represent each set of
+ * outer rels as a maximum set of clause_relids --- that is, the indexed
+ * relation itself is also included in the relids set. considered_relids
+ * lists all relids sets we've already tried.
+ */
+ for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+ {
+ /* Consider each applicable simple join clause */
+ considered_clauses += list_length(jclauseset->indexclauses[indexcol]);
+ consider_index_join_outer_rels(root, rel, index,
+ rclauseset, jclauseset, eclauseset,
+ bitindexpaths,
+ jclauseset->indexclauses[indexcol],
+ considered_clauses,
+ &considered_relids);
+ /* Consider each applicable eclass join clause */
+ considered_clauses += list_length(eclauseset->indexclauses[indexcol]);
+ consider_index_join_outer_rels(root, rel, index,
+ rclauseset, jclauseset, eclauseset,
+ bitindexpaths,
+ eclauseset->indexclauses[indexcol],
+ considered_clauses,
+ &considered_relids);
+ }
+}
+
+/*
+ * consider_index_join_outer_rels
+ * Generate parameterized paths based on clause relids in the clause list.
+ *
+ * Workhorse for consider_index_join_clauses; see notes therein for rationale.
+ *
+ * 'rel', 'index', 'rclauseset', 'jclauseset', 'eclauseset', and
+ * 'bitindexpaths' as above
+ * 'indexjoinclauses' is a list of IndexClauses for join clauses
+ * 'considered_clauses' is the total number of clauses considered (so far)
+ * '*considered_relids' is a list of all relids sets already considered
+ */
+static void
+consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths,
+ List *indexjoinclauses,
+ int considered_clauses,
+ List **considered_relids)
+{
+ ListCell *lc;
+
+ /* Examine relids of each joinclause in the given list */
+ foreach(lc, indexjoinclauses)
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(lc);
+ Relids clause_relids = iclause->rinfo->clause_relids;
+ EquivalenceClass *parent_ec = iclause->rinfo->parent_ec;
+ int num_considered_relids;
+
+ /* If we already tried its relids set, no need to do so again */
+ if (bms_equal_any(clause_relids, *considered_relids))
+ continue;
+
+ /*
+ * Generate the union of this clause's relids set with each
+ * previously-tried set. This ensures we try this clause along with
+ * every interesting subset of previous clauses. However, to avoid
+ * exponential growth of planning time when there are many clauses,
+ * limit the number of relid sets accepted to 10 * considered_clauses.
+ *
+ * Note: get_join_index_paths appends entries to *considered_relids,
+ * but we do not need to visit such newly-added entries within this
+ * loop, so we don't use foreach() here. No real harm would be done
+ * if we did visit them, since the subset check would reject them; but
+ * it would waste some cycles.
+ */
+ num_considered_relids = list_length(*considered_relids);
+ for (int pos = 0; pos < num_considered_relids; pos++)
+ {
+ Relids oldrelids = (Relids) list_nth(*considered_relids, pos);
+
+ /*
+ * If either is a subset of the other, no new set is possible.
+ * This isn't a complete test for redundancy, but it's easy and
+ * cheap. get_join_index_paths will check more carefully if we
+ * already generated the same relids set.
+ */
+ if (bms_subset_compare(clause_relids, oldrelids) != BMS_DIFFERENT)
+ continue;
+
+ /*
+ * If this clause was derived from an equivalence class, the
+ * clause list may contain other clauses derived from the same
+ * eclass. We should not consider that combining this clause with
+ * one of those clauses generates a usefully different
+ * parameterization; so skip if any clause derived from the same
+ * eclass would already have been included when using oldrelids.
+ */
+ if (parent_ec &&
+ eclass_already_used(parent_ec, oldrelids,
+ indexjoinclauses))
+ continue;
+
+ /*
+ * If the number of relid sets considered exceeds our heuristic
+ * limit, stop considering combinations of clauses. We'll still
+ * consider the current clause alone, though (below this loop).
+ */
+ if (list_length(*considered_relids) >= 10 * considered_clauses)
+ break;
+
+ /* OK, try the union set */
+ get_join_index_paths(root, rel, index,
+ rclauseset, jclauseset, eclauseset,
+ bitindexpaths,
+ bms_union(clause_relids, oldrelids),
+ considered_relids);
+ }
+
+ /* Also try this set of relids by itself */
+ get_join_index_paths(root, rel, index,
+ rclauseset, jclauseset, eclauseset,
+ bitindexpaths,
+ clause_relids,
+ considered_relids);
+ }
+}
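
A worked instance of the safety valve above: with, say, three simple and two eclass join clauses seen so far, considered_clauses is 5, so at most 10 * 5 = 50 relid sets are accepted before clause combination stops (each clause by itself is still tried). The predicate below, with a hypothetical name and not part of the upstream file, mirrors that limit test.

/* Hypothetical predicate mirroring the limit test used above. */
static inline bool
example_relid_set_budget_exceeded(List *considered_relids,
                                  int considered_clauses)
{
    return list_length(considered_relids) >= 10 * considered_clauses;
}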
+
+/*
+ * get_join_index_paths
+ * Generate index paths using clauses from the specified outer relations.
+ * In addition to generating paths, relids is added to *considered_relids
+ * if not already present.
+ *
+ * Workhorse for consider_index_join_clauses; see notes therein for rationale.
+ *
+ * 'rel', 'index', 'rclauseset', 'jclauseset', 'eclauseset',
+ * 'bitindexpaths', 'considered_relids' as above
+ * 'relids' is the current set of relids to consider (the target rel plus
+ * one or more outer rels)
+ */
+static void
+get_join_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths,
+ Relids relids,
+ List **considered_relids)
+{
+ IndexClauseSet clauseset;
+ int indexcol;
+
+ /* If we already considered this relids set, don't repeat the work */
+ if (bms_equal_any(relids, *considered_relids))
+ return;
+
+ /* Identify indexclauses usable with this relids set */
+ MemSet(&clauseset, 0, sizeof(clauseset));
+
+ for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+ {
+ ListCell *lc;
+
+ /* First find applicable simple join clauses */
+ foreach(lc, jclauseset->indexclauses[indexcol])
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(lc);
+
+ if (bms_is_subset(iclause->rinfo->clause_relids, relids))
+ clauseset.indexclauses[indexcol] =
+ lappend(clauseset.indexclauses[indexcol], iclause);
+ }
+
+ /*
+ * Add applicable eclass join clauses. The clauses generated for each
+ * column are redundant (cf generate_implied_equalities_for_column),
+ * so we need at most one. This is the only exception to the general
+ * rule of using all available index clauses.
+ */
+ foreach(lc, eclauseset->indexclauses[indexcol])
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(lc);
+
+ if (bms_is_subset(iclause->rinfo->clause_relids, relids))
+ {
+ clauseset.indexclauses[indexcol] =
+ lappend(clauseset.indexclauses[indexcol], iclause);
+ break;
+ }
+ }
+
+ /* Add restriction clauses */
+ clauseset.indexclauses[indexcol] =
+ list_concat(clauseset.indexclauses[indexcol],
+ rclauseset->indexclauses[indexcol]);
+
+ if (clauseset.indexclauses[indexcol] != NIL)
+ clauseset.nonempty = true;
+ }
+
+ /* We should have found something, else caller passed silly relids */
+ Assert(clauseset.nonempty);
+
+ /* Build index path(s) using the collected set of clauses */
+ get_index_paths(root, rel, index, &clauseset, bitindexpaths);
+
+ /*
+ * Remember we considered paths for this set of relids.
+ */
+ *considered_relids = lappend(*considered_relids, relids);
+}
+
+/*
+ * eclass_already_used
+ * True if any join clause usable with oldrelids was generated from
+ * the specified equivalence class.
+ */
+static bool
+eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids,
+ List *indexjoinclauses)
+{
+ ListCell *lc;
+
+ foreach(lc, indexjoinclauses)
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(lc);
+ RestrictInfo *rinfo = iclause->rinfo;
+
+ if (rinfo->parent_ec == parent_ec &&
+ bms_is_subset(rinfo->clause_relids, oldrelids))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * bms_equal_any
+ * True if relids is bms_equal to any member of relids_list
+ *
+ * Perhaps this should be in bitmapset.c someday.
+ */
+static bool
+bms_equal_any(Relids relids, List *relids_list)
+{
+ ListCell *lc;
+
+ foreach(lc, relids_list)
+ {
+ if (bms_equal(relids, (Relids) lfirst(lc)))
+ return true;
+ }
+ return false;
+}
+
+
+/*
+ * get_index_paths
+ * Given an index and a set of index clauses for it, construct IndexPaths.
+ *
+ * Plain indexpaths are sent directly to add_path, while potential
+ * bitmap indexpaths are added to *bitindexpaths for later processing.
+ *
+ * This is a fairly simple frontend to build_index_paths(). Its reason for
+ * existence is mainly to handle ScalarArrayOpExpr quals properly. If the
+ * index AM supports them natively, we should just include them in simple
+ * index paths. If not, we should exclude them while building simple index
+ * paths, and then make a separate attempt to include them in bitmap paths.
+ * Furthermore, we should consider excluding lower-order ScalarArrayOpExpr
+ * quals so as to create ordered paths.
+ */
+static void
+get_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ List **bitindexpaths)
+{
+ List *indexpaths;
+ bool skip_nonnative_saop = false;
+ bool skip_lower_saop = false;
+ ListCell *lc;
+
+ /*
+ * Build simple index paths using the clauses. Allow ScalarArrayOpExpr
+ * clauses only if the index AM supports them natively, and skip any such
+ * clauses for index columns after the first (so that we produce ordered
+ * paths if possible).
+ */
+ indexpaths = build_index_paths(root, rel,
+ index, clauses,
+ index->predOK,
+ ST_ANYSCAN,
+ &skip_nonnative_saop,
+ &skip_lower_saop);
+
+ /*
+ * If we skipped any lower-order ScalarArrayOpExprs on an index with an AM
+ * that supports them, then try again including those clauses. This will
+ * produce paths with more selectivity but no ordering.
+ */
+ if (skip_lower_saop)
+ {
+ indexpaths = list_concat(indexpaths,
+ build_index_paths(root, rel,
+ index, clauses,
+ index->predOK,
+ ST_ANYSCAN,
+ &skip_nonnative_saop,
+ NULL));
+ }
+
+ /*
+ * Submit all the ones that can form plain IndexScan plans to add_path. (A
+ * plain IndexPath can represent either a plain IndexScan or an
+ * IndexOnlyScan, but for our purposes here that distinction does not
+ * matter. However, some of the indexes might support only bitmap scans,
+ * and those we mustn't submit to add_path here.)
+ *
+ * Also, pick out the ones that are usable as bitmap scans. For that, we
+ * must discard indexes that don't support bitmap scans, and we also are
+ * only interested in paths that have some selectivity; we should discard
+ * anything that was generated solely for ordering purposes.
+ */
+ foreach(lc, indexpaths)
+ {
+ IndexPath *ipath = (IndexPath *) lfirst(lc);
+
+ if (index->amhasgettuple)
+ add_path(rel, (Path *) ipath);
+
+ if (index->amhasgetbitmap &&
+ (ipath->path.pathkeys == NIL ||
+ ipath->indexselectivity < 1.0))
+ *bitindexpaths = lappend(*bitindexpaths, ipath);
+ }
+
+ /*
+ * If there were ScalarArrayOpExpr clauses that the index can't handle
+ * natively, generate bitmap scan paths relying on executor-managed
+ * ScalarArrayOpExpr.
+ */
+ if (skip_nonnative_saop)
+ {
+ indexpaths = build_index_paths(root, rel,
+ index, clauses,
+ false,
+ ST_BITMAPSCAN,
+ NULL,
+ NULL);
+ *bitindexpaths = list_concat(*bitindexpaths, indexpaths);
+ }
+}
+
+/*
+ * build_index_paths
+ * Given an index and a set of index clauses for it, construct zero
+ * or more IndexPaths. It also constructs zero or more partial IndexPaths.
+ *
+ * We return a list of paths because (1) this routine checks some cases
+ * that should cause us to not generate any IndexPath, and (2) in some
+ * cases we want to consider both a forward and a backward scan, so as
+ * to obtain both sort orders. Note that the paths are just returned
+ * to the caller and not immediately fed to add_path().
+ *
+ * At top level, useful_predicate should be exactly the index's predOK flag
+ * (ie, true if it has a predicate that was proven from the restriction
+ * clauses). When working on an arm of an OR clause, useful_predicate
+ * should be true if the predicate required the current OR list to be proven.
+ * Note that this routine should never be called at all if the index has an
+ * unprovable predicate.
+ *
+ * scantype indicates whether we want to create plain indexscans, bitmap
+ * indexscans, or both. When it's ST_BITMAPSCAN, we will not consider
+ * index ordering while deciding if a Path is worth generating.
+ *
+ * If skip_nonnative_saop is non-NULL, we ignore ScalarArrayOpExpr clauses
+ * unless the index AM supports them directly, and we set *skip_nonnative_saop
+ * to true if we found any such clauses (caller must initialize the variable
+ * to false). If it's NULL, we do not ignore ScalarArrayOpExpr clauses.
+ *
+ * If skip_lower_saop is non-NULL, we ignore ScalarArrayOpExpr clauses for
+ * non-first index columns, and we set *skip_lower_saop to true if we found
+ * any such clauses (caller must initialize the variable to false). If it's
+ * NULL, we do not ignore non-first ScalarArrayOpExpr clauses, but they will
+ * result in considering the scan's output to be unordered.
+ *
+ * 'rel' is the index's heap relation
+ * 'index' is the index for which we want to generate paths
+ * 'clauses' is the collection of indexable clauses (IndexClause nodes)
+ * 'useful_predicate' indicates whether the index has a useful predicate
+ * 'scantype' indicates whether we need plain or bitmap scan support
+ * 'skip_nonnative_saop' indicates whether to accept SAOP if index AM doesn't
+ * 'skip_lower_saop' indicates whether to accept non-first-column SAOP
+ */
+static List *
+build_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ bool useful_predicate,
+ ScanTypeControl scantype,
+ bool *skip_nonnative_saop,
+ bool *skip_lower_saop)
+{
+ List *result = NIL;
+ IndexPath *ipath;
+ List *index_clauses;
+ Relids outer_relids;
+ double loop_count;
+ List *orderbyclauses;
+ List *orderbyclausecols;
+ List *index_pathkeys;
+ List *useful_pathkeys;
+ bool found_lower_saop_clause;
+ bool pathkeys_possibly_useful;
+ bool index_is_ordered;
+ bool index_only_scan;
+ int indexcol;
+
+ /*
+ * Check that index supports the desired scan type(s)
+ */
+ switch (scantype)
+ {
+ case ST_INDEXSCAN:
+ if (!index->amhasgettuple)
+ return NIL;
+ break;
+ case ST_BITMAPSCAN:
+ if (!index->amhasgetbitmap)
+ return NIL;
+ break;
+ case ST_ANYSCAN:
+ /* either or both are OK */
+ break;
+ }
+
+ /*
+ * 1. Combine the per-column IndexClause lists into an overall list.
+ *
+ * In the resulting list, clauses are ordered by index key, so that the
+ * column numbers form a nondecreasing sequence. (This order is depended
+ * on by btree and possibly other places.) The list can be empty, if the
+ * index AM allows that.
+ *
+ * found_lower_saop_clause is set true if we accept a ScalarArrayOpExpr
+ * index clause for a non-first index column. This prevents us from
+ * assuming that the scan result is ordered. (Actually, the result is
+ * still ordered if there are equality constraints for all earlier
+ * columns, but it seems too expensive and non-modular for this code to be
+ * aware of that refinement.)
+ *
+ * We also build a Relids set showing which outer rels are required by the
+ * selected clauses. Any lateral_relids are included in that, but not
+ * otherwise accounted for.
+ */
+ index_clauses = NIL;
+ found_lower_saop_clause = false;
+ outer_relids = bms_copy(rel->lateral_relids);
+ for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+ {
+ ListCell *lc;
+
+ foreach(lc, clauses->indexclauses[indexcol])
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(lc);
+ RestrictInfo *rinfo = iclause->rinfo;
+
+ /* We might need to omit ScalarArrayOpExpr clauses */
+ if (IsA(rinfo->clause, ScalarArrayOpExpr))
+ {
+ if (!index->amsearcharray)
+ {
+ if (skip_nonnative_saop)
+ {
+ /* Ignore because not supported by index */
+ *skip_nonnative_saop = true;
+ continue;
+ }
+ /* Caller had better intend this only for bitmap scan */
+ Assert(scantype == ST_BITMAPSCAN);
+ }
+ if (indexcol > 0)
+ {
+ if (skip_lower_saop)
+ {
+ /* Caller doesn't want to lose index ordering */
+ *skip_lower_saop = true;
+ continue;
+ }
+ found_lower_saop_clause = true;
+ }
+ }
+
+ /* OK to include this clause */
+ index_clauses = lappend(index_clauses, iclause);
+ outer_relids = bms_add_members(outer_relids,
+ rinfo->clause_relids);
+ }
+
+ /*
+ * If no clauses match the first index column, check for amoptionalkey
+ * restriction. We can't generate a scan over an index with
+ * amoptionalkey = false unless there's at least one index clause.
+ * (When working on columns after the first, this test cannot fail. It
+ * is always okay for columns after the first to not have any
+ * clauses.)
+ */
+ if (index_clauses == NIL && !index->amoptionalkey)
+ return NIL;
+ }
+
+ /* We do not want the index's rel itself listed in outer_relids */
+ outer_relids = bms_del_member(outer_relids, rel->relid);
+ /* Enforce convention that outer_relids is exactly NULL if empty */
+ if (bms_is_empty(outer_relids))
+ outer_relids = NULL;
+
+ /* Compute loop_count for cost estimation purposes */
+ loop_count = get_loop_count(root, rel->relid, outer_relids);
+
+ /*
+ * 2. Compute pathkeys describing index's ordering, if any, then see how
+ * many of them are actually useful for this query. This is not relevant
+ * if we are only trying to build bitmap indexscans, nor if we have to
+ * assume the scan is unordered.
+ */
+ pathkeys_possibly_useful = (scantype != ST_BITMAPSCAN &&
+ !found_lower_saop_clause &&
+ has_useful_pathkeys(root, rel));
+ index_is_ordered = (index->sortopfamily != NULL);
+ if (index_is_ordered && pathkeys_possibly_useful)
+ {
+ index_pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+ }
+ else if (index->amcanorderbyop && pathkeys_possibly_useful)
+ {
+ /* see if we can generate ordering operators for query_pathkeys */
+ match_pathkeys_to_index(index, root->query_pathkeys,
+ &orderbyclauses,
+ &orderbyclausecols);
+ if (orderbyclauses)
+ useful_pathkeys = root->query_pathkeys;
+ else
+ useful_pathkeys = NIL;
+ }
+ else
+ {
+ useful_pathkeys = NIL;
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+ }
+
+ /*
+ * 3. Check if an index-only scan is possible. If we're not building
+ * plain indexscans, this isn't relevant since bitmap scans don't support
+ * index data retrieval anyway.
+ */
+ index_only_scan = (scantype != ST_BITMAPSCAN &&
+ check_index_only(rel, index));
+
+ /*
+ * 4. Generate an indexscan path if there are relevant restriction clauses
+ * in the current clauses, OR the index ordering is potentially useful for
+ * later merging or final output ordering, OR the index has a useful
+ * predicate, OR an index-only scan is possible.
+ */
+ if (index_clauses != NIL || useful_pathkeys != NIL || useful_predicate ||
+ index_only_scan)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ orderbyclauses,
+ orderbyclausecols,
+ useful_pathkeys,
+ index_is_ordered ?
+ ForwardScanDirection :
+ NoMovementScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count,
+ false);
+ result = lappend(result, ipath);
+
+ /*
+ * If appropriate, consider parallel index scan. We don't allow
+ * parallel index scan for bitmap index scans.
+ */
+ if (index->amcanparallel &&
+ rel->consider_parallel && outer_relids == NULL &&
+ scantype != ST_BITMAPSCAN)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ orderbyclauses,
+ orderbyclausecols,
+ useful_pathkeys,
+ index_is_ordered ?
+ ForwardScanDirection :
+ NoMovementScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count,
+ true);
+
+ /*
+ * if, after costing the path, we find that it's not worth using
+ * parallel workers, just free it.
+ */
+ if (ipath->path.parallel_workers > 0)
+ add_partial_path(rel, (Path *) ipath);
+ else
+ pfree(ipath);
+ }
+ }
+
+ /*
+ * 5. If the index is ordered, a backwards scan might be interesting.
+ */
+ if (index_is_ordered && pathkeys_possibly_useful)
+ {
+ index_pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ if (useful_pathkeys != NIL)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ NIL,
+ NIL,
+ useful_pathkeys,
+ BackwardScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count,
+ false);
+ result = lappend(result, ipath);
+
+ /* If appropriate, consider parallel index scan */
+ if (index->amcanparallel &&
+ rel->consider_parallel && outer_relids == NULL &&
+ scantype != ST_BITMAPSCAN)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ NIL,
+ NIL,
+ useful_pathkeys,
+ BackwardScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count,
+ true);
+
+ /*
+ * if, after costing the path, we find that it's not worth
+ * using parallel workers, just free it.
+ */
+ if (ipath->path.parallel_workers > 0)
+ add_partial_path(rel, (Path *) ipath);
+ else
+ pfree(ipath);
+ }
+ }
+ }
+
+ return result;
+}
+
+/*
+ * build_paths_for_OR
+ * Given a list of restriction clauses from one arm of an OR clause,
+ * construct all matching IndexPaths for the relation.
+ *
+ * Here we must scan all indexes of the relation, since a bitmap OR tree
+ * can use multiple indexes.
+ *
+ * The caller actually supplies two lists of restriction clauses: some
+ * "current" ones and some "other" ones. Both lists can be used freely
+ * to match keys of the index, but an index must use at least one of the
+ * "current" clauses to be considered usable. The motivation for this is
+ * examples like
+ * WHERE (x = 42) AND (... OR (y = 52 AND z = 77) OR ....)
+ * While we are considering the y/z subclause of the OR, we can use "x = 42"
+ * as one of the available index conditions; but we shouldn't match the
+ * subclause to any index on x alone, because such a Path would already have
+ * been generated at the upper level. So we could use an index on x,y,z
+ * or an index on x,y for the OR subclause, but not an index on just x.
+ * When dealing with a partial index, a match of the index predicate to
+ * one of the "current" clauses also makes the index usable.
+ *
+ * 'rel' is the relation for which we want to generate index paths
+ * 'clauses' is the current list of clauses (RestrictInfo nodes)
+ * 'other_clauses' is the list of additional upper-level clauses
+ */
+static List *
+build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses)
+{
+ List *result = NIL;
+ List *all_clauses = NIL; /* not computed till needed */
+ ListCell *lc;
+
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+ IndexClauseSet clauseset;
+ List *indexpaths;
+ bool useful_predicate;
+
+ /* Ignore index if it doesn't support bitmap scans */
+ if (!index->amhasgetbitmap)
+ continue;
+
+ /*
+ * Ignore partial indexes that do not match the query. If a partial
+ * index is marked predOK then we know it's OK. Otherwise, we have to
+ * test whether the added clauses are sufficient to imply the
+ * predicate. If so, we can use the index in the current context.
+ *
+ * We set useful_predicate to true iff the predicate was proven using
+ * the current set of clauses. This is needed to prevent matching a
+ * predOK index to an arm of an OR, which would be a legal but
+ * pointlessly inefficient plan. (A better plan will be generated by
+ * just scanning the predOK index alone, no OR.)
+ */
+ useful_predicate = false;
+ if (index->indpred != NIL)
+ {
+ if (index->predOK)
+ {
+ /* Usable, but don't set useful_predicate */
+ }
+ else
+ {
+ /* Form all_clauses if not done already */
+ if (all_clauses == NIL)
+ all_clauses = list_concat_copy(clauses, other_clauses);
+
+ if (!predicate_implied_by(index->indpred, all_clauses, false))
+ continue; /* can't use it at all */
+
+ if (!predicate_implied_by(index->indpred, other_clauses, false))
+ useful_predicate = true;
+ }
+ }
+
+ /*
+ * Identify the restriction clauses that can match the index.
+ */
+ MemSet(&clauseset, 0, sizeof(clauseset));
+ match_clauses_to_index(root, clauses, index, &clauseset);
+
+ /*
+ * If no matches so far, and the index predicate isn't useful, we
+ * don't want it.
+ */
+ if (!clauseset.nonempty && !useful_predicate)
+ continue;
+
+ /*
+ * Add "other" restriction clauses to the clauseset.
+ */
+ match_clauses_to_index(root, other_clauses, index, &clauseset);
+
+ /*
+ * Construct paths if possible.
+ */
+ indexpaths = build_index_paths(root, rel,
+ index, &clauseset,
+ useful_predicate,
+ ST_BITMAPSCAN,
+ NULL,
+ NULL);
+ result = list_concat(result, indexpaths);
+ }
+
+ return result;
+}
+
+/*
+ * generate_bitmap_or_paths
+ * Look through the list of clauses to find OR clauses, and generate
+ * a BitmapOrPath for each one we can handle that way. Return a list
+ * of the generated BitmapOrPaths.
+ *
+ * other_clauses is a list of additional clauses that can be assumed true
+ * for the purpose of generating indexquals, but are not to be searched for
+ * ORs. (See build_paths_for_OR() for motivation.)
+ */
+static List *
+generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses)
+{
+ List *result = NIL;
+ List *all_clauses;
+ ListCell *lc;
+
+ /*
+ * We can use both the current and other clauses as context for
+ * build_paths_for_OR; no need to remove ORs from the lists.
+ */
+ all_clauses = list_concat_copy(clauses, other_clauses);
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+ List *pathlist;
+ Path *bitmapqual;
+ ListCell *j;
+
+ /* Ignore RestrictInfos that aren't ORs */
+ if (!restriction_is_or_clause(rinfo))
+ continue;
+
+ /*
+ * We must be able to match at least one index to each of the arms of
+ * the OR, else we can't use it.
+ */
+ pathlist = NIL;
+ foreach(j, ((BoolExpr *) rinfo->orclause)->args)
+ {
+ Node *orarg = (Node *) lfirst(j);
+ List *indlist;
+
+ /* OR arguments should be ANDs or sub-RestrictInfos */
+ if (is_andclause(orarg))
+ {
+ List *andargs = ((BoolExpr *) orarg)->args;
+
+ indlist = build_paths_for_OR(root, rel,
+ andargs,
+ all_clauses);
+
+ /* Recurse in case there are sub-ORs */
+ indlist = list_concat(indlist,
+ generate_bitmap_or_paths(root, rel,
+ andargs,
+ all_clauses));
+ }
+ else
+ {
+ RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
+ List *orargs;
+
+ Assert(!restriction_is_or_clause(rinfo));
+ orargs = list_make1(rinfo);
+
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ all_clauses);
+ }
+
+ /*
+ * If nothing matched this arm, we can't do anything with this OR
+ * clause.
+ */
+ if (indlist == NIL)
+ {
+ pathlist = NIL;
+ break;
+ }
+
+ /*
+ * OK, pick the most promising AND combination, and add it to
+ * pathlist.
+ */
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
+ pathlist = lappend(pathlist, bitmapqual);
+ }
+
+ /*
+ * If we have a match for every arm, then turn them into a
+ * BitmapOrPath, and add to result list.
+ */
+ if (pathlist != NIL)
+ {
+ bitmapqual = (Path *) create_bitmap_or_path(root, rel, pathlist);
+ result = lappend(result, bitmapqual);
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * choose_bitmap_and
+ * Given a nonempty list of bitmap paths, AND them into one path.
+ *
+ * This is a nontrivial decision since we can legally use any subset of the
+ * given path set. We want to choose a good tradeoff between selectivity
+ * and cost of computing the bitmap.
+ *
+ * The result is either a single one of the inputs, or a BitmapAndPath
+ * combining multiple inputs.
+ */
+static Path *
+choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths)
+{
+ int npaths = list_length(paths);
+ PathClauseUsage **pathinfoarray;
+ PathClauseUsage *pathinfo;
+ List *clauselist;
+ List *bestpaths = NIL;
+ Cost bestcost = 0;
+ int i,
+ j;
+ ListCell *l;
+
+ Assert(npaths > 0); /* else caller error */
+ if (npaths == 1)
+ return (Path *) linitial(paths); /* easy case */
+
+ /*
+ * In theory we should consider every nonempty subset of the given paths.
+ * In practice that seems like overkill, given the crude nature of the
+ * estimates, not to mention the possible effects of higher-level AND and
+ * OR clauses. Moreover, it's completely impractical if there are a large
+ * number of paths, since the work would grow as O(2^N).
+ *
+ * As a heuristic, we first check for paths using exactly the same sets of
+ * WHERE clauses + index predicate conditions, and reject all but the
+ * cheapest-to-scan in any such group. This primarily gets rid of indexes
+ * that include the interesting columns but also irrelevant columns. (In
+ * situations where the DBA has gone overboard on creating variant
+ * indexes, this can make for a very large reduction in the number of
+ * paths considered further.)
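+ *
+ * For example, given a qual "x = 42" and indexes on (x) and on (x, y), both
+ * indexes produce paths using just that one clause, so only the cheaper of
+ * the two to scan survives this step.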
+ *
+ * We then sort the surviving paths with the cheapest-to-scan first, and
+ * for each path, consider using that path alone as the basis for a bitmap
+ * scan. Then we consider bitmap AND scans formed from that path plus
+ * each subsequent (higher-cost) path, adding on a subsequent path if it
+ * results in a reduction in the estimated total scan cost. This means we
+ * consider about O(N^2) rather than O(2^N) path combinations, which is
+ * quite tolerable, especially given that N is usually reasonably small
+ * because of the prefiltering step. The cheapest of these is returned.
+ *
+ * We will only consider AND combinations in which no two indexes use the
+ * same WHERE clause. This is a bit of a kluge: it's needed because
+ * costsize.c and clausesel.c aren't very smart about redundant clauses.
+ * They will usually double-count the redundant clauses, producing a
+ * too-small selectivity that makes a redundant AND step look like it
+ * reduces the total cost. Perhaps someday that code will be smarter and
+ * we can remove this limitation. (But note that this also defends
+ * against flat-out duplicate input paths, which can happen because
+ * match_join_clauses_to_index will find the same OR join clauses that
+ * extract_restriction_or_clauses has pulled OR restriction clauses out
+ * of.)
+ *
+ * For the same reason, we reject AND combinations in which an index
+ * predicate clause duplicates another clause. Here we find it necessary
+ * to be even stricter: we'll reject a partial index if any of its
+ * predicate clauses are implied by the set of WHERE clauses and predicate
+ * clauses used so far. This covers cases such as a condition "x = 42"
+ * used with a plain index, followed by a clauseless scan of a partial
+ * index "WHERE x >= 40 AND x < 50". The partial index has been accepted
+ * only because "x = 42" was present, and so allowing it would partially
+ * double-count selectivity. (We could use predicate_implied_by on
+ * regular qual clauses too, to have a more intelligent, but much more
+ * expensive, check for redundancy --- but in most cases simple equality
+ * seems to suffice.)
+ */
+
+ /*
+ * Extract clause usage info and detect any paths that use exactly the
+ * same set of clauses; keep only the cheapest-to-scan of any such groups.
+ * The surviving paths are put into an array for qsort'ing.
+ */
+ pathinfoarray = (PathClauseUsage **)
+ palloc(npaths * sizeof(PathClauseUsage *));
+ clauselist = NIL;
+ npaths = 0;
+ foreach(l, paths)
+ {
+ Path *ipath = (Path *) lfirst(l);
+
+ pathinfo = classify_index_clause_usage(ipath, &clauselist);
+
+ /* If it's unclassifiable, treat it as distinct from all others */
+ if (pathinfo->unclassifiable)
+ {
+ pathinfoarray[npaths++] = pathinfo;
+ continue;
+ }
+
+ for (i = 0; i < npaths; i++)
+ {
+ if (!pathinfoarray[i]->unclassifiable &&
+ bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids))
+ break;
+ }
+ if (i < npaths)
+ {
+ /* duplicate clauseids, keep the cheaper one */
+ Cost ncost;
+ Cost ocost;
+ Selectivity nselec;
+ Selectivity oselec;
+
+ cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec);
+ cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec);
+ if (ncost < ocost)
+ pathinfoarray[i] = pathinfo;
+ }
+ else
+ {
+ /* not duplicate clauseids, add to array */
+ pathinfoarray[npaths++] = pathinfo;
+ }
+ }
+
+ /* If only one surviving path, we're done */
+ if (npaths == 1)
+ return pathinfoarray[0]->path;
+
+ /* Sort the surviving paths by index access cost */
+ qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *),
+ path_usage_comparator);
+
+ /*
+ * For each surviving index, consider it as an "AND group leader", and see
+ * whether adding on any of the later indexes results in an AND path with
+ * cheaper total cost than before. Then take the cheapest AND group.
+ *
+ * Note: paths that are either clauseless or unclassifiable will have
+ * empty clauseids, so that they will not be rejected by the clauseids
+ * filter here, nor will they cause later paths to be rejected by it.
+ */
+ for (i = 0; i < npaths; i++)
+ {
+ Cost costsofar;
+ List *qualsofar;
+ Bitmapset *clauseidsofar;
+
+ pathinfo = pathinfoarray[i];
+ paths = list_make1(pathinfo->path);
+ costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path);
+ qualsofar = list_concat_copy(pathinfo->quals, pathinfo->preds);
+ clauseidsofar = bms_copy(pathinfo->clauseids);
+
+ for (j = i + 1; j < npaths; j++)
+ {
+ Cost newcost;
+
+ pathinfo = pathinfoarray[j];
+ /* Check for redundancy */
+ if (bms_overlap(pathinfo->clauseids, clauseidsofar))
+ continue; /* consider it redundant */
+ if (pathinfo->preds)
+ {
+ bool redundant = false;
+
+ /* we check each predicate clause separately */
+ foreach(l, pathinfo->preds)
+ {
+ Node *np = (Node *) lfirst(l);
+
+ if (predicate_implied_by(list_make1(np), qualsofar, false))
+ {
+ redundant = true;
+ break; /* out of inner foreach loop */
+ }
+ }
+ if (redundant)
+ continue;
+ }
+ /* tentatively add new path to paths, so we can estimate cost */
+ paths = lappend(paths, pathinfo->path);
+ newcost = bitmap_and_cost_est(root, rel, paths);
+ if (newcost < costsofar)
+ {
+ /* keep new path in paths, update subsidiary variables */
+ costsofar = newcost;
+ qualsofar = list_concat(qualsofar, pathinfo->quals);
+ qualsofar = list_concat(qualsofar, pathinfo->preds);
+ clauseidsofar = bms_add_members(clauseidsofar,
+ pathinfo->clauseids);
+ }
+ else
+ {
+ /* reject new path, remove it from paths list */
+ paths = list_truncate(paths, list_length(paths) - 1);
+ }
+ }
+
+ /* Keep the cheapest AND-group (or singleton) */
+ if (i == 0 || costsofar < bestcost)
+ {
+ bestpaths = paths;
+ bestcost = costsofar;
+ }
+
+ /* some easy cleanup (we don't try real hard though) */
+ list_free(qualsofar);
+ }
+
+ if (list_length(bestpaths) == 1)
+ return (Path *) linitial(bestpaths); /* no need for AND */
+ return (Path *) create_bitmap_and_path(root, rel, bestpaths);
+}
+
+/* qsort comparator to sort in increasing index access cost order */
+static int
+path_usage_comparator(const void *a, const void *b)
+{
+ PathClauseUsage *pa = *(PathClauseUsage *const *) a;
+ PathClauseUsage *pb = *(PathClauseUsage *const *) b;
+ Cost acost;
+ Cost bcost;
+ Selectivity aselec;
+ Selectivity bselec;
+
+ cost_bitmap_tree_node(pa->path, &acost, &aselec);
+ cost_bitmap_tree_node(pb->path, &bcost, &bselec);
+
+ /*
+ * If costs are the same, sort by selectivity.
+ */
+ if (acost < bcost)
+ return -1;
+ if (acost > bcost)
+ return 1;
+
+ if (aselec < bselec)
+ return -1;
+ if (aselec > bselec)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Estimate the cost of actually executing a bitmap scan with a single
+ * index path (which could be a BitmapAnd or BitmapOr node).
+ */
+static Cost
+bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath)
+{
+ BitmapHeapPath bpath;
+
+ /* Set up a dummy BitmapHeapPath */
+ bpath.path.type = T_BitmapHeapPath;
+ bpath.path.pathtype = T_BitmapHeapScan;
+ bpath.path.parent = rel;
+ bpath.path.pathtarget = rel->reltarget;
+ bpath.path.param_info = ipath->param_info;
+ bpath.path.pathkeys = NIL;
+ bpath.bitmapqual = ipath;
+
+ /*
+ * Check the cost of temporary path without considering parallelism.
+ * Parallel bitmap heap path will be considered at later stage.
+ */
+ bpath.path.parallel_workers = 0;
+
+ /* Now we can do cost_bitmap_heap_scan */
+ cost_bitmap_heap_scan(&bpath.path, root, rel,
+ bpath.path.param_info,
+ ipath,
+ get_loop_count(root, rel->relid,
+ PATH_REQ_OUTER(ipath)));
+
+ return bpath.path.total_cost;
+}
+
+/*
+ * Estimate the cost of actually executing a BitmapAnd scan with the given
+ * inputs.
+ */
+static Cost
+bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths)
+{
+ BitmapAndPath *apath;
+
+ /*
+ * Might as well build a real BitmapAndPath here, as the work is slightly
+ * too complicated to be worth repeating just to save one palloc.
+ */
+ apath = create_bitmap_and_path(root, rel, paths);
+
+ return bitmap_scan_cost_est(root, rel, (Path *) apath);
+}
+
+
+/*
+ * classify_index_clause_usage
+ * Construct a PathClauseUsage struct describing the WHERE clauses and
+ * index predicate clauses used by the given indexscan path.
+ * We consider two clauses the same if they are equal().
+ *
+ * At some point we might want to migrate this info into the Path data
+ * structure proper, but for the moment it's only needed within
+ * choose_bitmap_and().
+ *
+ * *clauselist is used and expanded as needed to identify all the distinct
+ * clauses seen across successive calls. Caller must initialize it to NIL
+ * before first call of a set.
+ */
+static PathClauseUsage *
+classify_index_clause_usage(Path *path, List **clauselist)
+{
+ PathClauseUsage *result;
+ Bitmapset *clauseids;
+ ListCell *lc;
+
+ result = (PathClauseUsage *) palloc(sizeof(PathClauseUsage));
+ result->path = path;
+
+ /* Recursively find the quals and preds used by the path */
+ result->quals = NIL;
+ result->preds = NIL;
+ find_indexpath_quals(path, &result->quals, &result->preds);
+
+ /*
+ * Some machine-generated queries have outlandish numbers of qual clauses.
+ * To avoid getting into O(N^2) behavior even in this preliminary
+ * classification step, we want to limit the number of entries we can
+ * accumulate in *clauselist. Treat any path with more than 100 quals +
+ * preds as unclassifiable, which will cause calling code to consider it
+ * distinct from all other paths.
+ */
+ if (list_length(result->quals) + list_length(result->preds) > 100)
+ {
+ result->clauseids = NULL;
+ result->unclassifiable = true;
+ return result;
+ }
+
+ /* Build up a bitmapset representing the quals and preds */
+ clauseids = NULL;
+ foreach(lc, result->quals)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ clauseids = bms_add_member(clauseids,
+ find_list_position(node, clauselist));
+ }
+ foreach(lc, result->preds)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ clauseids = bms_add_member(clauseids,
+ find_list_position(node, clauselist));
+ }
+ result->clauseids = clauseids;
+ result->unclassifiable = false;
+
+ return result;
+}
+
+
+/*
+ * find_indexpath_quals
+ *
+ * Given the Path structure for a plain or bitmap indexscan, extract lists
+ * of all the index clauses and index predicate conditions used in the Path.
+ * These are appended to the initial contents of *quals and *preds (hence
+ * caller should initialize those to NIL).
+ *
+ * Note we are not trying to produce an accurate representation of the AND/OR
+ * semantics of the Path, but just find out all the base conditions used.
+ *
+ * The result lists contain pointers to the expressions used in the Path,
+ * but all the list cells are freshly built, so it's safe to destructively
+ * modify the lists (eg, by concat'ing with other lists).
+ */
+static void
+find_indexpath_quals(Path *bitmapqual, List **quals, List **preds)
+{
+ if (IsA(bitmapqual, BitmapAndPath))
+ {
+ BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
+ ListCell *l;
+
+ foreach(l, apath->bitmapquals)
+ {
+ find_indexpath_quals((Path *) lfirst(l), quals, preds);
+ }
+ }
+ else if (IsA(bitmapqual, BitmapOrPath))
+ {
+ BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
+ ListCell *l;
+
+ foreach(l, opath->bitmapquals)
+ {
+ find_indexpath_quals((Path *) lfirst(l), quals, preds);
+ }
+ }
+ else if (IsA(bitmapqual, IndexPath))
+ {
+ IndexPath *ipath = (IndexPath *) bitmapqual;
+ ListCell *l;
+
+ foreach(l, ipath->indexclauses)
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(l);
+
+ *quals = lappend(*quals, iclause->rinfo->clause);
+ }
+ *preds = list_concat(*preds, ipath->indexinfo->indpred);
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
+}
+
+
+/*
+ * find_list_position
+ * Return the given node's position (counting from 0) in the given
+ * list of nodes. If it's not equal() to any existing list member,
+ * add it at the end, and return that position.
+ */
+static int
+find_list_position(Node *node, List **nodelist)
+{
+ int i;
+ ListCell *lc;
+
+ i = 0;
+ foreach(lc, *nodelist)
+ {
+ Node *oldnode = (Node *) lfirst(lc);
+
+ if (equal(node, oldnode))
+ return i;
+ i++;
+ }
+
+ *nodelist = lappend(*nodelist, node);
+
+ return i;
+}
+
+
+/*
+ * check_index_only
+ * Determine whether an index-only scan is possible for this index.
+ */
+static bool
+check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+{
+ bool result;
+ Bitmapset *attrs_used = NULL;
+ Bitmapset *index_canreturn_attrs = NULL;
+ ListCell *lc;
+ int i;
+
+ /* Index-only scans must be enabled */
+ if (!enable_indexonlyscan)
+ return false;
+
+ /*
+ * Check that all needed attributes of the relation are available from the
+ * index.
+ */
+
+ /*
+ * First, identify all the attributes needed for joins or final output.
+ * Note: we must look at rel's targetlist, not the attr_needed data,
+ * because attr_needed isn't computed for inheritance child rels.
+ */
+ pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
+
+ /*
+ * Add all the attributes used by restriction clauses; but consider only
+ * those clauses not implied by the index predicate, since ones that are
+ * so implied don't need to be checked explicitly in the plan.
+ *
+ * Note: attributes used only in index quals would not be needed at
+ * runtime either, if we are certain that the index is not lossy. However
+ * it'd be complicated to account for that accurately, and it doesn't
+ * matter in most cases, since we'd conclude that such attributes are
+ * available from the index anyway.
+ */
+ foreach(lc, index->indrestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
+ }
+
+ /*
+ * Construct a bitmapset of columns that the index can return back in an
+ * index-only scan.
+ */
+ for (i = 0; i < index->ncolumns; i++)
+ {
+ int attno = index->indexkeys[i];
+
+ /*
+ * For the moment, we just ignore index expressions. It might be nice
+ * to do something with them, later.
+ */
+ if (attno == 0)
+ continue;
+
+ if (index->canreturn[i])
+ index_canreturn_attrs =
+ bms_add_member(index_canreturn_attrs,
+ attno - FirstLowInvalidHeapAttributeNumber);
+ }
+
+ /* Do we have all the necessary attributes? */
+ result = bms_is_subset(attrs_used, index_canreturn_attrs);
+
+ bms_free(attrs_used);
+ bms_free(index_canreturn_attrs);
+
+ return result;
+}
+
+/*
+ * get_loop_count
+ * Choose the loop count estimate to use for costing a parameterized path
+ * with the given set of outer relids.
+ *
+ * Since we produce parameterized paths before we've begun to generate join
+ * relations, it's impossible to predict exactly how many times a parameterized
+ * path will be iterated; we don't know the size of the relation that will be
+ * on the outside of the nestloop. However, we should try to account for
+ * multiple iterations somehow in costing the path. The heuristic embodied
+ * here is to use the rowcount of the smallest other base relation needed in
+ * the join clauses used by the path. (We could alternatively consider the
+ * largest one, but that seems too optimistic.) This is of course the right
+ * answer for single-other-relation cases, and it seems like a reasonable
+ * zero-order approximation for multiway-join cases.
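+ * (For example, if the path's join clauses reference two other base rels
+ * estimated at 1000 rows and 10 rows, we cost the path with a loop count of
+ * 10.)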
+ *
+ * In addition, we check to see if the other side of each join clause is on
+ * the inside of some semijoin that the current relation is on the outside of.
+ * If so, the only way that a parameterized path could be used is if the
+ * semijoin RHS has been unique-ified, so we should use the number of unique
+ * RHS rows rather than using the relation's raw rowcount.
+ *
+ * Note: for this to work, allpaths.c must establish all baserel size
+ * estimates before it begins to compute paths, or at least before it
+ * calls create_index_paths().
+ */
+static double
+get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids)
+{
+ double result;
+ int outer_relid;
+
+ /* For a non-parameterized path, just return 1.0 quickly */
+ if (outer_relids == NULL)
+ return 1.0;
+
+ result = 0.0;
+ outer_relid = -1;
+ while ((outer_relid = bms_next_member(outer_relids, outer_relid)) >= 0)
+ {
+ RelOptInfo *outer_rel;
+ double rowcount;
+
+ /* Paranoia: ignore bogus relid indexes */
+ if (outer_relid >= root->simple_rel_array_size)
+ continue;
+ outer_rel = root->simple_rel_array[outer_relid];
+ if (outer_rel == NULL)
+ continue;
+ Assert(outer_rel->relid == outer_relid); /* sanity check on array */
+
+ /* Other relation could be proven empty, if so ignore */
+ if (IS_DUMMY_REL(outer_rel))
+ continue;
+
+ /* Otherwise, rel's rows estimate should be valid by now */
+ Assert(outer_rel->rows > 0);
+
+ /* Check to see if rel is on the inside of any semijoins */
+ rowcount = adjust_rowcount_for_semijoins(root,
+ cur_relid,
+ outer_relid,
+ outer_rel->rows);
+
+ /* Remember smallest row count estimate among the outer rels */
+ if (result == 0.0 || result > rowcount)
+ result = rowcount;
+ }
+ /* Return 1.0 if we found no valid relations (shouldn't happen) */
+ return (result > 0.0) ? result : 1.0;
+}
+
+/*
+ * Check to see if outer_relid is on the inside of any semijoin that cur_relid
+ * is on the outside of. If so, replace rowcount with the estimated number of
+ * unique rows from the semijoin RHS (assuming that's smaller, which it might
+ * not be). The estimate is crude but it's the best we can do at this stage
+ * of the proceedings.
+ */
+static double
+adjust_rowcount_for_semijoins(PlannerInfo *root,
+ Index cur_relid,
+ Index outer_relid,
+ double rowcount)
+{
+ ListCell *lc;
+
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+ if (sjinfo->jointype == JOIN_SEMI &&
+ bms_is_member(cur_relid, sjinfo->syn_lefthand) &&
+ bms_is_member(outer_relid, sjinfo->syn_righthand))
+ {
+ /* Estimate number of unique-ified rows */
+ double nraw;
+ double nunique;
+
+ nraw = approximate_joinrel_size(root, sjinfo->syn_righthand);
+ nunique = estimate_num_groups(root,
+ sjinfo->semi_rhs_exprs,
+ nraw,
+ NULL,
+ NULL);
+ if (rowcount > nunique)
+ rowcount = nunique;
+ }
+ }
+ return rowcount;
+}
+
+/*
+ * Make an approximate estimate of the size of a joinrel.
+ *
+ * We don't have enough info at this point to get a good estimate, so we
+ * just multiply the base relation sizes together. Fortunately, this is
+ * the right answer anyway for the most common case with a single relation
+ * on the RHS of a semijoin. Also, estimate_num_groups() has only a weak
+ * dependency on its input_rows argument (it basically uses it as a clamp).
+ * So we might be able to get a fairly decent end result even with a severe
+ * overestimate of the RHS's raw size.
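+ * (For example, an RHS made up of two base rels estimated at 100 and 20 rows
+ * is approximated as 100 * 20 = 2000 rows.)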
+ */
+static double
+approximate_joinrel_size(PlannerInfo *root, Relids relids)
+{
+ double rowcount = 1.0;
+ int relid;
+
+ relid = -1;
+ while ((relid = bms_next_member(relids, relid)) >= 0)
+ {
+ RelOptInfo *rel;
+
+ /* Paranoia: ignore bogus relid indexes */
+ if (relid >= root->simple_rel_array_size)
+ continue;
+ rel = root->simple_rel_array[relid];
+ if (rel == NULL)
+ continue;
+ Assert(rel->relid == relid); /* sanity check on array */
+
+ /* Relation could be proven empty, if so ignore */
+ if (IS_DUMMY_REL(rel))
+ continue;
+
+ /* Otherwise, rel's rows estimate should be valid by now */
+ Assert(rel->rows > 0);
+
+ /* Accumulate product */
+ rowcount *= rel->rows;
+ }
+ return rowcount;
+}
+
+
+/****************************************************************************
+ * ---- ROUTINES TO CHECK QUERY CLAUSES ----
+ ****************************************************************************/
+
+/*
+ * match_restriction_clauses_to_index
+ * Identify restriction clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_restriction_clauses_to_index(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ /* We can ignore clauses that are implied by the index predicate */
+ match_clauses_to_index(root, index->indrestrictinfo, index, clauseset);
+}
+
+/*
+ * match_join_clauses_to_index
+ * Identify join clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ * Also, add any potentially usable join OR clauses to *joinorclauses.
+ */
+static void
+match_join_clauses_to_index(PlannerInfo *root,
+ RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset,
+ List **joinorclauses)
+{
+ ListCell *lc;
+
+ /* Scan the rel's join clauses */
+ foreach(lc, rel->joininfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Check if clause can be moved to this rel */
+ if (!join_clause_is_movable_to(rinfo, rel))
+ continue;
+
+ /* Potentially usable, so see if it matches the index or is an OR */
+ if (restriction_is_or_clause(rinfo))
+ *joinorclauses = lappend(*joinorclauses, rinfo);
+ else
+ match_clause_to_index(root, rinfo, index, clauseset);
+ }
+}
+
+/*
+ * match_eclass_clauses_to_index
+ * Identify EquivalenceClass join clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ int indexcol;
+
+ /* No work if rel is not in any such ECs */
+ if (!index->rel->has_eclass_joins)
+ return;
+
+ for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+ {
+ ec_member_matches_arg arg;
+ List *clauses;
+
+ /* Generate clauses, skipping any that join to lateral_referencers */
+ arg.index = index;
+ arg.indexcol = indexcol;
+ clauses = generate_implied_equalities_for_column(root,
+ index->rel,
+ ec_member_matches_indexcol,
+ (void *) &arg,
+ index->rel->lateral_referencers);
+
+ /*
+ * We have to check whether the results actually do match the index,
+ * since for non-btree indexes the EC's equality operators might not
+ * be in the index opclass (cf ec_member_matches_indexcol).
+ */
+ match_clauses_to_index(root, clauses, index, clauseset);
+ }
+}
+
+/*
+ * match_clauses_to_index
+ * Perform match_clause_to_index() for each clause in a list.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_clauses_to_index(PlannerInfo *root,
+ List *clauses,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ ListCell *lc;
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+ match_clause_to_index(root, rinfo, index, clauseset);
+ }
+}
+
+/*
+ * match_clause_to_index
+ * Test whether a qual clause can be used with an index.
+ *
+ * If the clause is usable, add an IndexClause entry for it to the appropriate
+ * list in *clauseset. (*clauseset must be initialized to zeroes before first
+ * call.)
+ *
+ * Note: in some circumstances we may find the same RestrictInfos coming from
+ * multiple places. Defend against redundant outputs by refusing to add a
+ * clause twice (pointer equality should be a good enough check for this).
+ *
+ * Note: it's possible that a badly-defined index could have multiple matching
+ * columns. We always select the first match if so; this avoids scenarios
+ * wherein we get an inflated idea of the index's selectivity by using the
+ * same clause multiple times with different index columns.
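+ * (For example, if an index were defined on (x, x), a qual "x = 1" would be
+ * matched only to the first of the two columns.)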
+ */
+static void
+match_clause_to_index(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ int indexcol;
+
+ /*
+ * Never match pseudoconstants to indexes. (Normally a match could not
+ * happen anyway, since a pseudoconstant clause couldn't contain a Var,
+ * but what if someone builds an expression index on a constant? It's not
+ * totally unreasonable to do so with a partial index, either.)
+ */
+ if (rinfo->pseudoconstant)
+ return;
+
+ /*
+ * If clause can't be used as an indexqual because it must wait till after
+ * some lower-security-level restriction clause, reject it.
+ */
+ if (!restriction_is_securely_promotable(rinfo, index->rel))
+ return;
+
+ /* OK, check each index key column for a match */
+ for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+ {
+ IndexClause *iclause;
+ ListCell *lc;
+
+ /* Ignore duplicates */
+ foreach(lc, clauseset->indexclauses[indexcol])
+ {
+ IndexClause *iclause = (IndexClause *) lfirst(lc);
+
+ if (iclause->rinfo == rinfo)
+ return;
+ }
+
+ /* OK, try to match the clause to the index column */
+ iclause = match_clause_to_indexcol(root,
+ rinfo,
+ indexcol,
+ index);
+ if (iclause)
+ {
+ /* Success, so record it */
+ clauseset->indexclauses[indexcol] =
+ lappend(clauseset->indexclauses[indexcol], iclause);
+ clauseset->nonempty = true;
+ return;
+ }
+ }
+}
+
+/*
+ * match_clause_to_indexcol()
+ * Determine whether a restriction clause matches a column of an index,
+ * and if so, build an IndexClause node describing the details.
+ *
+ * To match an index normally, an operator clause:
+ *
+ * (1) must be in the form (indexkey op const) or (const op indexkey);
+ * and
+ * (2) must contain an operator which is in the index's operator family
+ * for this column; and
+ * (3) must match the collation of the index, if collation is relevant.
+ *
+ * Our definition of "const" is exceedingly liberal: we allow anything that
+ * doesn't involve a volatile function or a Var of the index's relation.
+ * In particular, Vars belonging to other relations of the query are
+ * accepted here, since a clause of that form can be used in a
+ * parameterized indexscan. It's the responsibility of higher code levels
+ * to manage restriction and join clauses appropriately.
+ *
+ * Note: we do need to check for Vars of the index's relation on the
+ * "const" side of the clause, since clauses like (a.f1 OP (b.f2 OP a.f3))
+ * are not processable by a parameterized indexscan on a.f1, whereas
+ * something like (a.f1 OP (b.f2 OP c.f3)) is.
+ *
+ * Presently, the executor can only deal with indexquals that have the
+ * indexkey on the left, so we can only use clauses that have the indexkey
+ * on the right if we can commute the clause to put the key on the left.
+ * We handle that by generating an IndexClause with the correctly-commuted
+ * opclause as a derived indexqual.
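+ * (For instance, "42 < indexkey" can be commuted to "indexkey > 42".)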
+ *
+ * If the index has a collation, the clause must have the same collation.
+ * For collation-less indexes, we assume it doesn't matter; this is
+ * necessary for cases like "hstore ? text", wherein hstore's operators
+ * don't care about collation but the clause will get marked with a
+ * collation anyway because of the text argument. (This logic is
+ * embodied in the macro IndexCollMatchesExprColl.)
+ *
+ * It is also possible to match RowCompareExpr clauses to indexes (but
+ * currently, only btree indexes handle this).
+ *
+ * It is also possible to match ScalarArrayOpExpr clauses to indexes, when
+ * the clause is of the form "indexkey op ANY (arrayconst)".
+ *
+ * For boolean indexes, it is also possible to match the clause directly
+ * to the indexkey; or perhaps the clause is (NOT indexkey).
+ *
+ * And, last but not least, some operators and functions can be processed
+ * to derive (typically lossy) indexquals from a clause that isn't in
+ * itself indexable. If we see that any operand of an OpExpr or FuncExpr
+ * matches the index key, and the function has a planner support function
+ * attached to it, we'll invoke the support function to see if such an
+ * indexqual can be built.
+ *
+ * 'rinfo' is the clause to be tested (as a RestrictInfo node).
+ * 'indexcol' is a column number of 'index' (counting from 0).
+ * 'index' is the index of interest.
+ *
+ * Returns an IndexClause if the clause can be used with this index key,
+ * or NULL if not.
+ *
+ * NOTE: returns NULL if clause is an OR or AND clause; it is the
+ * responsibility of higher-level routines to cope with those.
+ */
+static IndexClause *
+match_clause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ IndexClause *iclause;
+ Expr *clause = rinfo->clause;
+ Oid opfamily;
+
+ Assert(indexcol < index->nkeycolumns);
+
+ /*
+ * Historically this code has coped with NULL clauses. That's probably
+ * not possible anymore, but we might as well continue to cope.
+ */
+ if (clause == NULL)
+ return NULL;
+
+ /* First check for boolean-index cases. */
+ opfamily = index->opfamily[indexcol];
+ if (IsBooleanOpfamily(opfamily))
+ {
+ iclause = match_boolean_index_clause(root, rinfo, indexcol, index);
+ if (iclause)
+ return iclause;
+ }
+
+ /*
+ * Clause must be an opclause, funcclause, ScalarArrayOpExpr, or
+ * RowCompareExpr. Or, if the index supports it, we can handle IS
+ * NULL/NOT NULL clauses.
+ */
+ if (IsA(clause, OpExpr))
+ {
+ return match_opclause_to_indexcol(root, rinfo, indexcol, index);
+ }
+ else if (IsA(clause, FuncExpr))
+ {
+ return match_funcclause_to_indexcol(root, rinfo, indexcol, index);
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ return match_saopclause_to_indexcol(root, rinfo, indexcol, index);
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ return match_rowcompare_to_indexcol(root, rinfo, indexcol, index);
+ }
+ else if (index->amsearchnulls && IsA(clause, NullTest))
+ {
+ NullTest *nt = (NullTest *) clause;
+
+ if (!nt->argisrow &&
+ match_index_to_operand((Node *) nt->arg, indexcol, index))
+ {
+ iclause = makeNode(IndexClause);
+ iclause->rinfo = rinfo;
+ iclause->indexquals = list_make1(rinfo);
+ iclause->lossy = false;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+ return iclause;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * match_boolean_index_clause
+ * Recognize restriction clauses that can be matched to a boolean index.
+ *
+ * The idea here is that, for an index on a boolean column that supports the
+ * BooleanEqualOperator, we can transform a plain reference to the indexkey
+ * into "indexkey = true", or "NOT indexkey" into "indexkey = false", etc,
+ * so as to make the expression indexable using the index's "=" operator.
+ * Since Postgres 8.1, we must do this because constant simplification does
+ * the reverse transformation; without this code there'd be no way to use
+ * such an index at all.
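+ * For example, this is what lets a qual such as "WHERE deleted" or
+ * "WHERE NOT deleted" use a suitable index on the boolean column "deleted",
+ * by treating it as "deleted = true" or "deleted = false" respectively.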
+ *
+ * This should be called only when IsBooleanOpfamily() recognizes the
+ * index's operator family. We check to see if the clause matches the
+ * index's key, and if so, build a suitable IndexClause.
+ */
+static IndexClause *
+match_boolean_index_clause(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ Node *clause = (Node *) rinfo->clause;
+ Expr *op = NULL;
+
+ /* Direct match? */
+ if (match_index_to_operand(clause, indexcol, index))
+ {
+ /* convert to indexkey = TRUE */
+ op = make_opclause(BooleanEqualOperator, BOOLOID, false,
+ (Expr *) clause,
+ (Expr *) makeBoolConst(true, false),
+ InvalidOid, InvalidOid);
+ }
+ /* NOT clause? */
+ else if (is_notclause(clause))
+ {
+ Node *arg = (Node *) get_notclausearg((Expr *) clause);
+
+ if (match_index_to_operand(arg, indexcol, index))
+ {
+ /* convert to indexkey = FALSE */
+ op = make_opclause(BooleanEqualOperator, BOOLOID, false,
+ (Expr *) arg,
+ (Expr *) makeBoolConst(false, false),
+ InvalidOid, InvalidOid);
+ }
+ }
+
+ /*
+ * Since we only consider clauses at top level of WHERE, we can convert
+ * indexkey IS TRUE and indexkey IS FALSE to index searches as well. The
+ * different meaning for NULL isn't important.
+ */
+ else if (clause && IsA(clause, BooleanTest))
+ {
+ BooleanTest *btest = (BooleanTest *) clause;
+ Node *arg = (Node *) btest->arg;
+
+ if (btest->booltesttype == IS_TRUE &&
+ match_index_to_operand(arg, indexcol, index))
+ {
+ /* convert to indexkey = TRUE */
+ op = make_opclause(BooleanEqualOperator, BOOLOID, false,
+ (Expr *) arg,
+ (Expr *) makeBoolConst(true, false),
+ InvalidOid, InvalidOid);
+ }
+ else if (btest->booltesttype == IS_FALSE &&
+ match_index_to_operand(arg, indexcol, index))
+ {
+ /* convert to indexkey = FALSE */
+ op = make_opclause(BooleanEqualOperator, BOOLOID, false,
+ (Expr *) arg,
+ (Expr *) makeBoolConst(false, false),
+ InvalidOid, InvalidOid);
+ }
+ }
+
+ /*
+ * If we successfully made an operator clause from the given qual, we must
+ * wrap it in an IndexClause. It's not lossy.
+ */
+ if (op)
+ {
+ IndexClause *iclause = makeNode(IndexClause);
+
+ iclause->rinfo = rinfo;
+ iclause->indexquals = list_make1(make_simple_restrictinfo(root, op));
+ iclause->lossy = false;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+ return iclause;
+ }
+
+ return NULL;
+}
+
+/*
+ * match_opclause_to_indexcol()
+ * Handles the OpExpr case for match_clause_to_indexcol(),
+ * which see for comments.
+ */
+static IndexClause *
+match_opclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ IndexClause *iclause;
+ OpExpr *clause = (OpExpr *) rinfo->clause;
+ Node *leftop,
+ *rightop;
+ Oid expr_op;
+ Oid expr_coll;
+ Index index_relid;
+ Oid opfamily;
+ Oid idxcollation;
+
+ /*
+ * Only binary operators need apply. (In theory, a planner support
+ * function could do something with a unary operator, but it seems
+ * unlikely to be worth the cycles to check.)
+ */
+ if (list_length(clause->args) != 2)
+ return NULL;
+
+ leftop = (Node *) linitial(clause->args);
+ rightop = (Node *) lsecond(clause->args);
+ expr_op = clause->opno;
+ expr_coll = clause->inputcollid;
+
+ index_relid = index->rel->relid;
+ opfamily = index->opfamily[indexcol];
+ idxcollation = index->indexcollations[indexcol];
+
+ /*
+ * Check for clauses of the form: (indexkey operator constant) or
+ * (constant operator indexkey). See match_clause_to_indexcol's notes
+ * about const-ness.
+ *
+ * Note that we don't ask the support function about clauses that don't
+ * have one of these forms. Again, in principle it might be possible to
+ * do something, but it seems unlikely to be worth the cycles to check.
+ */
+ if (match_index_to_operand(leftop, indexcol, index) &&
+ !bms_is_member(index_relid, rinfo->right_relids) &&
+ !contain_volatile_functions(rightop))
+ {
+ if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
+ op_in_opfamily(expr_op, opfamily))
+ {
+ iclause = makeNode(IndexClause);
+ iclause->rinfo = rinfo;
+ iclause->indexquals = list_make1(rinfo);
+ iclause->lossy = false;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+ return iclause;
+ }
+
+ /*
+ * If we didn't find a member of the index's opfamily, try the support
+ * function for the operator's underlying function.
+ */
+ set_opfuncid(clause); /* make sure we have opfuncid */
+ return get_index_clause_from_support(root,
+ rinfo,
+ clause->opfuncid,
+ 0, /* indexarg on left */
+ indexcol,
+ index);
+ }
+
+ if (match_index_to_operand(rightop, indexcol, index) &&
+ !bms_is_member(index_relid, rinfo->left_relids) &&
+ !contain_volatile_functions(leftop))
+ {
+ if (IndexCollMatchesExprColl(idxcollation, expr_coll))
+ {
+ Oid comm_op = get_commutator(expr_op);
+
+ if (OidIsValid(comm_op) &&
+ op_in_opfamily(comm_op, opfamily))
+ {
+ RestrictInfo *commrinfo;
+
+ /* Build a commuted OpExpr and RestrictInfo */
+ commrinfo = commute_restrictinfo(rinfo, comm_op);
+
+ /* Make an IndexClause showing that as a derived qual */
+ iclause = makeNode(IndexClause);
+ iclause->rinfo = rinfo;
+ iclause->indexquals = list_make1(commrinfo);
+ iclause->lossy = false;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+ return iclause;
+ }
+ }
+
+ /*
+ * If we didn't find a member of the index's opfamily, try the support
+ * function for the operator's underlying function.
+ */
+ set_opfuncid(clause); /* make sure we have opfuncid */
+ return get_index_clause_from_support(root,
+ rinfo,
+ clause->opfuncid,
+ 1, /* indexarg on right */
+ indexcol,
+ index);
+ }
+
+ return NULL;
+}
+
+/*
+ * match_funcclause_to_indexcol()
+ * Handles the FuncExpr case for match_clause_to_indexcol(),
+ * which see for comments.
+ */
+static IndexClause *
+match_funcclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ FuncExpr *clause = (FuncExpr *) rinfo->clause;
+ int indexarg;
+ ListCell *lc;
+
+ /*
+ * We have no built-in intelligence about function clauses, but if there's
+ * a planner support function, it might be able to do something. But, to
+ * cut down on wasted planning cycles, only call the support function if
+ * at least one argument matches the target index column.
+ *
+ * Note that we don't insist on the other arguments being pseudoconstants;
+ * the support function has to check that. This is to allow cases where
+ * only some of the other arguments need to be included in the indexqual.
+ */
+ indexarg = 0;
+ foreach(lc, clause->args)
+ {
+ Node *op = (Node *) lfirst(lc);
+
+ if (match_index_to_operand(op, indexcol, index))
+ {
+ return get_index_clause_from_support(root,
+ rinfo,
+ clause->funcid,
+ indexarg,
+ indexcol,
+ index);
+ }
+
+ indexarg++;
+ }
+
+ return NULL;
+}
+
+/*
+ * get_index_clause_from_support()
+ * If the function has a planner support function, try to construct
+ * an IndexClause using indexquals created by the support function.
+ */
+static IndexClause *
+get_index_clause_from_support(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ Oid funcid,
+ int indexarg,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ Oid prosupport = get_func_support(funcid);
+ SupportRequestIndexCondition req;
+ List *sresult;
+
+ if (!OidIsValid(prosupport))
+ return NULL;
+
+ req.type = T_SupportRequestIndexCondition;
+ req.root = root;
+ req.funcid = funcid;
+ req.node = (Node *) rinfo->clause;
+ req.indexarg = indexarg;
+ req.index = index;
+ req.indexcol = indexcol;
+ req.opfamily = index->opfamily[indexcol];
+ req.indexcollation = index->indexcollations[indexcol];
+
+ req.lossy = true; /* default assumption */
+
+ sresult = (List *)
+ DatumGetPointer(OidFunctionCall1(prosupport,
+ PointerGetDatum(&req)));
+
+ if (sresult != NIL)
+ {
+ IndexClause *iclause = makeNode(IndexClause);
+ List *indexquals = NIL;
+ ListCell *lc;
+
+ /*
+ * The support function API says it should just give back bare
+ * clauses, so here we must wrap each one in a RestrictInfo.
+ */
+ foreach(lc, sresult)
+ {
+ Expr *clause = (Expr *) lfirst(lc);
+
+ indexquals = lappend(indexquals,
+ make_simple_restrictinfo(root, clause));
+ }
+
+ iclause->rinfo = rinfo;
+ iclause->indexquals = indexquals;
+ iclause->lossy = req.lossy;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+
+ return iclause;
+ }
+
+ return NULL;
+}
+
+/*
+ * match_saopclause_to_indexcol()
+ * Handles the ScalarArrayOpExpr case for match_clause_to_indexcol(),
+ * which see for comments.
+ */
+static IndexClause *
+match_saopclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
+ Node *leftop,
+ *rightop;
+ Relids right_relids;
+ Oid expr_op;
+ Oid expr_coll;
+ Index index_relid;
+ Oid opfamily;
+ Oid idxcollation;
+
+ /* We only accept ANY clauses, not ALL */
+ if (!saop->useOr)
+ return NULL;
+ leftop = (Node *) linitial(saop->args);
+ rightop = (Node *) lsecond(saop->args);
+ right_relids = pull_varnos(root, rightop);
+ expr_op = saop->opno;
+ expr_coll = saop->inputcollid;
+
+ index_relid = index->rel->relid;
+ opfamily = index->opfamily[indexcol];
+ idxcollation = index->indexcollations[indexcol];
+
+ /*
+ * We must have indexkey on the left and a pseudo-constant array argument.
+ */
+ if (match_index_to_operand(leftop, indexcol, index) &&
+ !bms_is_member(index_relid, right_relids) &&
+ !contain_volatile_functions(rightop))
+ {
+ if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
+ op_in_opfamily(expr_op, opfamily))
+ {
+ IndexClause *iclause = makeNode(IndexClause);
+
+ iclause->rinfo = rinfo;
+ iclause->indexquals = list_make1(rinfo);
+ iclause->lossy = false;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+ return iclause;
+ }
+
+ /*
+ * We do not currently ask support functions about ScalarArrayOpExprs,
+ * though in principle we could.
+ */
+ }
+
+ return NULL;
+}
+
+/*
+ * match_rowcompare_to_indexcol()
+ * Handles the RowCompareExpr case for match_clause_to_indexcol(),
+ * which see for comments.
+ *
+ * In this routine we check whether the first column of the row comparison
+ * matches the target index column. This is sufficient to guarantee that some
+ * index condition can be constructed from the RowCompareExpr --- the rest
+ * is handled by expand_indexqual_rowcompare().
+ */
+static IndexClause *
+match_rowcompare_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ RowCompareExpr *clause = (RowCompareExpr *) rinfo->clause;
+ Index index_relid;
+ Oid opfamily;
+ Oid idxcollation;
+ Node *leftop,
+ *rightop;
+ bool var_on_left;
+ Oid expr_op;
+ Oid expr_coll;
+
+ /* Forget it if we're not dealing with a btree index */
+ if (index->relam != BTREE_AM_OID)
+ return NULL;
+
+ index_relid = index->rel->relid;
+ opfamily = index->opfamily[indexcol];
+ idxcollation = index->indexcollations[indexcol];
+
+ /*
+ * We could do the matching on the basis of insisting that the opfamily
+ * shown in the RowCompareExpr be the same as the index column's opfamily,
+ * but that could fail in the presence of reverse-sort opfamilies: it'd be
+ * a matter of chance whether RowCompareExpr had picked the forward or
+ * reverse-sort family. So look only at the operator, and match if it is
+ * a member of the index's opfamily (after commutation, if the indexkey is
+ * on the right). We'll worry later about whether any additional
+ * operators are matchable to the index.
+ */
+ leftop = (Node *) linitial(clause->largs);
+ rightop = (Node *) linitial(clause->rargs);
+ expr_op = linitial_oid(clause->opnos);
+ expr_coll = linitial_oid(clause->inputcollids);
+
+ /* Collations must match, if relevant */
+ if (!IndexCollMatchesExprColl(idxcollation, expr_coll))
+ return NULL;
+
+ /*
+ * These syntactic tests are the same as in match_opclause_to_indexcol()
+ */
+ if (match_index_to_operand(leftop, indexcol, index) &&
+ !bms_is_member(index_relid, pull_varnos(root, rightop)) &&
+ !contain_volatile_functions(rightop))
+ {
+ /* OK, indexkey is on left */
+ var_on_left = true;
+ }
+ else if (match_index_to_operand(rightop, indexcol, index) &&
+ !bms_is_member(index_relid, pull_varnos(root, leftop)) &&
+ !contain_volatile_functions(leftop))
+ {
+ /* indexkey is on right, so commute the operator */
+ expr_op = get_commutator(expr_op);
+ if (expr_op == InvalidOid)
+ return NULL;
+ var_on_left = false;
+ }
+ else
+ return NULL;
+
+ /* We're good if the operator is the right type of opfamily member */
+ switch (get_op_opfamily_strategy(expr_op, opfamily))
+ {
+ case BTLessStrategyNumber:
+ case BTLessEqualStrategyNumber:
+ case BTGreaterEqualStrategyNumber:
+ case BTGreaterStrategyNumber:
+ return expand_indexqual_rowcompare(root,
+ rinfo,
+ indexcol,
+ index,
+ expr_op,
+ var_on_left);
+ }
+
+ return NULL;
+}
+
+/*
+ * expand_indexqual_rowcompare --- expand a single indexqual condition
+ * that is a RowCompareExpr
+ *
+ * It's already known that the first column of the row comparison matches
+ * the specified column of the index. We can use additional columns of the
+ * row comparison as index qualifications, so long as they match the index
+ * in the "same direction", ie, the indexkeys are all on the same side of the
+ * clause and the operators are all the same-type members of the opfamilies.
+ *
+ * If all the columns of the RowCompareExpr match in this way, we just use it
+ * as-is, except for possibly commuting it to put the indexkeys on the left.
+ *
+ * Otherwise, we build a shortened RowCompareExpr (if more than one
+ * column matches) or a simple OpExpr (if the first-column match is all
+ * there is). In these cases the modified clause is always "<=" or ">="
+ * even when the original was "<" or ">" --- this is necessary to match all
+ * the rows that could match the original. (We are building a lossy version
+ * of the row comparison when we do this, so we set lossy = true.)
+ *
+ * Note: this is really just the last half of match_rowcompare_to_indexcol,
+ * but we split it out for comprehensibility.
+ */
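+/*
+ * For example, given an index on (a, b) and the clause
+ * ROW(a, b, c) < ROW(1, 2, 3) where c is not an index column, only the
+ * first two columns are usable, so we generate the lossy condition
+ * ROW(a, b) <= ROW(1, 2) and rely on rechecking the original clause.
+ */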
+static IndexClause *
+expand_indexqual_rowcompare(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index,
+ Oid expr_op,
+ bool var_on_left)
+{
+ IndexClause *iclause = makeNode(IndexClause);
+ RowCompareExpr *clause = (RowCompareExpr *) rinfo->clause;
+ int op_strategy;
+ Oid op_lefttype;
+ Oid op_righttype;
+ int matching_cols;
+ List *expr_ops;
+ List *opfamilies;
+ List *lefttypes;
+ List *righttypes;
+ List *new_ops;
+ List *var_args;
+ List *non_var_args;
+
+ iclause->rinfo = rinfo;
+ iclause->indexcol = indexcol;
+
+ if (var_on_left)
+ {
+ var_args = clause->largs;
+ non_var_args = clause->rargs;
+ }
+ else
+ {
+ var_args = clause->rargs;
+ non_var_args = clause->largs;
+ }
+
+ get_op_opfamily_properties(expr_op, index->opfamily[indexcol], false,
+ &op_strategy,
+ &op_lefttype,
+ &op_righttype);
+
+ /* Initialize returned list of which index columns are used */
+ iclause->indexcols = list_make1_int(indexcol);
+
+ /* Build lists of ops, opfamilies and operator datatypes in case needed */
+ expr_ops = list_make1_oid(expr_op);
+ opfamilies = list_make1_oid(index->opfamily[indexcol]);
+ lefttypes = list_make1_oid(op_lefttype);
+ righttypes = list_make1_oid(op_righttype);
+
+ /*
+ * See how many of the remaining columns match some index column in the
+ * same way. As in match_clause_to_indexcol(), the "other" side of any
+ * potential index condition is OK as long as it doesn't use Vars from the
+ * indexed relation.
+ */
+ matching_cols = 1;
+
+ while (matching_cols < list_length(var_args))
+ {
+ Node *varop = (Node *) list_nth(var_args, matching_cols);
+ Node *constop = (Node *) list_nth(non_var_args, matching_cols);
+ int i;
+
+ expr_op = list_nth_oid(clause->opnos, matching_cols);
+ if (!var_on_left)
+ {
+ /* indexkey is on right, so commute the operator */
+ expr_op = get_commutator(expr_op);
+ if (expr_op == InvalidOid)
+ break; /* operator is not usable */
+ }
+ if (bms_is_member(index->rel->relid, pull_varnos(root, constop)))
+ break; /* no good, Var on wrong side */
+ if (contain_volatile_functions(constop))
+ break; /* no good, volatile comparison value */
+
+ /*
+ * The Var side can match any key column of the index.
+ */
+ for (i = 0; i < index->nkeycolumns; i++)
+ {
+ if (match_index_to_operand(varop, i, index) &&
+ get_op_opfamily_strategy(expr_op,
+ index->opfamily[i]) == op_strategy &&
+ IndexCollMatchesExprColl(index->indexcollations[i],
+ list_nth_oid(clause->inputcollids,
+ matching_cols)))
+ break;
+ }
+ if (i >= index->nkeycolumns)
+ break; /* no match found */
+
+ /* Add column number to returned list */
+ iclause->indexcols = lappend_int(iclause->indexcols, i);
+
+ /* Add operator info to lists */
+ get_op_opfamily_properties(expr_op, index->opfamily[i], false,
+ &op_strategy,
+ &op_lefttype,
+ &op_righttype);
+ expr_ops = lappend_oid(expr_ops, expr_op);
+ opfamilies = lappend_oid(opfamilies, index->opfamily[i]);
+ lefttypes = lappend_oid(lefttypes, op_lefttype);
+ righttypes = lappend_oid(righttypes, op_righttype);
+
+ /* This column matches, keep scanning */
+ matching_cols++;
+ }
+
+ /* Result is non-lossy if all columns are usable as index quals */
+ iclause->lossy = (matching_cols != list_length(clause->opnos));
+
+ /*
+ * We can use rinfo->clause as-is if we have var on left and it's all
+ * usable as index quals.
+ */
+ if (var_on_left && !iclause->lossy)
+ iclause->indexquals = list_make1(rinfo);
+ else
+ {
+ /*
+ * We have to generate a modified rowcompare (possibly just one
+ * OpExpr). The painful part of this is changing < to <= or > to >=,
+ * so deal with that first.
+ */
+ if (!iclause->lossy)
+ {
+ /* very easy, just use the commuted operators */
+ new_ops = expr_ops;
+ }
+ else if (op_strategy == BTLessEqualStrategyNumber ||
+ op_strategy == BTGreaterEqualStrategyNumber)
+ {
+ /* easy, just use the same (possibly commuted) operators */
+ new_ops = list_truncate(expr_ops, matching_cols);
+ }
+ else
+ {
+ ListCell *opfamilies_cell;
+ ListCell *lefttypes_cell;
+ ListCell *righttypes_cell;
+
+ if (op_strategy == BTLessStrategyNumber)
+ op_strategy = BTLessEqualStrategyNumber;
+ else if (op_strategy == BTGreaterStrategyNumber)
+ op_strategy = BTGreaterEqualStrategyNumber;
+ else
+ elog(ERROR, "unexpected strategy number %d", op_strategy);
+ new_ops = NIL;
+ forthree(opfamilies_cell, opfamilies,
+ lefttypes_cell, lefttypes,
+ righttypes_cell, righttypes)
+ {
+ Oid opfam = lfirst_oid(opfamilies_cell);
+ Oid lefttype = lfirst_oid(lefttypes_cell);
+ Oid righttype = lfirst_oid(righttypes_cell);
+
+ expr_op = get_opfamily_member(opfam, lefttype, righttype,
+ op_strategy);
+ if (!OidIsValid(expr_op)) /* should not happen */
+ elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+ op_strategy, lefttype, righttype, opfam);
+ new_ops = lappend_oid(new_ops, expr_op);
+ }
+ }
+
+ /* If we have more than one matching col, create a subset rowcompare */
+ if (matching_cols > 1)
+ {
+ RowCompareExpr *rc = makeNode(RowCompareExpr);
+
+ rc->rctype = (RowCompareType) op_strategy;
+ rc->opnos = new_ops;
+ rc->opfamilies = list_truncate(list_copy(clause->opfamilies),
+ matching_cols);
+ rc->inputcollids = list_truncate(list_copy(clause->inputcollids),
+ matching_cols);
+ rc->largs = list_truncate(copyObject(var_args),
+ matching_cols);
+ rc->rargs = list_truncate(copyObject(non_var_args),
+ matching_cols);
+ iclause->indexquals = list_make1(make_simple_restrictinfo(root,
+ (Expr *) rc));
+ }
+ else
+ {
+ Expr *op;
+
+ /* We don't report an index column list in this case */
+ iclause->indexcols = NIL;
+
+ op = make_opclause(linitial_oid(new_ops), BOOLOID, false,
+ copyObject(linitial(var_args)),
+ copyObject(linitial(non_var_args)),
+ InvalidOid,
+ linitial_oid(clause->inputcollids));
+ iclause->indexquals = list_make1(make_simple_restrictinfo(root, op));
+ }
+ }
+
+ return iclause;
+}
+
+
+/****************************************************************************
+ * ---- ROUTINES TO CHECK ORDERING OPERATORS ----
+ ****************************************************************************/
+
+/*
+ * match_pathkeys_to_index
+ * Test whether an index can produce output ordered according to the
+ * given pathkeys using "ordering operators".
+ *
+ * If it can, return a list of suitable ORDER BY expressions, each of the form
+ * "indexedcol operator pseudoconstant", along with an integer list of the
+ * index column numbers (zero based) that each clause would be used with.
+ * NIL lists are returned if the ordering is not achievable this way.
+ *
+ * On success, the result list is ordered by pathkeys, and in fact is
+ * one-to-one with the requested pathkeys.
+ */
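+/*
+ * The typical case is a KNN-GiST ordering such as
+ * ORDER BY ptcol <-> point '(0,0)': the pathkey's equivalence class
+ * contains the expression "ptcol <-> point '(0,0)'", which
+ * match_clause_to_ordering_op() can match to the GiST index column.
+ */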
+static void
+match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys,
+ List **orderby_clauses_p,
+ List **clause_columns_p)
+{
+ List *orderby_clauses = NIL;
+ List *clause_columns = NIL;
+ ListCell *lc1;
+
+ *orderby_clauses_p = NIL; /* set default results */
+ *clause_columns_p = NIL;
+
+ /* Only indexes with the amcanorderbyop property are interesting here */
+ if (!index->amcanorderbyop)
+ return;
+
+ foreach(lc1, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc1);
+ bool found = false;
+ ListCell *lc2;
+
+ /*
+ * Note: for any failure to match, we just return NIL immediately.
+ * There is no value in matching just some of the pathkeys.
+ */
+
+ /* Pathkey must request default sort order for the target opfamily */
+ if (pathkey->pk_strategy != BTLessStrategyNumber ||
+ pathkey->pk_nulls_first)
+ return;
+
+ /* If eclass is volatile, no hope of using an indexscan */
+ if (pathkey->pk_eclass->ec_has_volatile)
+ return;
+
+ /*
+ * Try to match eclass member expression(s) to index. Note that child
+ * EC members are considered, but only when they belong to the target
+ * relation. (Unlike regular members, the same expression could be a
+ * child member of more than one EC. Therefore, the same index could
+ * be considered to match more than one pathkey list, which is OK
+ * here. See also get_eclass_for_sort_expr.)
+ */
+ foreach(lc2, pathkey->pk_eclass->ec_members)
+ {
+ EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2);
+ int indexcol;
+
+ /* No possibility of match if it references other relations */
+ if (!bms_equal(member->em_relids, index->rel->relids))
+ continue;
+
+ /*
+ * We allow any column of the index to match each pathkey; they
+ * don't have to match left-to-right as you might expect. This is
+ * correct for GiST, and it doesn't matter for SP-GiST because
+ * that doesn't handle multiple columns anyway, and no other
+ * existing AMs support amcanorderbyop. We might need different
+ * logic in future for other implementations.
+ */
+ for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+ {
+ Expr *expr;
+
+ expr = match_clause_to_ordering_op(index,
+ indexcol,
+ member->em_expr,
+ pathkey->pk_opfamily);
+ if (expr)
+ {
+ orderby_clauses = lappend(orderby_clauses, expr);
+ clause_columns = lappend_int(clause_columns, indexcol);
+ found = true;
+ break;
+ }
+ }
+
+ if (found) /* don't want to look at remaining members */
+ break;
+ }
+
+ if (!found) /* fail if no match for this pathkey */
+ return;
+ }
+
+ *orderby_clauses_p = orderby_clauses; /* success! */
+ *clause_columns_p = clause_columns;
+}
+
+/*
+ * match_clause_to_ordering_op
+ * Determines whether an ordering operator expression matches an
+ * index column.
+ *
+ * This is similar to, but simpler than, match_clause_to_indexcol.
+ * We only care about simple OpExpr cases. The input is a bare
+ * expression that is being ordered by, which must be of the form
+ * (indexkey op const) or (const op indexkey) where op is an ordering
+ * operator for the column's opfamily.
+ *
+ * 'index' is the index of interest.
+ * 'indexcol' is a column number of 'index' (counting from 0).
+ * 'clause' is the ordering expression to be tested.
+ * 'pk_opfamily' is the btree opfamily describing the required sort order.
+ *
+ * Note that we currently do not consider the collation of the ordering
+ * operator's result. In practical cases the result type will be numeric
+ * and thus have no collation, and it's not very clear what to match to
+ * if it did have a collation. The index's collation should match the
+ * ordering operator's input collation, not its result.
+ *
+ * If successful, return 'clause' as-is if the indexkey is on the left,
+ * otherwise a commuted copy of 'clause'. If no match, return NULL.
+ */
+static Expr *
+match_clause_to_ordering_op(IndexOptInfo *index,
+ int indexcol,
+ Expr *clause,
+ Oid pk_opfamily)
+{
+ Oid opfamily;
+ Oid idxcollation;
+ Node *leftop,
+ *rightop;
+ Oid expr_op;
+ Oid expr_coll;
+ Oid sortfamily;
+ bool commuted;
+
+ Assert(indexcol < index->nkeycolumns);
+
+ opfamily = index->opfamily[indexcol];
+ idxcollation = index->indexcollations[indexcol];
+
+ /*
+ * Clause must be a binary opclause.
+ */
+ if (!is_opclause(clause))
+ return NULL;
+ leftop = get_leftop(clause);
+ rightop = get_rightop(clause);
+ if (!leftop || !rightop)
+ return NULL;
+ expr_op = ((OpExpr *) clause)->opno;
+ expr_coll = ((OpExpr *) clause)->inputcollid;
+
+ /*
+ * We can forget the whole thing right away if wrong collation.
+ */
+ if (!IndexCollMatchesExprColl(idxcollation, expr_coll))
+ return NULL;
+
+ /*
+ * Check for clauses of the form: (indexkey operator constant) or
+ * (constant operator indexkey).
+ */
+ if (match_index_to_operand(leftop, indexcol, index) &&
+ !contain_var_clause(rightop) &&
+ !contain_volatile_functions(rightop))
+ {
+ commuted = false;
+ }
+ else if (match_index_to_operand(rightop, indexcol, index) &&
+ !contain_var_clause(leftop) &&
+ !contain_volatile_functions(leftop))
+ {
+ /* Might match, but we need a commuted operator */
+ expr_op = get_commutator(expr_op);
+ if (expr_op == InvalidOid)
+ return NULL;
+ commuted = true;
+ }
+ else
+ return NULL;
+
+ /*
+ * Is the (commuted) operator an ordering operator for the opfamily? And
+ * if so, does it yield the right sorting semantics?
+ */
+ sortfamily = get_op_opfamily_sortfamily(expr_op, opfamily);
+ if (sortfamily != pk_opfamily)
+ return NULL;
+
+ /* We have a match. Return clause or a commuted version thereof. */
+ if (commuted)
+ {
+ OpExpr *newclause = makeNode(OpExpr);
+
+ /* flat-copy all the fields of clause */
+ memcpy(newclause, clause, sizeof(OpExpr));
+
+ /* commute it */
+ newclause->opno = expr_op;
+ newclause->opfuncid = InvalidOid;
+ newclause->args = list_make2(rightop, leftop);
+
+ clause = (Expr *) newclause;
+ }
+
+ return clause;
+}
+
+
+/****************************************************************************
+ * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ----
+ ****************************************************************************/
+
+/*
+ * check_index_predicates
+ * Set the predicate-derived IndexOptInfo fields for each index
+ * of the specified relation.
+ *
+ * predOK is set true if the index is partial and its predicate is satisfied
+ * for this query, ie the query's WHERE clauses imply the predicate.
+ *
+ * indrestrictinfo is set to the relation's baserestrictinfo list less any
+ * conditions that are implied by the index's predicate. (Obviously, for a
+ * non-partial index, this is the same as baserestrictinfo.) Such conditions
+ * can be dropped from the plan when using the index, in certain cases.
+ *
+ * At one time it was possible for this to get re-run after adding more
+ * restrictions to the rel, thus possibly letting us prove more indexes OK.
+ * That doesn't happen any more (at least not in the core code's usage),
+ * but this code still supports it in case extensions want to mess with the
+ * baserestrictinfo list. We assume that adding more restrictions can't make
+ * an index not predOK. We must recompute indrestrictinfo each time, though,
+ * to make sure any newly-added restrictions get into it if needed.
+ */
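+/*
+ * For example, given a partial index made with
+ * CREATE INDEX ... ON t (a) WHERE deleted = false
+ * and a query containing WHERE a = 42 AND deleted = false, predOK becomes
+ * true and "deleted = false" is dropped from indrestrictinfo (unless t is
+ * a target relation; see below).
+ */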
+void
+check_index_predicates(PlannerInfo *root, RelOptInfo *rel)
+{
+ List *clauselist;
+ bool have_partial;
+ bool is_target_rel;
+ Relids otherrels;
+ ListCell *lc;
+
+ /* Indexes are available only on base or "other" member relations. */
+ Assert(IS_SIMPLE_REL(rel));
+
+ /*
+ * Initialize the indrestrictinfo lists to be identical to
+ * baserestrictinfo, and check whether there are any partial indexes. If
+ * not, this is all we need to do.
+ */
+ have_partial = false;
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+
+ index->indrestrictinfo = rel->baserestrictinfo;
+ if (index->indpred)
+ have_partial = true;
+ }
+ if (!have_partial)
+ return;
+
+ /*
+ * Construct a list of clauses that we can assume true for the purpose of
+ * proving the index(es) usable. Restriction clauses for the rel are
+ * always usable, and so are any join clauses that are "movable to" this
+ * rel. Also, we can consider any EC-derivable join clauses (which must
+ * be "movable to" this rel, by definition).
+ */
+ clauselist = list_copy(rel->baserestrictinfo);
+
+ /* Scan the rel's join clauses */
+ foreach(lc, rel->joininfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Check if clause can be moved to this rel */
+ if (!join_clause_is_movable_to(rinfo, rel))
+ continue;
+
+ clauselist = lappend(clauselist, rinfo);
+ }
+
+ /*
+ * Add on any equivalence-derivable join clauses. Computing the correct
+ * relid sets for generate_join_implied_equalities is slightly tricky
+ * because the rel could be a child rel rather than a true baserel, and in
+ * that case we must remove its parents' relid(s) from all_baserels.
+ */
+ if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL)
+ otherrels = bms_difference(root->all_baserels,
+ find_childrel_parents(root, rel));
+ else
+ otherrels = bms_difference(root->all_baserels, rel->relids);
+
+ if (!bms_is_empty(otherrels))
+ clauselist =
+ list_concat(clauselist,
+ generate_join_implied_equalities(root,
+ bms_union(rel->relids,
+ otherrels),
+ otherrels,
+ rel));
+
+ /*
+ * Normally we remove quals that are implied by a partial index's
+ * predicate from indrestrictinfo, indicating that they need not be
+ * checked explicitly by an indexscan plan using this index. However, if
+ * the rel is a target relation of UPDATE/DELETE/MERGE/SELECT FOR UPDATE,
+ * we cannot remove such quals from the plan, because they need to be in
+ * the plan so that they will be properly rechecked by EvalPlanQual
+ * testing. Some day we might want to remove such quals from the main
+ * plan anyway and pass them through to EvalPlanQual via a side channel;
+ * but for now, we just don't remove implied quals at all for target
+ * relations.
+ */
+ is_target_rel = (bms_is_member(rel->relid, root->all_result_relids) ||
+ get_plan_rowmark(root->rowMarks, rel->relid) != NULL);
+
+ /*
+ * Now try to prove each index predicate true, and compute the
+ * indrestrictinfo lists for partial indexes. Note that we compute the
+ * indrestrictinfo list even for non-predOK indexes; this might seem
+ * wasteful, but we may be able to use such indexes in OR clauses, cf
+ * generate_bitmap_or_paths().
+ */
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+ ListCell *lcr;
+
+ if (index->indpred == NIL)
+ continue; /* ignore non-partial indexes here */
+
+ if (!index->predOK) /* don't repeat work if already proven OK */
+ index->predOK = predicate_implied_by(index->indpred, clauselist,
+ false);
+
+ /* If rel is an update target, leave indrestrictinfo as set above */
+ if (is_target_rel)
+ continue;
+
+ /* Else compute indrestrictinfo as the non-implied quals */
+ index->indrestrictinfo = NIL;
+ foreach(lcr, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcr);
+
+ /* predicate_implied_by() assumes first arg is immutable */
+ if (contain_mutable_functions((Node *) rinfo->clause) ||
+ !predicate_implied_by(list_make1(rinfo->clause),
+ index->indpred, false))
+ index->indrestrictinfo = lappend(index->indrestrictinfo, rinfo);
+ }
+ }
+}
+
+/****************************************************************************
+ * ---- ROUTINES TO CHECK EXTERNALLY-VISIBLE CONDITIONS ----
+ ****************************************************************************/
+
+/*
+ * ec_member_matches_indexcol
+ * Test whether an EquivalenceClass member matches an index column.
+ *
+ * This is a callback for use by generate_implied_equalities_for_column.
+ */
+static bool
+ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
+ EquivalenceClass *ec, EquivalenceMember *em,
+ void *arg)
+{
+ IndexOptInfo *index = ((ec_member_matches_arg *) arg)->index;
+ int indexcol = ((ec_member_matches_arg *) arg)->indexcol;
+ Oid curFamily;
+ Oid curCollation;
+
+ Assert(indexcol < index->nkeycolumns);
+
+ curFamily = index->opfamily[indexcol];
+ curCollation = index->indexcollations[indexcol];
+
+ /*
+ * If it's a btree index, we can reject it if its opfamily isn't
+ * compatible with the EC, since no clause generated from the EC could be
+ * used with the index. For non-btree indexes, we can't easily tell
+ * whether clauses generated from the EC could be used with the index, so
+ * don't check the opfamily. This might mean we return "true" for a
+ * useless EC, so we have to recheck the results of
+ * generate_implied_equalities_for_column; see
+ * match_eclass_clauses_to_index.
+ */
+ if (index->relam == BTREE_AM_OID &&
+ !list_member_oid(ec->ec_opfamilies, curFamily))
+ return false;
+
+ /* We insist on collation match for all index types, though */
+ if (!IndexCollMatchesExprColl(curCollation, ec->ec_collation))
+ return false;
+
+ return match_index_to_operand((Node *) em->em_expr, indexcol, index);
+}
+
+/*
+ * relation_has_unique_index_for
+ * Determine whether the relation provably has at most one row satisfying
+ * a set of equality conditions, because the conditions constrain all
+ * columns of some unique index.
+ *
+ * The conditions can be represented in either or both of two ways:
+ * 1. A list of RestrictInfo nodes, where the caller has already determined
+ * that each condition is a mergejoinable equality with an expression in
+ * this relation on one side, and an expression not involving this relation
+ * on the other. The transient outer_is_left flag is used to identify which
+ * side we should look at: left side if outer_is_left is false, right side
+ * if it is true.
+ * 2. A list of expressions in this relation, and a corresponding list of
+ * equality operators. The caller must have already checked that the operators
+ * represent equality. (Note: the operators could be cross-type; the
+ * expressions should correspond to their RHS inputs.)
+ *
+ * The caller need only supply equality conditions arising from joins;
+ * this routine automatically adds in any usable baserestrictinfo clauses.
+ * (Note that the passed-in restrictlist will be destructively modified!)
+ */
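+/*
+ * For example, if "t" has a unique index on (a, b), then the join condition
+ * t.a = other.x together with the restriction t.b = 5 constrains every
+ * column of that index, proving that at most one row of "t" can match.
+ */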
+bool
+relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
+ List *restrictlist,
+ List *exprlist, List *oprlist)
+{
+ ListCell *ic;
+
+ Assert(list_length(exprlist) == list_length(oprlist));
+
+ /* Short-circuit if no indexes... */
+ if (rel->indexlist == NIL)
+ return false;
+
+ /*
+ * Examine the rel's restriction clauses for usable var = const clauses
+ * that we can add to the restrictlist.
+ */
+ foreach(ic, rel->baserestrictinfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(ic);
+
+ /*
+ * Note: can_join won't be set for a restriction clause, but
+ * mergeopfamilies will be if it has a mergejoinable operator and
+ * doesn't contain volatile functions.
+ */
+ if (restrictinfo->mergeopfamilies == NIL)
+ continue; /* not mergejoinable */
+
+ /*
+ * The clause certainly doesn't refer to anything but the given rel.
+ * If either side is pseudoconstant then we can use it.
+ */
+ if (bms_is_empty(restrictinfo->left_relids))
+ {
+ /* righthand side is inner */
+ restrictinfo->outer_is_left = true;
+ }
+ else if (bms_is_empty(restrictinfo->right_relids))
+ {
+ /* lefthand side is inner */
+ restrictinfo->outer_is_left = false;
+ }
+ else
+ continue;
+
+ /* OK, add to list */
+ restrictlist = lappend(restrictlist, restrictinfo);
+ }
+
+ /* Short-circuit the easy case */
+ if (restrictlist == NIL && exprlist == NIL)
+ return false;
+
+ /* Examine each index of the relation ... */
+ foreach(ic, rel->indexlist)
+ {
+ IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
+ int c;
+
+ /*
+ * If the index is not unique, or not immediately enforced, or if it's
+ * a partial index, it's useless here.  We cannot use predOK partial
+ * unique indexes because check_index_predicates() also uses join
+ * predicates to determine whether the partial index is usable; here we
+ * need proofs that hold true before any joins are evaluated.
+ */
+ if (!ind->unique || !ind->immediate || ind->indpred != NIL)
+ continue;
+
+ /*
+ * Try to find each index column in the lists of conditions. This is
+ * O(N^2) or worse, but we expect all the lists to be short.
+ */
+ for (c = 0; c < ind->nkeycolumns; c++)
+ {
+ bool matched = false;
+ ListCell *lc;
+ ListCell *lc2;
+
+ foreach(lc, restrictlist)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ Node *rexpr;
+
+ /*
+ * The condition's equality operator must be a member of the
+ * index opfamily, else it is not asserting the right kind of
+ * equality behavior for this index. We check this first
+ * since it's probably cheaper than match_index_to_operand().
+ */
+ if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c]))
+ continue;
+
+ /*
+ * XXX at some point we may need to check collations here too.
+ * For the moment we assume all collations reduce to the same
+ * notion of equality.
+ */
+
+ /* OK, see if the condition operand matches the index key */
+ if (rinfo->outer_is_left)
+ rexpr = get_rightop(rinfo->clause);
+ else
+ rexpr = get_leftop(rinfo->clause);
+
+ if (match_index_to_operand(rexpr, c, ind))
+ {
+ matched = true; /* column is unique */
+ break;
+ }
+ }
+
+ if (matched)
+ continue;
+
+ forboth(lc, exprlist, lc2, oprlist)
+ {
+ Node *expr = (Node *) lfirst(lc);
+ Oid opr = lfirst_oid(lc2);
+
+ /* See if the expression matches the index key */
+ if (!match_index_to_operand(expr, c, ind))
+ continue;
+
+ /*
+ * The equality operator must be a member of the index
+ * opfamily, else it is not asserting the right kind of
+ * equality behavior for this index. We assume the caller
+ * determined it is an equality operator, so we don't need to
+ * check any more tightly than this.
+ */
+ if (!op_in_opfamily(opr, ind->opfamily[c]))
+ continue;
+
+ /*
+ * XXX at some point we may need to check collations here too.
+ * For the moment we assume all collations reduce to the same
+ * notion of equality.
+ */
+
+ matched = true; /* column is unique */
+ break;
+ }
+
+ if (!matched)
+ break; /* no match; this index doesn't help us */
+ }
+
+ /* Matched all key columns of this index? */
+ if (c == ind->nkeycolumns)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * indexcol_is_bool_constant_for_query
+ *
+ * If an index column is constrained to have a constant value by the query's
+ * WHERE conditions, then it's irrelevant for sort-order considerations.
+ * Usually that means we have a restriction clause WHERE indexcol = constant,
+ * which gets turned into an EquivalenceClass containing a constant, which
+ * is recognized as redundant by build_index_pathkeys(). But if the index
+ * column is a boolean variable (or expression), then we are not going to
+ * see WHERE indexcol = constant, because expression preprocessing will have
+ * simplified that to "WHERE indexcol" or "WHERE NOT indexcol". So we are not
+ * going to have a matching EquivalenceClass (unless the query also contains
+ * "ORDER BY indexcol"). To allow such cases to work the same as they would
+ * for non-boolean values, this function is provided to detect whether the
+ * specified index column matches a boolean restriction clause.
+ */
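+/*
+ * For example, given an index on (boolcol, othercol) and a query with
+ * WHERE boolcol ... ORDER BY othercol, detecting that boolcol is held
+ * constant lets build_index_pathkeys() treat the index as producing the
+ * requested ordering.
+ */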
+bool
+indexcol_is_bool_constant_for_query(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol)
+{
+ ListCell *lc;
+
+ /* If the index isn't boolean, we can't possibly get a match */
+ if (!IsBooleanOpfamily(index->opfamily[indexcol]))
+ return false;
+
+ /* Check each restriction clause for the index's rel */
+ foreach(lc, index->rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /*
+ * As in match_clause_to_indexcol, never match pseudoconstants to
+ * indexes. (It might be semantically okay to do so here, but the
+ * odds of getting a match are negligible, so don't waste the cycles.)
+ */
+ if (rinfo->pseudoconstant)
+ continue;
+
+ /* See if we can match the clause's expression to the index column */
+ if (match_boolean_index_clause(root, rinfo, indexcol, index))
+ return true;
+ }
+
+ return false;
+}
+
+
+/****************************************************************************
+ * ---- ROUTINES TO CHECK OPERANDS ----
+ ****************************************************************************/
+
+/*
+ * match_index_to_operand()
+ * Generalized test for a match between an index's key
+ * and the operand on one side of a restriction or join clause.
+ *
+ * operand: the nodetree to be compared to the index
+ * indexcol: the column number of the index (counting from 0)
+ * index: the index of interest
+ *
+ * Note that we aren't interested in collations here; the caller must check
+ * for a collation match, if it's dealing with an operator where that matters.
+ *
+ * This is exported for use in selfuncs.c.
+ */
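+/*
+ * For example, for an expression index on lower(name), the operand
+ * lower(name) in WHERE lower(name) = 'foo' matches the index column: we
+ * locate the stored index expression and compare it with equal(), after
+ * stripping any top-level RelabelType from either side.
+ */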
+bool
+match_index_to_operand(Node *operand,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ int indkey;
+
+ /*
+ * Ignore any RelabelType node above the operand. This is needed to be
+ * able to apply indexscanning in binary-compatible-operator cases. Note:
+ * we can assume there is at most one RelabelType node;
+ * eval_const_expressions() will have simplified if more than one.
+ */
+ if (operand && IsA(operand, RelabelType))
+ operand = (Node *) ((RelabelType *) operand)->arg;
+
+ indkey = index->indexkeys[indexcol];
+ if (indkey != 0)
+ {
+ /*
+ * Simple index column; operand must be a matching Var.
+ */
+ if (operand && IsA(operand, Var) &&
+ index->rel->relid == ((Var *) operand)->varno &&
+ indkey == ((Var *) operand)->varattno)
+ return true;
+ }
+ else
+ {
+ /*
+ * Index expression; find the correct expression. (This search could
+ * be avoided, at the cost of complicating all the callers of this
+ * routine; doesn't seem worth it.)
+ */
+ ListCell *indexpr_item;
+ int i;
+ Node *indexkey;
+
+ indexpr_item = list_head(index->indexprs);
+ for (i = 0; i < indexcol; i++)
+ {
+ if (index->indexkeys[i] == 0)
+ {
+ if (indexpr_item == NULL)
+ elog(ERROR, "wrong number of index expressions");
+ indexpr_item = lnext(index->indexprs, indexpr_item);
+ }
+ }
+ if (indexpr_item == NULL)
+ elog(ERROR, "wrong number of index expressions");
+ indexkey = (Node *) lfirst(indexpr_item);
+
+ /*
+ * Does it match the operand? Again, strip any relabeling.
+ */
+ if (indexkey && IsA(indexkey, RelabelType))
+ indexkey = (Node *) ((RelabelType *) indexkey)->arg;
+
+ if (equal(indexkey, operand))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * is_pseudo_constant_for_index()
+ * Test whether the given expression can be used as an indexscan
+ * comparison value.
+ *
+ * An indexscan comparison value must not contain any volatile functions,
+ * and it can't contain any Vars of the index's own table. Vars of
+ * other tables are okay, though; in that case we'd be producing an
+ * indexqual usable in a parameterized indexscan. This is, therefore,
+ * a weaker condition than is_pseudo_constant_clause().
+ *
+ * This function is exported for use by planner support functions,
+ * which will have available the IndexOptInfo, but not any RestrictInfo
+ * infrastructure. It is making the same test made by functions above
+ * such as match_opclause_to_indexcol(), but those rely where possible
+ * on RestrictInfo information about variable membership.
+ *
+ * expr: the nodetree to be checked
+ * index: the index of interest
+ */
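+/*
+ * For example, with an index on t.a, the comparison value in
+ * WHERE t.a = other.x + 1 qualifies: it contains no Vars of t and no
+ * volatile functions, so it could be used in a parameterized indexscan.
+ */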
+bool
+is_pseudo_constant_for_index(PlannerInfo *root, Node *expr, IndexOptInfo *index)
+{
+ /* pull_varnos is cheaper than volatility check, so do that first */
+ if (bms_is_member(index->rel->relid, pull_varnos(root, expr)))
+ return false; /* no good, contains Var of table */
+ if (contain_volatile_functions(expr))
+ return false; /* no good, volatile comparison value */
+ return true;
+}
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
new file mode 100644
index 0000000..bad3dd1
--- /dev/null
+++ b/src/backend/optimizer/path/joinpath.c
@@ -0,0 +1,2367 @@
+/*-------------------------------------------------------------------------
+ *
+ * joinpath.c
+ * Routines to find all possible paths for processing a set of joins
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/joinpath.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "executor/executor.h"
+#include "foreign/fdwapi.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
+#include "utils/typcache.h"
+
+/* Hook for plugins to get control in add_paths_to_joinrel() */
+set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
+
+/*
+ * Paths parameterized by the parent can be considered to be parameterized by
+ * any of its children.
+ */
+#define PATH_PARAM_BY_PARENT(path, rel) \
+ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
+ (rel)->top_parent_relids))
+#define PATH_PARAM_BY_REL_SELF(path, rel) \
+ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
+
+#define PATH_PARAM_BY_REL(path, rel) \
+ (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
+
+static void try_partial_mergejoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *pathkeys,
+ List *mergeclauses,
+ List *outersortkeys,
+ List *innersortkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra);
+static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ JoinType jointype, JoinPathExtraData *extra);
+static inline bool clause_sides_match_join(RestrictInfo *rinfo,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel);
+static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ JoinType jointype, JoinPathExtraData *extra);
+static void consider_parallel_nestloop(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
+static void consider_parallel_mergejoin(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ Path *inner_cheapest_total);
+static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ JoinType jointype, JoinPathExtraData *extra);
+static List *select_mergejoin_clauses(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist,
+ JoinType jointype,
+ bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *innerrel,
+ Path *outerpath,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool useallclauses,
+ Path *inner_cheapest_total,
+ List *merge_pathkeys,
+ bool is_partial);
+
+
+/*
+ * add_paths_to_joinrel
+ * Given a join relation and two component rels from which it can be made,
+ * consider all possible paths that use the two component rels as outer
+ * and inner rel respectively. Add these paths to the join rel's pathlist
+ * if they survive comparison with other paths (and remove any existing
+ * paths that are dominated by these paths).
+ *
+ * Modifies the pathlist field of the joinrel node to contain the best
+ * paths found so far.
+ *
+ * jointype is not necessarily the same as sjinfo->jointype; it might be
+ * "flipped around" if we are considering joining the rels in the opposite
+ * direction from what's indicated in sjinfo.
+ *
+ * Also, this routine and others in this module accept the special JoinTypes
+ * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
+ * unique-ify the outer or inner relation and then apply a regular inner
+ * join. These values are not allowed to propagate outside this module,
+ * however. Path cost estimation code may need to recognize that it's
+ * dealing with such a case --- the combination of nominal jointype INNER
+ * with sjinfo->jointype == JOIN_SEMI indicates that.
+ */
+void
+add_paths_to_joinrel(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
+{
+ JoinPathExtraData extra;
+ bool mergejoin_allowed = true;
+ bool consider_join_pushdown = false;
+ ListCell *lc;
+ Relids joinrelids;
+
+ /*
+ * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
+ * between child relations, even if there is a SpecialJoinInfo node for
+ * the join between the topmost parents. So, while calculating Relids set
+ * representing the restriction, consider relids of topmost parent of
+ * partitions.
+ */
+ if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
+ joinrelids = joinrel->top_parent_relids;
+ else
+ joinrelids = joinrel->relids;
+
+ extra.restrictlist = restrictlist;
+ extra.mergeclause_list = NIL;
+ extra.sjinfo = sjinfo;
+ extra.param_source_rels = NULL;
+
+ /*
+ * See if the inner relation is provably unique for this outer rel.
+ *
+ * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
+ * matter since the executor can make the equivalent optimization anyway;
+ * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
+ * must be considering a semijoin whose inner side is not provably unique
+ * (else reduce_unique_semijoins would've simplified it), so there's no
+ * point in calling innerrel_is_unique. However, if the LHS covers all of
+ * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+ * because the path produced by create_unique_path will be unique relative
+ * to the LHS. (If we have an LHS that's only part of the min_lefthand,
+ * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
+ * letting that value escape this module.
+ */
+ switch (jointype)
+ {
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+
+ /*
+ * XXX it may be worth proving this to allow a Memoize to be
+ * considered for Nested Loop Semi/Anti Joins.
+ */
+ extra.inner_unique = false; /* well, unproven */
+ break;
+ case JOIN_UNIQUE_INNER:
+ extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
+ outerrel->relids);
+ break;
+ case JOIN_UNIQUE_OUTER:
+ extra.inner_unique = innerrel_is_unique(root,
+ joinrel->relids,
+ outerrel->relids,
+ innerrel,
+ JOIN_INNER,
+ restrictlist,
+ false);
+ break;
+ default:
+ extra.inner_unique = innerrel_is_unique(root,
+ joinrel->relids,
+ outerrel->relids,
+ innerrel,
+ jointype,
+ restrictlist,
+ false);
+ break;
+ }
+
+ /*
+ * Find potential mergejoin clauses. We can skip this if we are not
+ * interested in doing a mergejoin. However, mergejoin may be our only
+ * way of implementing a full outer join, so override enable_mergejoin if
+ * it's a full join.
+ */
+ if (enable_mergejoin || jointype == JOIN_FULL)
+ extra.mergeclause_list = select_mergejoin_clauses(root,
+ joinrel,
+ outerrel,
+ innerrel,
+ restrictlist,
+ jointype,
+ &mergejoin_allowed);
+
+ /*
+ * If it's SEMI, ANTI, or inner_unique join, compute correction factors
+ * for cost estimation. These will be the same for all paths.
+ */
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
+ compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
+ jointype, sjinfo, restrictlist,
+ &extra.semifactors);
+
+ /*
+ * Decide whether it's sensible to generate parameterized paths for this
+ * joinrel, and if so, which relations such paths should require. There
+ * is usually no need to create a parameterized result path unless there
+ * is a join order restriction that prevents joining one of our input rels
+ * directly to the parameter source rel instead of joining to the other
+ * input rel. (But see allow_star_schema_join().) This restriction
+ * reduces the number of parameterized paths we have to deal with at
+ * higher join levels, without compromising the quality of the resulting
+ * plan. We express the restriction as a Relids set that must overlap the
+ * parameterization of any proposed join path.
+ */
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
+
+ /*
+ * SJ is relevant to this join if we have some part of its RHS
+ * (possibly not all of it), and haven't yet joined to its LHS. (This
+ * test is pretty simplistic, but should be sufficient considering the
+ * join has already been proven legal.) If the SJ is relevant, it
+ * presents constraints for joining to anything not in its RHS.
+ */
+ if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
+ !bms_overlap(joinrelids, sjinfo2->min_lefthand))
+ extra.param_source_rels = bms_join(extra.param_source_rels,
+ bms_difference(root->all_baserels,
+ sjinfo2->min_righthand));
+
+ /* full joins constrain both sides symmetrically */
+ if (sjinfo2->jointype == JOIN_FULL &&
+ bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
+ !bms_overlap(joinrelids, sjinfo2->min_righthand))
+ extra.param_source_rels = bms_join(extra.param_source_rels,
+ bms_difference(root->all_baserels,
+ sjinfo2->min_lefthand));
+ }
+
+ /*
+ * However, when a LATERAL subquery is involved, there will simply not be
+ * any paths for the joinrel that aren't parameterized by whatever the
+ * subquery is parameterized by, unless its parameterization is resolved
+ * within the joinrel. So we might as well allow additional dependencies
+ * on whatever residual lateral dependencies the joinrel will have.
+ */
+ extra.param_source_rels = bms_add_members(extra.param_source_rels,
+ joinrel->lateral_relids);
+
+ /*
+ * 1. Consider mergejoin paths where both relations must be explicitly
+ * sorted. Skip this if we can't mergejoin.
+ */
+ if (mergejoin_allowed)
+ sort_inner_and_outer(root, joinrel, outerrel, innerrel,
+ jointype, &extra);
+
+ /*
+ * 2. Consider paths where the outer relation need not be explicitly
+ * sorted. This includes both nestloops and mergejoins where the outer
+ * path is already ordered. Again, skip this if we can't mergejoin.
+ * (That's okay because we know that nestloop can't handle right/full
+ * joins at all, so it wouldn't work in the prohibited cases either.)
+ */
+ if (mergejoin_allowed)
+ match_unsorted_outer(root, joinrel, outerrel, innerrel,
+ jointype, &extra);
+
+#ifdef NOT_USED
+
+ /*
+ * 3. Consider paths where the inner relation need not be explicitly
+ * sorted. This includes mergejoins only (nestloops were already built in
+ * match_unsorted_outer).
+ *
+ * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
+ * significant difference between the inner and outer side of a mergejoin,
+ * so match_unsorted_inner creates no paths that aren't equivalent to
+ * those made by match_unsorted_outer when add_paths_to_joinrel() is
+ * invoked with the two rels given in the other order.
+ */
+ if (mergejoin_allowed)
+ match_unsorted_inner(root, joinrel, outerrel, innerrel,
+ jointype, &extra);
+#endif
+
+ /*
+ * 4. Consider paths where both outer and inner relations must be hashed
+ * before being joined. As above, disregard enable_hashjoin for full
+ * joins, because there may be no other alternative.
+ */
+ if (enable_hashjoin || jointype == JOIN_FULL)
+ hash_inner_and_outer(root, joinrel, outerrel, innerrel,
+ jointype, &extra);
+
+ /*
+ * createplan.c does not currently support handling of pseudoconstant
+ * clauses assigned to joins pushed down by extensions; check if the
+ * restrictlist has such clauses, and if not, allow them to consider
+ * pushing down joins.
+ */
+ if ((joinrel->fdwroutine &&
+ joinrel->fdwroutine->GetForeignJoinPaths) ||
+ set_join_pathlist_hook)
+ consider_join_pushdown = !has_pseudoconstant_clauses(root,
+ restrictlist);
+
+ /*
+ * 5. If inner and outer relations are foreign tables (or joins) belonging
+ * to the same server and assigned to the same user to check access
+ * permissions as, give the FDW a chance to push down joins.
+ */
+ if (joinrel->fdwroutine &&
+ joinrel->fdwroutine->GetForeignJoinPaths &&
+ consider_join_pushdown)
+ joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
+ outerrel, innerrel,
+ jointype, &extra);
+
+ /*
+ * 6. Finally, give extensions a chance to manipulate the path list. They
+ * could add new paths (such as CustomPaths) by calling add_path(), or
+ * add_partial_path() if parallel aware. They could also delete or modify
+ * paths added by the core code.
+ */
+ if (set_join_pathlist_hook &&
+ consider_join_pushdown)
+ set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
+ jointype, &extra);
+}
+
+/*
+ * We override the param_source_rels heuristic to accept nestloop paths in
+ * which the outer rel satisfies some but not all of the inner path's
+ * parameterization. This is necessary to get good plans for star-schema
+ * scenarios, in which a parameterized path for a large table may require
+ * parameters from multiple small tables that will not get joined directly to
+ * each other. We can handle that by stacking nestloops that have the small
+ * tables on the outside; but this breaks the rule the param_source_rels
+ * heuristic is based on, namely that parameters should not be passed down
+ * across joins unless there's a join-order-constraint-based reason to do so.
+ * So we ignore the param_source_rels restriction when this case applies.
+ *
+ * allow_star_schema_join() returns true if the param_source_rels restriction
+ * should be overridden, ie, it's okay to perform this join.
+ */
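+/*
+ * For example, an index path on fact(d1key, d2key) may be parameterized by
+ * both dimension rels d1 and d2.  When that path is joined to d1 alone, the
+ * outer rel supplies only part of the inner path's parameterization, which
+ * is exactly the star-schema shape we want to allow.
+ */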
+static inline bool
+allow_star_schema_join(PlannerInfo *root,
+ Relids outerrelids,
+ Relids inner_paramrels)
+{
+ /*
+ * It's a star-schema case if the outer rel provides some but not all of
+ * the inner rel's parameterization.
+ */
+ return (bms_overlap(inner_paramrels, outerrelids) &&
+ bms_nonempty_difference(inner_paramrels, outerrelids));
+}
+
+/*
+ * paraminfo_get_equal_hashops
+ * Determine if param_info and innerrel's lateral_vars can be hashed.
+ * Returns true if hashing is possible, otherwise returns false.
+ *
+ * Additionally, we collect the outer exprs and the hash operators for each
+ * parameter to innerrel.  These are set in 'param_exprs', 'operators' and
+ * 'binary_mode' when we return true.
+ */
+static bool
+paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ List **param_exprs, List **operators,
+ bool *binary_mode)
+{
+ ListCell *lc;
+
+ *param_exprs = NIL;
+ *operators = NIL;
+ *binary_mode = false;
+
+ if (param_info != NULL)
+ {
+ List *clauses = param_info->ppi_clauses;
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ OpExpr *opexpr;
+ Node *expr;
+ Oid hasheqoperator;
+
+ opexpr = (OpExpr *) rinfo->clause;
+
+ /*
+ * Bail if the rinfo is not compatible. We need a join OpExpr
+ * with 2 args.
+ */
+ if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
+ !clause_sides_match_join(rinfo, outerrel, innerrel))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ if (rinfo->outer_is_left)
+ {
+ expr = (Node *) linitial(opexpr->args);
+ hasheqoperator = rinfo->left_hasheqoperator;
+ }
+ else
+ {
+ expr = (Node *) lsecond(opexpr->args);
+ hasheqoperator = rinfo->right_hasheqoperator;
+ }
+
+ /* can't do memoize if we can't hash the outer type */
+ if (!OidIsValid(hasheqoperator))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ *operators = lappend_oid(*operators, hasheqoperator);
+ *param_exprs = lappend(*param_exprs, expr);
+
+ /*
+ * When the join operator is not hashable then it's possible that
+ * the operator will be able to distinguish something that the
+ * hash equality operator could not. For example with floating
+ * point types -0.0 and +0.0 are classed as equal by the hash
+ * function and equality function, but some other operator may be
+ * able to tell those values apart. This means that we must put
+ * memoize into binary comparison mode so that it does bit-by-bit
+ * comparisons rather than a "logical" comparison as it would
+ * using the hash equality operator.
+ */
+ if (!OidIsValid(rinfo->hashjoinoperator))
+ *binary_mode = true;
+ }
+ }
+
+ /* Now add any lateral vars to the cache key too */
+ foreach(lc, innerrel->lateral_vars)
+ {
+ Node *expr = (Node *) lfirst(lc);
+ TypeCacheEntry *typentry;
+
+ /* Reject if there are any volatile functions */
+ if (contain_volatile_functions(expr))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ typentry = lookup_type_cache(exprType(expr),
+ TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
+
+ /* can't use a memoize node without a valid hash equals operator */
+ if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ *operators = lappend_oid(*operators, typentry->eq_opr);
+ *param_exprs = lappend(*param_exprs, expr);
+
+ /*
+ * We must go into binary mode as we don't have too much of an idea of
+ * how these lateral Vars are being used. See comment above when we
+ * set *binary_mode for the non-lateral Var case. This could be
+ * relaxed a bit if we had the RestrictInfos and knew the operators
+ * being used, however for cases like Vars that are arguments to
+ * functions we must operate in binary mode as we don't have
+ * visibility into what the function is doing with the Vars.
+ */
+ *binary_mode = true;
+ }
+
+ /* We're okay to use memoize */
+ return true;
+}
+
+/*
+ * get_memoize_path
+ * If possible, make and return a Memoize path atop of 'inner_path'.
+ * Otherwise return NULL.
+ */
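+/*
+ * For example, in a nestloop whose inner side is an index scan
+ * parameterized by inner.x = outer.x, a Memoize node caches the inner
+ * rows for each distinct value of outer.x, so that repeated outer values
+ * need not rescan the inner side.
+ */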
+static Path *
+get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
+ RelOptInfo *outerrel, Path *inner_path,
+ Path *outer_path, JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ RelOptInfo *top_outerrel;
+ List *param_exprs;
+ List *hash_operators;
+ ListCell *lc;
+ bool binary_mode;
+
+ /* Obviously not if it's disabled */
+ if (!enable_memoize)
+ return NULL;
+
+ /*
+ * We can safely not bother with all this unless we expect to perform more
+ * than one inner scan. The first scan is always going to be a cache
+ * miss. This would likely fail later anyway based on costs, so this is
+ * really just to save some wasted effort.
+ */
+ if (outer_path->parent->rows < 2)
+ return NULL;
+
+ /*
+ * We can only have a memoize node when there's some kind of cache key,
+ * either parameterized path clauses or lateral Vars. No cache key sounds
+ * more like something a Materialize node might be more useful for.
+ */
+ if ((inner_path->param_info == NULL ||
+ inner_path->param_info->ppi_clauses == NIL) &&
+ innerrel->lateral_vars == NIL)
+ return NULL;
+
+ /*
+ * Currently we don't do this for SEMI and ANTI joins unless they're
+ * marked as inner_unique. This is because nested loop SEMI/ANTI joins
+ * don't scan the inner node to completion, which will mean memoize cannot
+ * mark the cache entry as complete.
+ *
+ * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
+ * = true. Should we? See add_paths_to_joinrel()
+ */
+ if (!extra->inner_unique && (jointype == JOIN_SEMI ||
+ jointype == JOIN_ANTI))
+ return NULL;
+
+ /*
+ * Memoize normally marks cache entries as complete when it runs out of
+ * tuples to read from its subplan. However, with unique joins, Nested
+ * Loop will skip to the next outer tuple after finding the first matching
+ * inner tuple. This means that we may not read the inner side of the
+ * join to completion which leaves no opportunity to mark the cache entry
+ * as complete. To work around that, when the join is unique we
+ * automatically mark cache entries as complete after fetching the first
+ * tuple. This works when the entire join condition is parameterized.
+ * Otherwise, when the parameterization is only a subset of the join
+ * condition, we can't be sure which part of it causes the join to be
+ * unique. This means there are no guarantees that only 1 tuple will be
+ * read. We cannot mark the cache entry as complete after reading the
+ * first tuple without that guarantee. This means the scope of Memoize
+ * node's usefulness is limited to only outer rows that have no join
+ * partner as this is the only case where Nested Loop would exhaust the
+ * inner scan of a unique join. Since the scope is limited to that, we
+ * just don't bother making a memoize path in this case.
+ *
+ * Lateral vars needn't be considered here as they're not considered when
+ * determining if the join is unique.
+ *
+ * XXX this could be enabled if the remaining join quals were made part of
+ * the inner scan's filter instead of the join filter. Maybe it's worth
+ * considering doing that?
+ */
+ if (extra->inner_unique &&
+ (inner_path->param_info == NULL ||
+ list_length(inner_path->param_info->ppi_clauses) <
+ list_length(extra->restrictlist)))
+ return NULL;
+
+ /*
+ * We can't use a memoize node if there are volatile functions in the
+ * inner rel's target list or restrict list. A cache hit could reduce the
+ * number of calls to these functions.
+ */
+ if (contain_volatile_functions((Node *) innerrel->reltarget))
+ return NULL;
+
+ foreach(lc, innerrel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (contain_volatile_functions((Node *) rinfo))
+ return NULL;
+ }
+
+ /*
+ * Also check the parameterized path restrictinfos for volatile functions.
+ * Indexed functions must be immutable so shouldn't have any volatile
+ * functions, however, with a lateral join the inner scan may not be an
+ * index scan.
+ */
+ if (inner_path->param_info != NULL)
+ {
+ foreach(lc, inner_path->param_info->ppi_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (contain_volatile_functions((Node *) rinfo))
+ return NULL;
+ }
+ }
+
+ /*
+ * When considering a partitionwise join, we have clauses that reference
+ * the outerrel's top parent not outerrel itself.
+ */
+ if (outerrel->reloptkind == RELOPT_OTHER_MEMBER_REL)
+ top_outerrel = find_base_rel(root, bms_singleton_member(outerrel->top_parent_relids));
+ else if (outerrel->reloptkind == RELOPT_OTHER_JOINREL)
+ top_outerrel = find_join_rel(root, outerrel->top_parent_relids);
+ else
+ top_outerrel = outerrel;
+
+ /* Check if we have hash ops for each parameter to the path */
+ if (paraminfo_get_equal_hashops(root,
+ inner_path->param_info,
+ top_outerrel,
+ innerrel,
+ &param_exprs,
+ &hash_operators,
+ &binary_mode))
+ {
+ return (Path *) create_memoize_path(root,
+ innerrel,
+ inner_path,
+ param_exprs,
+ hash_operators,
+ extra->inner_unique,
+ binary_mode,
+ outer_path->rows);
+ }
+
+ return NULL;
+}
+
+/*
+ * try_nestloop_path
+ * Consider a nestloop join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_nestloop_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *pathkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+ RelOptInfo *innerrel = inner_path->parent;
+ RelOptInfo *outerrel = outer_path->parent;
+ Relids innerrelids;
+ Relids outerrelids;
+ Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
+ Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
+
+ /*
+ * Paths are parameterized by top-level parents, so run parameterization
+ * tests on the parent relids.
+ */
+ if (innerrel->top_parent_relids)
+ innerrelids = innerrel->top_parent_relids;
+ else
+ innerrelids = innerrel->relids;
+
+ if (outerrel->top_parent_relids)
+ outerrelids = outerrel->top_parent_relids;
+ else
+ outerrelids = outerrel->relids;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if the
+ * parameterization wouldn't be sensible --- unless allow_star_schema_join
+ * says to allow it anyway. Also, we must reject if have_dangerous_phv
+ * doesn't like the look of it, which could only happen if the nestloop is
+ * still parameterized.
+ */
+ required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
+ innerrelids, inner_paramrels);
+ if (required_outer &&
+ ((!bms_overlap(required_outer, extra->param_source_rels) &&
+ !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
+ have_dangerous_phv(root, outerrelids, inner_paramrels)))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * Do a precheck to quickly eliminate obviously-inferior paths. We
+ * calculate a cheap lower bound on the path's cost and then use
+ * add_path_precheck() to see if the path is clearly going to be dominated
+ * by some existing path for the joinrel. If not, do the full pushup with
+ * creating a fully valid path structure and submitting it to add_path().
+ * The latter two steps are expensive enough to make this two-phase
+ * methodology worthwhile.
+ */
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_path, extra);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ /*
+ * If the inner path is parameterized, it is parameterized by the
+ * topmost parent of the outer rel, not the outer rel itself. Fix
+ * that.
+ */
+ if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+ {
+ inner_path = reparameterize_path_by_child(root, inner_path,
+ outer_path->parent);
+
+ /*
+ * If we could not translate the path, we can't create nest loop
+ * path.
+ */
+ if (!inner_path)
+ {
+ bms_free(required_outer);
+ return;
+ }
+ }
+
+ add_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ pathkeys,
+ required_outer));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
+
+/*
+ * try_partial_nestloop_path
+ * Consider a partial nestloop join path; if it appears useful, push it into
+ * the joinrel's partial_pathlist via add_partial_path().
+ */
+static void
+try_partial_nestloop_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *pathkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinCostWorkspace workspace;
+
+ /*
+ * If the inner path is parameterized, the parameterization must be fully
+ * satisfied by the proposed outer path. Parameterized partial paths are
+ * not supported. The caller should already have verified that no lateral
+ * rels are required here.
+ */
+ Assert(bms_is_empty(joinrel->lateral_relids));
+ if (inner_path->param_info != NULL)
+ {
+ Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+ RelOptInfo *outerrel = outer_path->parent;
+ Relids outerrelids;
+
+ /*
+ * The inner and outer paths are parameterized, if at all, by the top
+ * level parents, not the child relations, so we must use those relids
+ * for our parameterization tests.
+ */
+ if (outerrel->top_parent_relids)
+ outerrelids = outerrel->top_parent_relids;
+ else
+ outerrelids = outerrel->relids;
+
+ if (!bms_is_subset(inner_paramrels, outerrelids))
+ return;
+ }
+
+ /*
+ * Before creating a path, get a quick lower bound on what it is likely to
+ * cost. Bail out right away if it looks terrible.
+ */
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_path, extra);
+ if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+ return;
+
+ /*
+ * If the inner path is parameterized, it is parameterized by the topmost
+ * parent of the outer rel, not the outer rel itself. Fix that.
+ */
+ if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+ {
+ inner_path = reparameterize_path_by_child(root, inner_path,
+ outer_path->parent);
+
+ /*
+ * If we could not translate the path, we can't create nest loop path.
+ */
+ if (!inner_path)
+ return;
+ }
+
+ /* Might be good enough to be worth trying, so let's try it. */
+ add_partial_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ pathkeys,
+ NULL));
+}
+
+/*
+ * try_mergejoin_path
+ * Consider a merge join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_mergejoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *pathkeys,
+ List *mergeclauses,
+ List *outersortkeys,
+ List *innersortkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool is_partial)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ if (is_partial)
+ {
+ try_partial_mergejoin_path(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ pathkeys,
+ mergeclauses,
+ outersortkeys,
+ innersortkeys,
+ jointype,
+ extra);
+ return;
+ }
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if the
+ * parameterization wouldn't be sensible.
+ */
+ required_outer = calc_non_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, extra->param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * If the given paths are already well enough ordered, we can skip doing
+ * an explicit sort.
+ */
+ if (outersortkeys &&
+ pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
+ outersortkeys = NIL;
+ if (innersortkeys &&
+ pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
+ innersortkeys = NIL;
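+ /*
+ * Passing NIL sort keys below tells the costing and path-creation code
+ * that no explicit sort is needed on that side.
+ */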
+
+ /*
+ * See comments in try_nestloop_path().
+ */
+ initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
+ outer_path, inner_path,
+ outersortkeys, innersortkeys,
+ extra);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_mergejoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ pathkeys,
+ required_outer,
+ mergeclauses,
+ outersortkeys,
+ innersortkeys));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
+
+/*
+ * try_partial_mergejoin_path
+ * Consider a partial merge join path; if it appears useful, push it into
+ * the joinrel's partial_pathlist via add_partial_path().
+ */
+static void
+try_partial_mergejoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *pathkeys,
+ List *mergeclauses,
+ List *outersortkeys,
+ List *innersortkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinCostWorkspace workspace;
+
+ /*
+ * See comments in try_partial_hashjoin_path().
+ */
+ Assert(bms_is_empty(joinrel->lateral_relids));
+ if (inner_path->param_info != NULL)
+ {
+ Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+
+ if (!bms_is_empty(inner_paramrels))
+ return;
+ }
+
+ /*
+ * If the given paths are already well enough ordered, we can skip doing
+ * an explicit sort.
+ */
+ if (outersortkeys &&
+ pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
+ outersortkeys = NIL;
+ if (innersortkeys &&
+ pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
+ innersortkeys = NIL;
+
+ /*
+ * See comments in try_partial_nestloop_path().
+ */
+ initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
+ outer_path, inner_path,
+ outersortkeys, innersortkeys,
+ extra);
+
+ if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+ return;
+
+ /* Might be good enough to be worth trying, so let's try it. */
+ add_partial_path(joinrel, (Path *)
+ create_mergejoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ extra->restrictlist,
+ pathkeys,
+ NULL,
+ mergeclauses,
+ outersortkeys,
+ innersortkeys));
+}
+
+/*
+ * try_hashjoin_path
+ * Consider a hash join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_hashjoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *hashclauses,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if the
+ * parameterization wouldn't be sensible.
+ */
+ required_outer = calc_non_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, extra->param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * See comments in try_nestloop_path(). Also note that hashjoin paths
+ * never have any output pathkeys, per comments in create_hashjoin_path.
+ */
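+ /* The trailing "false" means no Parallel Hash is being considered here. */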
+ initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
+ outer_path, inner_path, extra, false);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ NIL, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_hashjoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ false, /* parallel_hash */
+ extra->restrictlist,
+ required_outer,
+ hashclauses));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
+
+/*
+ * try_partial_hashjoin_path
+ * Consider a partial hash join path; if it appears useful, push it into
+ * the joinrel's partial_pathlist via add_partial_path().
+ * The outer side is partial. If parallel_hash is true, then the inner path
+ * must be partial and will be run in parallel to create one or more shared
+ * hash tables; otherwise the inner path must be complete and a copy of it
+ * is run in every process to create separate identical private hash tables.
+ */
+static void
+try_partial_hashjoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Path *outer_path,
+ Path *inner_path,
+ List *hashclauses,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool parallel_hash)
+{
+ JoinCostWorkspace workspace;
+
+ /*
+ * If the inner path is parameterized, the parameterization must be fully
+ * satisfied by the proposed outer path. Parameterized partial paths are
+ * not supported. The caller should already have verified that no lateral
+ * rels are required here.
+ */
+ Assert(bms_is_empty(joinrel->lateral_relids));
+ if (inner_path->param_info != NULL)
+ {
+ Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+
+ if (!bms_is_empty(inner_paramrels))
+ return;
+ }
+
+ /*
+ * Before creating a path, get a quick lower bound on what it is likely to
+ * cost. Bail out right away if it looks terrible.
+ */
+ initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
+ outer_path, inner_path, extra, parallel_hash);
+ if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
+ return;
+
+ /* Might be good enough to be worth trying, so let's try it. */
+ add_partial_path(joinrel, (Path *)
+ create_hashjoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ extra,
+ outer_path,
+ inner_path,
+ parallel_hash,
+ extra->restrictlist,
+ NULL,
+ hashclauses));
+}
+
+/*
+ * clause_sides_match_join
+ * Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join. The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side. If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ */
+static inline bool
+clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
+ RelOptInfo *innerrel)
+{
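+ /*
+ * For example, with outerrel = {a} and innerrel = {b}, the clause
+ * "a.x = b.y" passes the first test (outer_is_left becomes true) and
+ * "b.y = a.x" passes the second (outer_is_left becomes false); a clause
+ * referencing both rels on the same side passes neither.
+ */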
+ if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
+ bms_is_subset(rinfo->right_relids, innerrel->relids))
+ {
+ /* lefthand side is outer */
+ rinfo->outer_is_left = true;
+ return true;
+ }
+ else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
+ bms_is_subset(rinfo->right_relids, outerrel->relids))
+ {
+ /* righthand side is outer */
+ rinfo->outer_is_left = false;
+ return true;
+ }
+ return false; /* no good for these input relations */
+}
+
+/*
+ * sort_inner_and_outer
+ * Create mergejoin join paths by explicitly sorting both the outer and
+ * inner join relations on each available merge ordering.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do
+ * 'extra' contains additional input values
+ */
+static void
+sort_inner_and_outer(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinType save_jointype = jointype;
+ Path *outer_path;
+ Path *inner_path;
+ Path *cheapest_partial_outer = NULL;
+ Path *cheapest_safe_inner = NULL;
+ List *all_pathkeys;
+ ListCell *l;
+
+ /*
+ * We only consider the cheapest-total-cost input paths, since we are
+ * assuming here that a sort is required. We will consider
+ * cheapest-startup-cost input paths later, and only if they don't need a
+ * sort.
+ *
+ * This function intentionally does not consider parameterized input
+ * paths, except when the cheapest-total is parameterized. If we did so,
+ * we'd have a combinatorial explosion of mergejoin paths of dubious
+ * value. This interacts with decisions elsewhere that also discriminate
+ * against mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
+ */
+ outer_path = outerrel->cheapest_total_path;
+ inner_path = innerrel->cheapest_total_path;
+
+ /*
+ * If either cheapest-total path is parameterized by the other rel, we
+ * can't use a mergejoin. (There's no use looking for alternative input
+ * paths, since these should already be the least-parameterized available
+ * paths.)
+ */
+ if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
+ PATH_PARAM_BY_REL(inner_path, outerrel))
+ return;
+
+ /*
+ * If unique-ification is requested, do it and then handle as a plain
+ * inner join.
+ */
+ if (jointype == JOIN_UNIQUE_OUTER)
+ {
+ outer_path = (Path *) create_unique_path(root, outerrel,
+ outer_path, extra->sjinfo);
+ Assert(outer_path);
+ jointype = JOIN_INNER;
+ }
+ else if (jointype == JOIN_UNIQUE_INNER)
+ {
+ inner_path = (Path *) create_unique_path(root, innerrel,
+ inner_path, extra->sjinfo);
+ Assert(inner_path);
+ jointype = JOIN_INNER;
+ }
+
+ /*
+ * If the joinrel is parallel-safe, we may be able to consider a partial
+ * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
+ * outer path will be partial, and therefore we won't be able to properly
+ * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
+ * JOIN_RIGHT, because they can produce false null extended rows. Also,
+ * the resulting path must not be parameterized.
+ */
+ if (joinrel->consider_parallel &&
+ save_jointype != JOIN_UNIQUE_OUTER &&
+ save_jointype != JOIN_FULL &&
+ save_jointype != JOIN_RIGHT &&
+ outerrel->partial_pathlist != NIL &&
+ bms_is_empty(joinrel->lateral_relids))
+ {
+ cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
+
+ if (inner_path->parallel_safe)
+ cheapest_safe_inner = inner_path;
+ else if (save_jointype != JOIN_UNIQUE_INNER)
+ cheapest_safe_inner =
+ get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+ }
+
+ /*
+ * Each possible ordering of the available mergejoin clauses will generate
+ * a differently-sorted result path at essentially the same cost. We have
+ * no basis for choosing one over another at this level of joining, but
+ * some sort orders may be more useful than others for higher-level
+ * mergejoins, so it's worth considering multiple orderings.
+ *
+ * Actually, it's not quite true that every mergeclause ordering will
+ * generate a different path order, because some of the clauses may be
+ * partially redundant (refer to the same EquivalenceClasses). Therefore,
+ * what we do is convert the mergeclause list to a list of canonical
+ * pathkeys, and then consider different orderings of the pathkeys.
+ *
+ * Generating a path for *every* permutation of the pathkeys doesn't seem
+ * like a winning strategy; the cost in planning time is too high. For
+ * now, we generate one path for each pathkey, listing that pathkey first
+ * and the rest in random order. This should allow at least a one-clause
+ * mergejoin without re-sorting against any other possible mergejoin
+ * partner path. But if we've not guessed the right ordering of secondary
+ * keys, we may end up evaluating clauses as qpquals when they could have
+ * been done as mergeclauses. (In practice, it's rare that there's more
+ * than two or three mergeclauses, so expending a huge amount of thought
+ * on that is probably not worth it.)
+ *
+ * The pathkey order returned by select_outer_pathkeys_for_merge() has
+ * some heuristics behind it (see that function), so be sure to try it
+ * exactly as-is as well as making variants.
+ */
+ all_pathkeys = select_outer_pathkeys_for_merge(root,
+ extra->mergeclause_list,
+ joinrel);
+
+ foreach(l, all_pathkeys)
+ {
+ PathKey *front_pathkey = (PathKey *) lfirst(l);
+ List *cur_mergeclauses;
+ List *outerkeys;
+ List *innerkeys;
+ List *merge_pathkeys;
+
+ /* Make a pathkey list with this guy first */
+ if (l != list_head(all_pathkeys))
+ outerkeys = lcons(front_pathkey,
+ list_delete_nth_cell(list_copy(all_pathkeys),
+ foreach_current_index(l)));
+ else
+ outerkeys = all_pathkeys; /* no work at first one... */
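+
+ /*
+ * For example, if all_pathkeys is (a, b, c), the outer orderings tried
+ * are (a, b, c), (b, a, c) and (c, a, b): each pathkey gets a turn in
+ * front while the others keep their original relative order.
+ */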
+
+ /* Sort the mergeclauses into the corresponding ordering */
+ cur_mergeclauses =
+ find_mergeclauses_for_outer_pathkeys(root,
+ outerkeys,
+ extra->mergeclause_list);
+
+ /* Should have used them all... */
+ Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
+
+ /* Build sort pathkeys for the inner side */
+ innerkeys = make_inner_pathkeys_for_merge(root,
+ cur_mergeclauses,
+ outerkeys);
+
+ /* Build pathkeys representing output sort order */
+ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ outerkeys);
+
+ /*
+ * And now we can make the path.
+ *
+ * Note: it's possible that the cheapest paths will already be sorted
+ * properly. try_mergejoin_path will detect that case and suppress an
+ * explicit sort step, so we needn't do so here.
+ */
+ try_mergejoin_path(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ merge_pathkeys,
+ cur_mergeclauses,
+ outerkeys,
+ innerkeys,
+ jointype,
+ extra,
+ false);
+
+ /*
+ * If we have partial outer and parallel safe inner path then try
+ * partial mergejoin path.
+ */
+ if (cheapest_partial_outer && cheapest_safe_inner)
+ try_partial_mergejoin_path(root,
+ joinrel,
+ cheapest_partial_outer,
+ cheapest_safe_inner,
+ merge_pathkeys,
+ cur_mergeclauses,
+ outerkeys,
+ innerkeys,
+ jointype,
+ extra);
+ }
+}
+
+/*
+ * generate_mergejoin_paths
+ * Creates possible mergejoin paths for input outerpath.
+ *
+ * We generate mergejoins if mergejoin clauses are available. We have
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge. If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements). So, we consider truncations of the
+ * mergeclause list as well as the full list. (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
+ */
+static void
+generate_mergejoin_paths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *innerrel,
+ Path *outerpath,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool useallclauses,
+ Path *inner_cheapest_total,
+ List *merge_pathkeys,
+ bool is_partial)
+{
+ List *mergeclauses;
+ List *innersortkeys;
+ List *trialsortkeys;
+ Path *cheapest_startup_inner;
+ Path *cheapest_total_inner;
+ JoinType save_jointype = jointype;
+ int num_sortkeys;
+ int sortkeycnt;
+
+ if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+ jointype = JOIN_INNER;
+
+ /* Look for useful mergeclauses (if any) */
+ mergeclauses =
+ find_mergeclauses_for_outer_pathkeys(root,
+ outerpath->pathkeys,
+ extra->mergeclause_list);
+
+ /*
+ * Done with this outer path if no chance for a mergejoin.
+ *
+ * Special corner case: for "x FULL JOIN y ON true", there will be no join
+ * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
+ * but since mergejoin is our only join type that supports FULL JOIN
+ * without any join clauses, it's necessary to generate a clauseless
+ * mergejoin path instead.
+ */
+ if (mergeclauses == NIL)
+ {
+ if (jointype == JOIN_FULL)
+ /* okay to try for mergejoin */ ;
+ else
+ return;
+ }
+ if (useallclauses &&
+ list_length(mergeclauses) != list_length(extra->mergeclause_list))
+ return;
+
+ /* Compute the required ordering of the inner path */
+ innersortkeys = make_inner_pathkeys_for_merge(root,
+ mergeclauses,
+ outerpath->pathkeys);
+
+ /*
+ * Generate a mergejoin on the basis of sorting the cheapest inner. Since
+ * a sort will be needed, only cheapest total cost matters. (But
+ * try_mergejoin_path will do the right thing if inner_cheapest_total is
+ * already correctly sorted.)
+ */
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ inner_cheapest_total,
+ merge_pathkeys,
+ mergeclauses,
+ NIL,
+ innersortkeys,
+ jointype,
+ extra,
+ is_partial);
+
+ /* Can't do anything else if inner path needs to be unique'd */
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ return;
+
+ /*
+ * Look for presorted inner paths that satisfy the innersortkey list ---
+ * or any truncation thereof, if we are allowed to build a mergejoin using
+ * a subset of the merge clauses. Here, we consider both cheap startup
+ * cost and cheap total cost.
+ *
+ * Currently we do not consider parameterized inner paths here. This
+ * interacts with decisions elsewhere that also discriminate against
+ * mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
+ *
+ * As we shorten the sortkey list, we should consider only paths that are
+ * strictly cheaper than (in particular, not the same as) any path found
+ * in an earlier iteration. Otherwise we'd be intentionally using fewer
+ * merge keys than a given path allows (treating the rest as plain
+ * joinquals), which is unlikely to be a good idea. Also, eliminating
+ * paths here on the basis of compare_path_costs is a lot cheaper than
+ * building the mergejoin path only to throw it away.
+ *
+ * If inner_cheapest_total is well enough sorted to have not required a
+ * sort in the path made above, we shouldn't make a duplicate path with
+ * it, either. We handle that case with the same logic that handles the
+ * previous consideration, by initializing the variables that track
+ * cheapest-so-far properly. Note that we do NOT reject
+ * inner_cheapest_total if we find it matches some shorter set of
+ * pathkeys. That case corresponds to using fewer mergekeys to avoid
+ * sorting inner_cheapest_total, whereas we did sort it above, so the
+ * plans being considered are different.
+ */
+ if (pathkeys_contained_in(innersortkeys,
+ inner_cheapest_total->pathkeys))
+ {
+ /* inner_cheapest_total didn't require a sort */
+ cheapest_startup_inner = inner_cheapest_total;
+ cheapest_total_inner = inner_cheapest_total;
+ }
+ else
+ {
+ /* it did require a sort, at least for the full set of keys */
+ cheapest_startup_inner = NULL;
+ cheapest_total_inner = NULL;
+ }
+ num_sortkeys = list_length(innersortkeys);
+ if (num_sortkeys > 1 && !useallclauses)
+ trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
+ else
+ trialsortkeys = innersortkeys; /* won't really truncate */
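+
+ /*
+ * For example, if innersortkeys is (a, b, c), the loop below first looks
+ * for presorted inner paths matching (a, b, c), then (a, b), then just
+ * (a), shortening the mergeclause list to match each truncation.
+ */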
+
+ for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
+ {
+ Path *innerpath;
+ List *newclauses = NIL;
+
+ /*
+ * Look for an inner path ordered well enough for the first
+ * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
+ * destructively, which is why we made a copy...
+ */
+ trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+ trialsortkeys,
+ NULL,
+ TOTAL_COST,
+ is_partial);
+ if (innerpath != NULL &&
+ (cheapest_total_inner == NULL ||
+ compare_path_costs(innerpath, cheapest_total_inner,
+ TOTAL_COST) < 0))
+ {
+ /* Found a cheap (or even-cheaper) sorted path */
+ /* Select the right mergeclauses, if we didn't already */
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ trim_mergeclauses_for_inner_pathkeys(root,
+ mergeclauses,
+ trialsortkeys);
+ Assert(newclauses != NIL);
+ }
+ else
+ newclauses = mergeclauses;
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL,
+ jointype,
+ extra,
+ is_partial);
+ cheapest_total_inner = innerpath;
+ }
+ /* Same on the basis of cheapest startup cost ... */
+ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+ trialsortkeys,
+ NULL,
+ STARTUP_COST,
+ is_partial);
+ if (innerpath != NULL &&
+ (cheapest_startup_inner == NULL ||
+ compare_path_costs(innerpath, cheapest_startup_inner,
+ STARTUP_COST) < 0))
+ {
+ /* Found a cheap (or even-cheaper) sorted path */
+ if (innerpath != cheapest_total_inner)
+ {
+ /*
+ * Avoid rebuilding clause list if we already made one; saves
+ * memory in big join trees...
+ */
+ if (newclauses == NIL)
+ {
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ trim_mergeclauses_for_inner_pathkeys(root,
+ mergeclauses,
+ trialsortkeys);
+ Assert(newclauses != NIL);
+ }
+ else
+ newclauses = mergeclauses;
+ }
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL,
+ jointype,
+ extra,
+ is_partial);
+ }
+ cheapest_startup_inner = innerpath;
+ }
+
+ /*
+ * Don't consider truncated sortkeys if we need all clauses.
+ */
+ if (useallclauses)
+ break;
+ }
+}
+
+/*
+ * match_unsorted_outer
+ * Creates possible join paths for processing a single join relation
+ * 'joinrel' by employing either iterative substitution or
+ * mergejoining on each of its possible outer paths (considering
+ * only outer paths that are already ordered well enough for merging).
+ *
+ * We always generate a nestloop path for each available outer path.
+ * In fact we may generate as many as five: one on the cheapest-total-cost
+ * inner path, one on the same with materialization, one on the
+ * cheapest-startup-cost inner path (if different), one on the
+ * cheapest-total inner-indexscan path (if any), and one on the
+ * cheapest-startup inner-indexscan path (if different).
+ *
+ * We also consider mergejoins if mergejoin clauses are available. See
+ * detailed comments in generate_mergejoin_paths.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do
+ * 'extra' contains additional input values
+ */
+static void
+match_unsorted_outer(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinType save_jointype = jointype;
+ bool nestjoinOK;
+ bool useallclauses;
+ Path *inner_cheapest_total = innerrel->cheapest_total_path;
+ Path *matpath = NULL;
+ ListCell *lc1;
+
+ /*
+ * Nestloop only supports inner, left, semi, and anti joins. Also, if we
+ * are doing a right or full mergejoin, we must use *all* the mergeclauses
+ * as join clauses, else we will not have a valid plan. (Although these
+ * two flags are currently inverses, keep them separate for clarity and
+ * possible future changes.)
+ */
+ switch (jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_LEFT:
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+ nestjoinOK = true;
+ useallclauses = false;
+ break;
+ case JOIN_RIGHT:
+ case JOIN_FULL:
+ nestjoinOK = false;
+ useallclauses = true;
+ break;
+ case JOIN_UNIQUE_OUTER:
+ case JOIN_UNIQUE_INNER:
+ jointype = JOIN_INNER;
+ nestjoinOK = true;
+ useallclauses = false;
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) jointype);
+ nestjoinOK = false; /* keep compiler quiet */
+ useallclauses = false;
+ break;
+ }
+
+ /*
+ * If inner_cheapest_total is parameterized by the outer rel, ignore it;
+ * we will consider it below as a member of cheapest_parameterized_paths,
+ * but the other possibilities considered in this routine aren't usable.
+ */
+ if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+ inner_cheapest_total = NULL;
+
+ /*
+ * If we need to unique-ify the inner path, we will consider only the
+ * cheapest-total inner.
+ */
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ {
+ /* No way to do this with an inner path parameterized by outer rel */
+ if (inner_cheapest_total == NULL)
+ return;
+ inner_cheapest_total = (Path *)
+ create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
+ Assert(inner_cheapest_total);
+ }
+ else if (nestjoinOK)
+ {
+ /*
+ * Consider materializing the cheapest inner path, unless
+ * enable_material is off or the path in question materializes its
+ * output anyway.
+ */
+ if (enable_material && inner_cheapest_total != NULL &&
+ !ExecMaterializesOutput(inner_cheapest_total->pathtype))
+ matpath = (Path *)
+ create_material_path(innerrel, inner_cheapest_total);
+ }
+
+ foreach(lc1, outerrel->pathlist)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
+ List *merge_pathkeys;
+
+ /*
+ * We cannot use an outer path that is parameterized by the inner rel.
+ */
+ if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ continue;
+
+ /*
+ * If we need to unique-ify the outer path, it's pointless to consider
+ * any but the cheapest outer. (XXX we don't consider parameterized
+ * outers, nor inners, for unique-ified cases. Should we?)
+ */
+ if (save_jointype == JOIN_UNIQUE_OUTER)
+ {
+ if (outerpath != outerrel->cheapest_total_path)
+ continue;
+ outerpath = (Path *) create_unique_path(root, outerrel,
+ outerpath, extra->sjinfo);
+ Assert(outerpath);
+ }
+
+ /*
+ * The result will have this sort order (even if it is implemented as
+ * a nestloop, and even if some of the mergeclauses are implemented by
+ * qpquals rather than as true mergeclauses):
+ */
+ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ outerpath->pathkeys);
+
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ {
+ /*
+ * Consider nestloop join, but only with the unique-ified cheapest
+ * inner path
+ */
+ try_nestloop_path(root,
+ joinrel,
+ outerpath,
+ inner_cheapest_total,
+ merge_pathkeys,
+ jointype,
+ extra);
+ }
+ else if (nestjoinOK)
+ {
+ /*
+ * Consider nestloop joins using this outer path and various
+ * available paths for the inner relation. We consider the
+ * cheapest-total paths for each available parameterization of the
+ * inner relation, including the unparameterized case.
+ */
+ ListCell *lc2;
+
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
+ Path *mpath;
+
+ try_nestloop_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ jointype,
+ extra);
+
+ /*
+ * Try generating a memoize path and see if that makes the
+ * nested loop any cheaper.
+ */
+ mpath = get_memoize_path(root, innerrel, outerrel,
+ innerpath, outerpath, jointype,
+ extra);
+ if (mpath != NULL)
+ try_nestloop_path(root,
+ joinrel,
+ outerpath,
+ mpath,
+ merge_pathkeys,
+ jointype,
+ extra);
+ }
+
+ /* Also consider materialized form of the cheapest inner path */
+ if (matpath != NULL)
+ try_nestloop_path(root,
+ joinrel,
+ outerpath,
+ matpath,
+ merge_pathkeys,
+ jointype,
+ extra);
+ }
+
+ /* Can't do anything else if outer path needs to be unique'd */
+ if (save_jointype == JOIN_UNIQUE_OUTER)
+ continue;
+
+ /* Can't do anything else if inner rel is parameterized by outer */
+ if (inner_cheapest_total == NULL)
+ continue;
+
+ /* Generate merge join paths */
+ generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
+ save_jointype, extra, useallclauses,
+ inner_cheapest_total, merge_pathkeys,
+ false);
+ }
+
+ /*
+ * Consider partial nestloop and mergejoin plan if outerrel has any
+ * partial path and the joinrel is parallel-safe. However, we can't
+ * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
+ * therefore we won't be able to properly guarantee uniqueness. Nor can
+ * we handle joins needing lateral rels, since partial paths must not be
+ * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
+ * because they can produce false null extended rows.
+ */
+ if (joinrel->consider_parallel &&
+ save_jointype != JOIN_UNIQUE_OUTER &&
+ save_jointype != JOIN_FULL &&
+ save_jointype != JOIN_RIGHT &&
+ outerrel->partial_pathlist != NIL &&
+ bms_is_empty(joinrel->lateral_relids))
+ {
+ if (nestjoinOK)
+ consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
+ save_jointype, extra);
+
+ /*
+ * If inner_cheapest_total is NULL or not parallel-safe, find the
+ * cheapest parallel-safe total path instead. If doing JOIN_UNIQUE_INNER,
+ * we can't use any alternative inner path.
+ */
+ if (inner_cheapest_total == NULL ||
+ !inner_cheapest_total->parallel_safe)
+ {
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ return;
+
+ inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+ }
+
+ if (inner_cheapest_total)
+ consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
+ save_jointype, extra,
+ inner_cheapest_total);
+ }
+}
+
+/*
+ * consider_parallel_mergejoin
+ * Try to build partial paths for a joinrel by joining a partial path
+ * for the outer relation to a complete path for the inner relation.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do
+ * 'extra' contains additional input values
+ * 'inner_cheapest_total' cheapest total path for innerrel
+ */
+static void
+consider_parallel_mergejoin(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ Path *inner_cheapest_total)
+{
+ ListCell *lc1;
+
+ /* generate merge join path for each partial outer path */
+ foreach(lc1, outerrel->partial_pathlist)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
+ List *merge_pathkeys;
+
+ /*
+ * Figure out what useful ordering any paths we create will have.
+ */
+ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ outerpath->pathkeys);
+
+ generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
+ extra, false, inner_cheapest_total,
+ merge_pathkeys, true);
+ }
+}
+
+/*
+ * consider_parallel_nestloop
+ * Try to build partial paths for a joinrel by joining a partial path for the
+ * outer relation to a complete path for the inner relation.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do
+ * 'extra' contains additional input values
+ */
+static void
+consider_parallel_nestloop(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinType save_jointype = jointype;
+ ListCell *lc1;
+
+ if (jointype == JOIN_UNIQUE_INNER)
+ jointype = JOIN_INNER;
+
+ foreach(lc1, outerrel->partial_pathlist)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
+ List *pathkeys;
+ ListCell *lc2;
+
+ /* Figure out what useful ordering any paths we create will have. */
+ pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ outerpath->pathkeys);
+
+ /*
+ * Try the cheapest parameterized paths; only those which will produce
+ * an unparameterized path when joined to this outerrel will survive
+ * try_partial_nestloop_path. The cheapest unparameterized path is
+ * also in this list.
+ */
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
+ Path *mpath;
+
+ /* Can't join to an inner path that is not parallel-safe */
+ if (!innerpath->parallel_safe)
+ continue;
+
+ /*
+ * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
+ * cheapest_total_path, and we have to unique-ify it. (We might
+ * be able to relax this to allow other safe, unparameterized
+ * inner paths, but right now create_unique_path is not on board
+ * with that.)
+ */
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ {
+ if (innerpath != innerrel->cheapest_total_path)
+ continue;
+ innerpath = (Path *) create_unique_path(root, innerrel,
+ innerpath,
+ extra->sjinfo);
+ Assert(innerpath);
+ }
+
+ try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
+ pathkeys, jointype, extra);
+
+ /*
+ * Try generating a memoize path and see if that makes the nested
+ * loop any cheaper.
+ */
+ mpath = get_memoize_path(root, innerrel, outerrel,
+ innerpath, outerpath, jointype,
+ extra);
+ if (mpath != NULL)
+ try_partial_nestloop_path(root, joinrel, outerpath, mpath,
+ pathkeys, jointype, extra);
+ }
+ }
+}
+
+/*
+ * hash_inner_and_outer
+ * Create hashjoin join paths by explicitly hashing both the outer and
+ * inner keys of each available hash clause.
+ *
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do
+ * 'extra' contains additional input values
+ */
+static void
+hash_inner_and_outer(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ JoinType save_jointype = jointype;
+ bool isouterjoin = IS_OUTER_JOIN(jointype);
+ List *hashclauses;
+ ListCell *l;
+
+ /*
+ * We need to build only one hashclauses list for any given pair of outer
+ * and inner relations; all of the hashable clauses will be used as keys.
+ *
+ * Scan the join's restrictinfo list to find hashjoinable clauses that are
+ * usable with this pair of sub-relations.
+ */
+ hashclauses = NIL;
+ foreach(l, extra->restrictlist)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+ /*
+ * If processing an outer join, only use its own join clauses for
+ * hashing. For inner joins we need not be so picky.
+ */
+ if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
+ continue;
+
+ if (!restrictinfo->can_join ||
+ restrictinfo->hashjoinoperator == InvalidOid)
+ continue; /* not hashjoinable */
+
+ /*
+ * Check if clause has the form "outer op inner" or "inner op outer".
+ */
+ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ continue; /* no good for these input relations */
+
+ hashclauses = lappend(hashclauses, restrictinfo);
+ }
+
+ /* If we found any usable hashclauses, make paths */
+ if (hashclauses)
+ {
+ /*
+ * We consider both the cheapest-total-cost and cheapest-startup-cost
+ * outer paths. There's no need to consider any but the
+ * cheapest-total-cost inner path, however.
+ */
+ Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
+ Path *cheapest_total_outer = outerrel->cheapest_total_path;
+ Path *cheapest_total_inner = innerrel->cheapest_total_path;
+
+ /*
+ * If either cheapest-total path is parameterized by the other rel, we
+ * can't use a hashjoin. (There's no use looking for alternative
+ * input paths, since these should already be the least-parameterized
+ * available paths.)
+ */
+ if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
+ PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
+ return;
+
+ /* Unique-ify if need be; we ignore parameterized possibilities */
+ if (jointype == JOIN_UNIQUE_OUTER)
+ {
+ cheapest_total_outer = (Path *)
+ create_unique_path(root, outerrel,
+ cheapest_total_outer, extra->sjinfo);
+ Assert(cheapest_total_outer);
+ jointype = JOIN_INNER;
+ try_hashjoin_path(root,
+ joinrel,
+ cheapest_total_outer,
+ cheapest_total_inner,
+ hashclauses,
+ jointype,
+ extra);
+ /* no possibility of cheap startup here */
+ }
+ else if (jointype == JOIN_UNIQUE_INNER)
+ {
+ cheapest_total_inner = (Path *)
+ create_unique_path(root, innerrel,
+ cheapest_total_inner, extra->sjinfo);
+ Assert(cheapest_total_inner);
+ jointype = JOIN_INNER;
+ try_hashjoin_path(root,
+ joinrel,
+ cheapest_total_outer,
+ cheapest_total_inner,
+ hashclauses,
+ jointype,
+ extra);
+ if (cheapest_startup_outer != NULL &&
+ cheapest_startup_outer != cheapest_total_outer)
+ try_hashjoin_path(root,
+ joinrel,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ hashclauses,
+ jointype,
+ extra);
+ }
+ else
+ {
+ /*
+ * For other jointypes, we consider the cheapest startup outer
+ * together with the cheapest total inner, and then consider
+ * pairings of cheapest-total paths including parameterized ones.
+ * There is no use in generating parameterized paths on the basis
+ * of possibly cheap startup cost, so this is sufficient.
+ */
+ ListCell *lc1;
+ ListCell *lc2;
+
+ if (cheapest_startup_outer != NULL)
+ try_hashjoin_path(root,
+ joinrel,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ hashclauses,
+ jointype,
+ extra);
+
+ foreach(lc1, outerrel->cheapest_parameterized_paths)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
+
+ /*
+ * We cannot use an outer path that is parameterized by the
+ * inner rel.
+ */
+ if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ continue;
+
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
+
+ /*
+ * We cannot use an inner path that is parameterized by
+ * the outer rel, either.
+ */
+ if (PATH_PARAM_BY_REL(innerpath, outerrel))
+ continue;
+
+ if (outerpath == cheapest_startup_outer &&
+ innerpath == cheapest_total_inner)
+ continue; /* already tried it */
+
+ try_hashjoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ hashclauses,
+ jointype,
+ extra);
+ }
+ }
+ }
+
+ /*
+ * If the joinrel is parallel-safe, we may be able to consider a
+ * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
+ * because the outer path will be partial, and therefore we won't be
+ * able to properly guarantee uniqueness. Similarly, we can't handle
+ * JOIN_FULL and JOIN_RIGHT, because they can produce false null
+ * extended rows. Also, the resulting path must not be parameterized.
+ * We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel
+ * Hash, since in that case we're back to a single hash table with a
+ * single set of match bits for each batch, but that will require
+ * figuring out a deadlock-free way to wait for the probe to finish.
+ */
+ if (joinrel->consider_parallel &&
+ save_jointype != JOIN_UNIQUE_OUTER &&
+ save_jointype != JOIN_FULL &&
+ save_jointype != JOIN_RIGHT &&
+ outerrel->partial_pathlist != NIL &&
+ bms_is_empty(joinrel->lateral_relids))
+ {
+ Path *cheapest_partial_outer;
+ Path *cheapest_partial_inner = NULL;
+ Path *cheapest_safe_inner = NULL;
+
+ cheapest_partial_outer =
+ (Path *) linitial(outerrel->partial_pathlist);
+
+ /*
+ * Can we use a partial inner plan too, so that we can build a
+ * shared hash table in parallel? We can't handle
+ * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
+ */
+ if (innerrel->partial_pathlist != NIL &&
+ save_jointype != JOIN_UNIQUE_INNER &&
+ enable_parallel_hash)
+ {
+ cheapest_partial_inner =
+ (Path *) linitial(innerrel->partial_pathlist);
+ try_partial_hashjoin_path(root, joinrel,
+ cheapest_partial_outer,
+ cheapest_partial_inner,
+ hashclauses, jointype, extra,
+ true /* parallel_hash */ );
+ }
+
+ /*
+ * Normally, given that the joinrel is parallel-safe, the cheapest
+ * total inner path will also be parallel-safe, but if not, we'll
+ * have to search for the cheapest safe, unparameterized inner
+ * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
+ * inner path.
+ */
+ if (cheapest_total_inner->parallel_safe)
+ cheapest_safe_inner = cheapest_total_inner;
+ else if (save_jointype != JOIN_UNIQUE_INNER)
+ cheapest_safe_inner =
+ get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+
+ if (cheapest_safe_inner != NULL)
+ try_partial_hashjoin_path(root, joinrel,
+ cheapest_partial_outer,
+ cheapest_safe_inner,
+ hashclauses, jointype, extra,
+ false /* parallel_hash */ );
+ }
+ }
+}
+
+/*
+ * select_mergejoin_clauses
+ * Select mergejoin clauses that are usable for a particular join.
+ * Returns a list of RestrictInfo nodes for those clauses.
+ *
+ * *mergejoin_allowed is normally set to true, but it is set to false if
+ * this is a right/full join and there are nonmergejoinable join clauses.
+ * The executor's mergejoin machinery cannot handle such cases, so we have
+ * to avoid generating a mergejoin plan. (Note that this flag does NOT
+ * consider whether there are actually any mergejoinable clauses. This is
+ * correct because in some cases we need to build a clauseless mergejoin.
+ * Simply returning NIL is therefore not enough to distinguish safe from
+ * unsafe cases.)
+ *
+ * We also mark each selected RestrictInfo to show which side is currently
+ * being considered as outer. These are transient markings that are only
+ * good for the duration of the current add_paths_to_joinrel() call!
+ *
+ * We examine each restrictinfo clause known for the join to see
+ * if it is mergejoinable and involves vars from the two sub-relations
+ * currently of interest.
+ */
+static List *
+select_mergejoin_clauses(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist,
+ JoinType jointype,
+ bool *mergejoin_allowed)
+{
+ List *result_list = NIL;
+ bool isouterjoin = IS_OUTER_JOIN(jointype);
+ bool have_nonmergeable_joinclause = false;
+ ListCell *l;
+
+ foreach(l, restrictlist)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+ /*
+ * If processing an outer join, only use its own join clauses in the
+ * merge. For inner joins we can use pushed-down clauses too. (Note:
+ * we don't set have_nonmergeable_joinclause here because pushed-down
+ * clauses will become otherquals not joinquals.)
+ */
+ if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
+ continue;
+
+ /* Check that clause is a mergeable operator clause */
+ if (!restrictinfo->can_join ||
+ restrictinfo->mergeopfamilies == NIL)
+ {
+ /*
+ * The executor can handle extra joinquals that are constants, but
+ * not anything else, when doing right/full merge join. (The
+ * reason to support constants is so we can do FULL JOIN ON
+ * FALSE.)
+ */
+ if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
+ have_nonmergeable_joinclause = true;
+ continue; /* not mergejoinable */
+ }
+
+ /*
+ * Check if clause has the form "outer op inner" or "inner op outer".
+ */
+ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ {
+ have_nonmergeable_joinclause = true;
+ continue; /* no good for these input relations */
+ }
+
+ /*
+ * Insist that each side have a non-redundant eclass. This
+ * restriction is needed because various bits of the planner expect
+ * that each clause in a merge be associable with some pathkey in a
+ * canonical pathkey list, but redundant eclasses can't appear in
+ * canonical sort orderings. (XXX it might be worth relaxing this,
+ * but not enough time to address it for 8.3.)
+ *
+ * Note: it would be bad if this condition failed for an otherwise
+ * mergejoinable FULL JOIN clause, since that would result in
+ * undesirable planner failure. I believe that is not possible
+ * however; a variable involved in a full join could only appear in
+ * below_outer_join eclasses, which aren't considered redundant.
+ *
+ * This case *can* happen for left/right join clauses: the outer-side
+ * variable could be equated to a constant. Because we will propagate
+ * that constant across the join clause, the loss of ability to do a
+ * mergejoin is not really all that big a deal, and so it's not clear
+ * that improving this is important.
+ */
+ update_mergeclause_eclasses(root, restrictinfo);
+
+ if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
+ EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
+ {
+ have_nonmergeable_joinclause = true;
+ continue; /* can't handle redundant eclasses */
+ }
+
+ result_list = lappend(result_list, restrictinfo);
+ }
+
+ /*
+ * Report whether mergejoin is allowed (see comment at top of function).
+ */
+ switch (jointype)
+ {
+ case JOIN_RIGHT:
+ case JOIN_FULL:
+ *mergejoin_allowed = !have_nonmergeable_joinclause;
+ break;
+ default:
+ *mergejoin_allowed = true;
+ break;
+ }
+
+ return result_list;
+}
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
new file mode 100644
index 0000000..9da3ff2
--- /dev/null
+++ b/src/backend/optimizer/path/joinrels.c
@@ -0,0 +1,1783 @@
+/*-------------------------------------------------------------------------
+ *
+ * joinrels.c
+ * Routines to determine which relations should be joined
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/joinrels.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "optimizer/appendinfo.h"
+#include "optimizer/joininfo.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "partitioning/partbounds.h"
+#include "utils/memutils.h"
+
+
+static void make_rels_by_clause_joins(PlannerInfo *root,
+ RelOptInfo *old_rel,
+ List *other_rels_list,
+ ListCell *other_rels);
+static void make_rels_by_clauseless_joins(PlannerInfo *root,
+ RelOptInfo *old_rel,
+ List *other_rels);
+static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
+static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
+static bool restriction_is_constant_false(List *restrictlist,
+ RelOptInfo *joinrel,
+ bool only_pushed_down);
+static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
+ RelOptInfo *rel2, RelOptInfo *joinrel,
+ SpecialJoinInfo *sjinfo, List *restrictlist);
+static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1,
+ RelOptInfo *rel2, RelOptInfo *joinrel,
+ SpecialJoinInfo *parent_sjinfo,
+ List *parent_restrictlist);
+static SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root,
+ SpecialJoinInfo *parent_sjinfo,
+ Relids left_relids, Relids right_relids);
+static void compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1,
+ RelOptInfo *rel2, RelOptInfo *joinrel,
+ SpecialJoinInfo *parent_sjinfo,
+ List **parts1, List **parts2);
+static void get_matching_part_pairs(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *rel1, RelOptInfo *rel2,
+ List **parts1, List **parts2);
+
+
+/*
+ * join_search_one_level
+ * Consider ways to produce join relations containing exactly 'level'
+ * jointree items. (This is one step of the dynamic-programming method
+ * embodied in standard_join_search.) Join rel nodes for each feasible
+ * combination of lower-level rels are created and returned in a list.
+ * Implementation paths are created for each such joinrel, too.
+ *
+ * level: level of rels we want to make this time
+ * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
+ *
+ * The result is returned in root->join_rel_level[level].
+ */
+void
+join_search_one_level(PlannerInfo *root, int level)
+{
+ List **joinrels = root->join_rel_level;
+ ListCell *r;
+ int k;
+
+ Assert(joinrels[level] == NIL);
+
+ /* Set join_cur_level so that new joinrels are added to proper list */
+ root->join_cur_level = level;
+
+ /*
+ * First, consider left-sided and right-sided plans, in which rels of
+ * exactly level-1 member relations are joined against initial relations.
+ * We prefer to join using join clauses, but if we find a rel of level-1
+ * members that has no join clauses, we will generate Cartesian-product
+ * joins against all initial rels not already contained in it.
+ */
+ foreach(r, joinrels[level - 1])
+ {
+ RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
+
+ if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
+ has_join_restriction(root, old_rel))
+ {
+ /*
+ * There are join clauses or join order restrictions relevant to
+ * this rel, so consider joins between this rel and (only) those
+ * initial rels it is linked to by a clause or restriction.
+ *
+ * At level 2 this condition is symmetric, so there is no need to
+ * look at initial rels before this one in the list; we already
+ * considered such joins when we were at the earlier rel. (The
+ * mirror-image joins are handled automatically by make_join_rel.)
+ * In later passes (level > 2), we join rels of the previous level
+ * to each initial rel they don't already include but have a join
+ * clause or restriction with.
+ */
+ List *other_rels_list;
+ ListCell *other_rels;
+
+ if (level == 2) /* consider remaining initial rels */
+ {
+ other_rels_list = joinrels[level - 1];
+ other_rels = lnext(other_rels_list, r);
+ }
+ else /* consider all initial rels */
+ {
+ other_rels_list = joinrels[1];
+ other_rels = list_head(other_rels_list);
+ }
+
+ make_rels_by_clause_joins(root,
+ old_rel,
+ other_rels_list,
+ other_rels);
+ }
+ else
+ {
+ /*
+ * Oops, we have a relation that is not joined to any other
+ * relation, either directly or by join-order restrictions.
+ * Cartesian product time.
+ *
+ * We consider a cartesian product with each not-already-included
+ * initial rel, whether it has other join clauses or not. At
+ * level 2, if there are two or more clauseless initial rels, we
+ * will redundantly consider joining them in both directions; but
+ * such cases aren't common enough to justify adding complexity to
+ * avoid the duplicated effort.
+ */
+ make_rels_by_clauseless_joins(root,
+ old_rel,
+ joinrels[1]);
+ }
+ }
+
+ /*
+ * Now, consider "bushy plans" in which relations of k initial rels are
+ * joined to relations of level-k initial rels, for 2 <= k <= level-2.
+ *
+ * We only consider bushy-plan joins for pairs of rels where there is a
+ * suitable join clause (or join order restriction), in order to avoid
+ * unreasonable growth of planning time.
+ */
+ for (k = 2;; k++)
+ {
+ int other_level = level - k;
+
+ /*
+ * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
+ * need to go as far as the halfway point.
+ */
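+ /*
+ * For example, at level 5 we pair 2-way joinrels with 3-way joinrels
+ * (k = 2) and then stop; continuing to k = 3 would only repeat the same
+ * combinations with the inputs swapped.
+ */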
+ if (k > other_level)
+ break;
+
+ foreach(r, joinrels[k])
+ {
+ RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
+ List *other_rels_list;
+ ListCell *other_rels;
+ ListCell *r2;
+
+ /*
+ * We can ignore relations without join clauses here, unless they
+ * participate in join-order restrictions --- then we might have
+ * to force a bushy join plan.
+ */
+ if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
+ !has_join_restriction(root, old_rel))
+ continue;
+
+ if (k == other_level)
+ {
+ /* only consider remaining rels */
+ other_rels_list = joinrels[k];
+ other_rels = lnext(other_rels_list, r);
+ }
+ else
+ {
+ other_rels_list = joinrels[other_level];
+ other_rels = list_head(other_rels_list);
+ }
+
+ for_each_cell(r2, other_rels_list, other_rels)
+ {
+ RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
+
+ if (!bms_overlap(old_rel->relids, new_rel->relids))
+ {
+ /*
+ * OK, we can build a rel of the right level from this
+ * pair of rels. Do so if there is at least one relevant
+ * join clause or join order restriction.
+ */
+ if (have_relevant_joinclause(root, old_rel, new_rel) ||
+ have_join_order_restriction(root, old_rel, new_rel))
+ {
+ (void) make_join_rel(root, old_rel, new_rel);
+ }
+ }
+ }
+ }
+ }
+
+ /*----------
+ * Last-ditch effort: if we failed to find any usable joins so far, force
+ * a set of cartesian-product joins to be generated. This handles the
+ * special case where all the available rels have join clauses but we
+ * cannot use any of those clauses yet. This can only happen when we are
+ * considering a join sub-problem (a sub-joinlist) and all the rels in the
+ * sub-problem have only join clauses with rels outside the sub-problem.
+ * An example is
+ *
+ * SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
+ * WHERE a.w = c.x and b.y = d.z;
+ *
+ * If the "a INNER JOIN b" sub-problem does not get flattened into the
+ * upper level, we must be willing to make a cartesian join of a and b;
+ * but the code above will not have done so, because it thought that both
+ * a and b have joinclauses. We consider only left-sided and right-sided
+ * cartesian joins in this case (no bushy).
+ *----------
+ */
+ if (joinrels[level] == NIL)
+ {
+ /*
+ * This loop is just like the first one, except we always call
+ * make_rels_by_clauseless_joins().
+ */
+ foreach(r, joinrels[level - 1])
+ {
+ RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
+
+ make_rels_by_clauseless_joins(root,
+ old_rel,
+ joinrels[1]);
+ }
+
+ /*----------
+ * When special joins are involved, there may be no legal way
+ * to make an N-way join for some values of N. For example consider
+ *
+ * SELECT ... FROM t1 WHERE
+ * x IN (SELECT ... FROM t2,t3 WHERE ...) AND
+ * y IN (SELECT ... FROM t4,t5 WHERE ...)
+ *
+ * We will flatten this query to a 5-way join problem, but there are
+ * no 4-way joins that join_is_legal() will consider legal. We have
+ * to accept failure at level 4 and go on to discover a workable
+ * bushy plan at level 5.
+ *
+ * However, if there are no special joins and no lateral references
+ * then join_is_legal() should never fail, and so the following sanity
+ * check is useful.
+ *----------
+ */
+ if (joinrels[level] == NIL &&
+ root->join_info_list == NIL &&
+ !root->hasLateralRTEs)
+ elog(ERROR, "failed to build any %d-way joins", level);
+ }
+}
+
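The bushy-plan loop above combines sub-joins of k initial rels with sub-joins of (level - k) rels and stops at the halfway point, because make_join_rel() covers each pair of inputs in both orders. The following standalone sketch (plain C, not part of the patch; the target level is an invented example value) just prints which joinrels lists get combined, to make that pairing concrete:

#include <stdio.h>

/*
 * Illustration only: enumerate the level pairs considered by the bushy-plan
 * loop for one target join level.  Stopping once k > level - k mirrors the
 * "halfway point" break above.
 */
int
main(void)
{
	int		level = 6;		/* hypothetical target join level */

	for (int k = 2;; k++)
	{
		int		other_level = level - k;

		if (k > other_level)
			break;
		printf("level %d: combine joinrels[%d] with joinrels[%d]%s\n",
			   level, k, other_level,
			   (k == other_level) ? " (same list; only later cells)" : "");
	}
	return 0;
}

For level 6 this prints the (2,4) and (3,3) pairings; the (level-1, 1) combinations are handled by the first loop of join_search_one_level().
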
+/*
+ * make_rels_by_clause_joins
+ * Build joins between the given relation 'old_rel' and other relations
+ * that participate in join clauses that 'old_rel' also participates in
+ * (or participate in join-order restrictions with it).
+ * The join rels are returned in root->join_rel_level[join_cur_level].
+ *
+ * Note: at levels above 2 we will generate the same joined relation in
+ * multiple ways --- for example (a join b) join c is the same RelOptInfo as
+ * (b join c) join a, though the second case will add a different set of Paths
+ * to it. This is the reason for using the join_rel_level mechanism, which
+ * automatically ensures that each new joinrel is only added to the list once.
+ *
+ * 'old_rel' is the relation entry for the relation to be joined
+ * 'other_rels_list': a list containing the other
+ * rels to be considered for joining
+ * 'other_rels': the first cell to be considered
+ *
+ * Currently, this is only used with initial rels in other_rels, but it
+ * will work for joining to joinrels too.
+ */
+static void
+make_rels_by_clause_joins(PlannerInfo *root,
+ RelOptInfo *old_rel,
+ List *other_rels_list,
+ ListCell *other_rels)
+{
+ ListCell *l;
+
+ for_each_cell(l, other_rels_list, other_rels)
+ {
+ RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
+
+ if (!bms_overlap(old_rel->relids, other_rel->relids) &&
+ (have_relevant_joinclause(root, old_rel, other_rel) ||
+ have_join_order_restriction(root, old_rel, other_rel)))
+ {
+ (void) make_join_rel(root, old_rel, other_rel);
+ }
+ }
+}
+
+/*
+ * make_rels_by_clauseless_joins
+ * Given a relation 'old_rel' and a list of other relations
+ * 'other_rels', create a join relation between 'old_rel' and each
+ * member of 'other_rels' that isn't already included in 'old_rel'.
+ * The join rels are returned in root->join_rel_level[join_cur_level].
+ *
+ * 'old_rel' is the relation entry for the relation to be joined
+ * 'other_rels': a list containing the other rels to be considered for joining
+ *
+ * Currently, this is only used with initial rels in other_rels, but it would
+ * work for joining to joinrels too.
+ */
+static void
+make_rels_by_clauseless_joins(PlannerInfo *root,
+ RelOptInfo *old_rel,
+ List *other_rels)
+{
+ ListCell *l;
+
+ foreach(l, other_rels)
+ {
+ RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
+
+ if (!bms_overlap(other_rel->relids, old_rel->relids))
+ {
+ (void) make_join_rel(root, old_rel, other_rel);
+ }
+ }
+}
+
+
+/*
+ * join_is_legal
+ * Determine whether a proposed join is legal given the query's
+ * join order constraints; and if it is, determine the join type.
+ *
+ * Caller must supply not only the two rels, but the union of their relids.
+ * (We could simplify the API by computing joinrelids locally, but this
+ * would be redundant work in the normal path through make_join_rel.)
+ *
+ * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
+ * else it's set to point to the associated SpecialJoinInfo node. Also,
+ * *reversed_p is set true if the given relations need to be swapped to
+ * match the SpecialJoinInfo node.
+ */
+static bool
+join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+ Relids joinrelids,
+ SpecialJoinInfo **sjinfo_p, bool *reversed_p)
+{
+ SpecialJoinInfo *match_sjinfo;
+ bool reversed;
+ bool unique_ified;
+ bool must_be_leftjoin;
+ ListCell *l;
+
+ /*
+ * Ensure output params are set on failure return. This is just to
+ * suppress uninitialized-variable warnings from overly anal compilers.
+ */
+ *sjinfo_p = NULL;
+ *reversed_p = false;
+
+ /*
+ * If we have any special joins, the proposed join might be illegal; and
+ * in any case we have to determine its join type. Scan the join info
+ * list for matches and conflicts.
+ */
+ match_sjinfo = NULL;
+ reversed = false;
+ unique_ified = false;
+ must_be_leftjoin = false;
+
+ foreach(l, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+ /*
+ * This special join is not relevant unless its RHS overlaps the
+ * proposed join. (Check this first as a fast path for dismissing
+ * most irrelevant SJs quickly.)
+ */
+ if (!bms_overlap(sjinfo->min_righthand, joinrelids))
+ continue;
+
+ /*
+ * Also, not relevant if proposed join is fully contained within RHS
+ * (ie, we're still building up the RHS).
+ */
+ if (bms_is_subset(joinrelids, sjinfo->min_righthand))
+ continue;
+
+ /*
+ * Also, not relevant if SJ is already done within either input.
+ */
+ if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel1->relids))
+ continue;
+ if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel2->relids))
+ continue;
+
+ /*
+ * If it's a semijoin and we already joined the RHS to any other rels
+ * within either input, then we must have unique-ified the RHS at that
+ * point (see below). Therefore the semijoin is no longer relevant in
+ * this join path.
+ */
+ if (sjinfo->jointype == JOIN_SEMI)
+ {
+ if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
+ !bms_equal(sjinfo->syn_righthand, rel1->relids))
+ continue;
+ if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
+ !bms_equal(sjinfo->syn_righthand, rel2->relids))
+ continue;
+ }
+
+ /*
+ * If one input contains min_lefthand and the other contains
+ * min_righthand, then we can perform the SJ at this join.
+ *
+ * Reject if we get matches to more than one SJ; that implies we're
+ * considering something that's not really valid.
+ */
+ if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel2->relids))
+ {
+ if (match_sjinfo)
+ return false; /* invalid join path */
+ match_sjinfo = sjinfo;
+ reversed = false;
+ }
+ else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel1->relids))
+ {
+ if (match_sjinfo)
+ return false; /* invalid join path */
+ match_sjinfo = sjinfo;
+ reversed = true;
+ }
+ else if (sjinfo->jointype == JOIN_SEMI &&
+ bms_equal(sjinfo->syn_righthand, rel2->relids) &&
+ create_unique_path(root, rel2, rel2->cheapest_total_path,
+ sjinfo) != NULL)
+ {
+ /*----------
+ * For a semijoin, we can join the RHS to anything else by
+ * unique-ifying the RHS (if the RHS can be unique-ified).
+ * We will only get here if we have the full RHS but less
+ * than min_lefthand on the LHS.
+ *
+ * The reason to consider such a join path is exemplified by
+ * SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
+ * If we insist on doing this as a semijoin we will first have
+ * to form the cartesian product of A*B. But if we unique-ify
+ * C then the semijoin becomes a plain innerjoin and we can join
+ * in any order, eg C to A and then to B. When C is much smaller
+ * than A and B this can be a huge win. So we allow C to be
+ * joined to just A or just B here, and then make_join_rel has
+ * to handle the case properly.
+ *
+ * Note that actually we'll allow unique-ified C to be joined to
+ * some other relation D here, too. That is legal, if usually not
+ * very sane, and this routine is only concerned with legality not
+ * with whether the join is good strategy.
+ *----------
+ */
+ if (match_sjinfo)
+ return false; /* invalid join path */
+ match_sjinfo = sjinfo;
+ reversed = false;
+ unique_ified = true;
+ }
+ else if (sjinfo->jointype == JOIN_SEMI &&
+ bms_equal(sjinfo->syn_righthand, rel1->relids) &&
+ create_unique_path(root, rel1, rel1->cheapest_total_path,
+ sjinfo) != NULL)
+ {
+ /* Reversed semijoin case */
+ if (match_sjinfo)
+ return false; /* invalid join path */
+ match_sjinfo = sjinfo;
+ reversed = true;
+ unique_ified = true;
+ }
+ else
+ {
+ /*
+ * Otherwise, the proposed join overlaps the RHS but isn't a valid
+ * implementation of this SJ. But don't panic quite yet: the RHS
+ * violation might have occurred previously, in one or both input
+ * relations, in which case we must have previously decided that
+ * it was OK to commute some other SJ with this one. If we need
+ * to perform this join to finish building up the RHS, rejecting
+ * it could lead to not finding any plan at all. (This can occur
+ * because of the heuristics elsewhere in this file that postpone
+ * clauseless joins: we might not consider doing a clauseless join
+ * within the RHS until after we've performed other, validly
+ * commutable SJs with one or both sides of the clauseless join.)
+ * This consideration boils down to the rule that if both inputs
+ * overlap the RHS, we can allow the join --- they are either
+ * fully within the RHS, or represent previously-allowed joins to
+ * rels outside it.
+ */
+ if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+ bms_overlap(rel2->relids, sjinfo->min_righthand))
+ continue; /* assume valid previous violation of RHS */
+
+ /*
+ * The proposed join could still be legal, but only if we're
+ * allowed to associate it into the RHS of this SJ. That means
+ * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
+ * not FULL) and the proposed join must not overlap the LHS.
+ */
+ if (sjinfo->jointype != JOIN_LEFT ||
+ bms_overlap(joinrelids, sjinfo->min_lefthand))
+ return false; /* invalid join path */
+
+ /*
+ * To be valid, the proposed join must be a LEFT join; otherwise
+ * it can't associate into this SJ's RHS. But we may not yet have
+ * found the SpecialJoinInfo matching the proposed join, so we
+ * can't test that yet. Remember the requirement for later.
+ */
+ must_be_leftjoin = true;
+ }
+ }
+
+ /*
+ * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
+ * proposed join can't associate into an SJ's RHS.
+ *
+ * Also, fail if the proposed join's predicate isn't strict; we're
+ * essentially checking to see if we can apply outer-join identity 3, and
+ * that's a requirement. (This check may be redundant with checks in
+ * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
+ */
+ if (must_be_leftjoin &&
+ (match_sjinfo == NULL ||
+ match_sjinfo->jointype != JOIN_LEFT ||
+ !match_sjinfo->lhs_strict))
+ return false; /* invalid join path */
+
+ /*
+ * We also have to check for constraints imposed by LATERAL references.
+ */
+ if (root->hasLateralRTEs)
+ {
+ bool lateral_fwd;
+ bool lateral_rev;
+ Relids join_lateral_rels;
+
+ /*
+ * The proposed rels could each contain lateral references to the
+ * other, in which case the join is impossible. If there are lateral
+ * references in just one direction, then the join has to be done with
+ * a nestloop with the lateral referencer on the inside. If the join
+ * matches an SJ that cannot be implemented by such a nestloop, the
+ * join is impossible.
+ *
+ * Also, if the lateral reference is only indirect, we should reject
+ * the join; whatever rel(s) the reference chain goes through must be
+ * joined to first.
+ *
+ * Another case that might keep us from building a valid plan is the
+ * implementation restriction described by have_dangerous_phv().
+ */
+ lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
+ lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
+ if (lateral_fwd && lateral_rev)
+ return false; /* have lateral refs in both directions */
+ if (lateral_fwd)
+ {
+ /* has to be implemented as nestloop with rel1 on left */
+ if (match_sjinfo &&
+ (reversed ||
+ unique_ified ||
+ match_sjinfo->jointype == JOIN_FULL))
+ return false; /* not implementable as nestloop */
+ /* check there is a direct reference from rel2 to rel1 */
+ if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
+ return false; /* only indirect refs, so reject */
+ /* check we won't have a dangerous PHV */
+ if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+ return false; /* might be unable to handle required PHV */
+ }
+ else if (lateral_rev)
+ {
+ /* has to be implemented as nestloop with rel2 on left */
+ if (match_sjinfo &&
+ (!reversed ||
+ unique_ified ||
+ match_sjinfo->jointype == JOIN_FULL))
+ return false; /* not implementable as nestloop */
+ /* check there is a direct reference from rel1 to rel2 */
+ if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
+ return false; /* only indirect refs, so reject */
+ /* check we won't have a dangerous PHV */
+ if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+ return false; /* might be unable to handle required PHV */
+ }
+
+ /*
+ * LATERAL references could also cause problems later on if we accept
+ * this join: if the join's minimum parameterization includes any rels
+ * that would have to be on the inside of an outer join with this join
+ * rel, then it's never going to be possible to build the complete
+ * query using this join. We should reject this join not only because
+ * it'll save work, but because if we don't, the clauseless-join
+ * heuristics might think that legality of this join means that some
+ * other join rel need not be formed, and that could lead to failure
+ * to find any plan at all. We have to consider not only rels that
+ * are directly on the inner side of an OJ with the joinrel, but also
+ * ones that are indirectly so, so search to find all such rels.
+ */
+ join_lateral_rels = min_join_parameterization(root, joinrelids,
+ rel1, rel2);
+ if (join_lateral_rels)
+ {
+ Relids join_plus_rhs = bms_copy(joinrelids);
+ bool more;
+
+ do
+ {
+ more = false;
+ foreach(l, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+ /* ignore full joins --- their ordering is predetermined */
+ if (sjinfo->jointype == JOIN_FULL)
+ continue;
+
+ if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
+ !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
+ {
+ join_plus_rhs = bms_add_members(join_plus_rhs,
+ sjinfo->min_righthand);
+ more = true;
+ }
+ }
+ } while (more);
+ if (bms_overlap(join_plus_rhs, join_lateral_rels))
+ return false; /* will not be able to join to some RHS rel */
+ }
+ }
+
+ /* Otherwise, it's a valid join */
+ *sjinfo_p = match_sjinfo;
+ *reversed_p = reversed;
+ return true;
+}
+
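The lateral-reference check near the end of join_is_legal() grows join_plus_rhs to a fixpoint: whenever a special join's minimum LHS overlaps the set while its minimum RHS is not yet fully included, the whole RHS is pulled in, and the scan repeats until nothing more changes. A standalone sketch of that closure (not part of the patch; Relids sets are modeled as plain unsigned bitmasks and the LHS/RHS values are invented):

#include <stdbool.h>
#include <stdio.h>

typedef struct
{
	unsigned	lhs;			/* stand-in for min_lefthand */
	unsigned	rhs;			/* stand-in for min_righthand */
} SketchSJ;

int
main(void)
{
	SketchSJ	sjs[] = {{0x01, 0x06}, {0x04, 0x18}};	/* invented joins */
	unsigned	set = 0x01;		/* start from the proposed join's relids */
	bool		more;

	do
	{
		more = false;
		for (int i = 0; i < 2; i++)
		{
			/* LHS overlaps the set, but RHS not yet fully included? */
			if ((sjs[i].lhs & set) != 0 && (sjs[i].rhs & ~set) != 0)
			{
				set |= sjs[i].rhs;	/* pull in the whole RHS */
				more = true;
			}
		}
	} while (more);

	printf("expanded set: 0x%x\n", set);	/* 0x01 grows to 0x1f here */
	return 0;
}

join_is_legal() then rejects the join if this expanded set overlaps the join's minimum lateral parameterization, since a lateral-referenced rel that must sit on the inside of an outer join with the join rel could never be supplied.
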
+
+/*
+ * make_join_rel
+ * Find or create a join RelOptInfo that represents the join of
+ * the two given rels, and add to it path information for paths
+ * created with the two rels as outer and inner rel.
+ * (The join rel may already contain paths generated from other
+ * pairs of rels that add up to the same set of base rels.)
+ *
+ * NB: will return NULL if attempted join is not valid. This can happen
+ * when working with outer joins, or with IN or EXISTS clauses that have been
+ * turned into joins.
+ */
+RelOptInfo *
+make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
+{
+ Relids joinrelids;
+ SpecialJoinInfo *sjinfo;
+ bool reversed;
+ SpecialJoinInfo sjinfo_data;
+ RelOptInfo *joinrel;
+ List *restrictlist;
+
+ /* We should never try to join two overlapping sets of rels. */
+ Assert(!bms_overlap(rel1->relids, rel2->relids));
+
+ /* Construct Relids set that identifies the joinrel. */
+ joinrelids = bms_union(rel1->relids, rel2->relids);
+
+ /* Check validity and determine join type. */
+ if (!join_is_legal(root, rel1, rel2, joinrelids,
+ &sjinfo, &reversed))
+ {
+ /* invalid join path */
+ bms_free(joinrelids);
+ return NULL;
+ }
+
+ /* Swap rels if needed to match the join info. */
+ if (reversed)
+ {
+ RelOptInfo *trel = rel1;
+
+ rel1 = rel2;
+ rel2 = trel;
+ }
+
+ /*
+ * If it's a plain inner join, then we won't have found anything in
+ * join_info_list. Make up a SpecialJoinInfo so that selectivity
+ * estimation functions will know what's being joined.
+ */
+ if (sjinfo == NULL)
+ {
+ sjinfo = &sjinfo_data;
+ sjinfo->type = T_SpecialJoinInfo;
+ sjinfo->min_lefthand = rel1->relids;
+ sjinfo->min_righthand = rel2->relids;
+ sjinfo->syn_lefthand = rel1->relids;
+ sjinfo->syn_righthand = rel2->relids;
+ sjinfo->jointype = JOIN_INNER;
+ /* we don't bother trying to make the remaining fields valid */
+ sjinfo->lhs_strict = false;
+ sjinfo->delay_upper_joins = false;
+ sjinfo->semi_can_btree = false;
+ sjinfo->semi_can_hash = false;
+ sjinfo->semi_operators = NIL;
+ sjinfo->semi_rhs_exprs = NIL;
+ }
+
+ /*
+ * Find or build the join RelOptInfo, and compute the restrictlist that
+ * goes with this particular joining.
+ */
+ joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
+ &restrictlist);
+
+ /*
+ * If we've already proven this join is empty, we needn't consider any
+ * more paths for it.
+ */
+ if (is_dummy_rel(joinrel))
+ {
+ bms_free(joinrelids);
+ return joinrel;
+ }
+
+ /* Add paths to the join relation. */
+ populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
+ restrictlist);
+
+ bms_free(joinrelids);
+
+ return joinrel;
+}
+
+/*
+ * populate_joinrel_with_paths
+ * Add paths to the given joinrel for the given pair of joining relations.
+ * The SpecialJoinInfo provides details about the join, and the restrictlist
+ * contains the join clauses and the other clauses applicable to the given
+ * pair of joining relations.
+ */
+static void
+populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
+ RelOptInfo *rel2, RelOptInfo *joinrel,
+ SpecialJoinInfo *sjinfo, List *restrictlist)
+{
+ /*
+ * Consider paths using each rel as both outer and inner. Depending on
+ * the join type, a provably empty outer or inner rel might mean the join
+ * is provably empty too; in which case throw away any previously computed
+ * paths and mark the join as dummy. (We do it this way since it's
+ * conceivable that dummy-ness of a multi-element join might only be
+ * noticeable for certain construction paths.)
+ *
+ * Also, a provably constant-false join restriction typically means that
+ * we can skip evaluating one or both sides of the join. We do this by
+ * marking the appropriate rel as dummy. For outer joins, a
+ * constant-false restriction that is pushed down still means the whole
+ * join is dummy, while a non-pushed-down one means that no inner rows
+ * will join so we can treat the inner rel as dummy.
+ *
+ * We need only consider the jointypes that appear in join_info_list, plus
+ * JOIN_INNER.
+ */
+ switch (sjinfo->jointype)
+ {
+ case JOIN_INNER:
+ if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
+ restriction_is_constant_false(restrictlist, joinrel, false))
+ {
+ mark_dummy_rel(joinrel);
+ break;
+ }
+ add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ JOIN_INNER, sjinfo,
+ restrictlist);
+ add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ JOIN_INNER, sjinfo,
+ restrictlist);
+ break;
+ case JOIN_LEFT:
+ if (is_dummy_rel(rel1) ||
+ restriction_is_constant_false(restrictlist, joinrel, true))
+ {
+ mark_dummy_rel(joinrel);
+ break;
+ }
+ if (restriction_is_constant_false(restrictlist, joinrel, false) &&
+ bms_is_subset(rel2->relids, sjinfo->syn_righthand))
+ mark_dummy_rel(rel2);
+ add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ JOIN_LEFT, sjinfo,
+ restrictlist);
+ add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ JOIN_RIGHT, sjinfo,
+ restrictlist);
+ break;
+ case JOIN_FULL:
+ if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
+ restriction_is_constant_false(restrictlist, joinrel, true))
+ {
+ mark_dummy_rel(joinrel);
+ break;
+ }
+ add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ JOIN_FULL, sjinfo,
+ restrictlist);
+ add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ JOIN_FULL, sjinfo,
+ restrictlist);
+
+ /*
+ * If there are join quals that aren't mergeable or hashable, we
+ * may not be able to build any valid plan. Complain here so that
+ * we can give a somewhat-useful error message. (Since we have no
+ * flexibility of planning for a full join, there's no chance of
+ * succeeding later with another pair of input rels.)
+ */
+ if (joinrel->pathlist == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
+ break;
+ case JOIN_SEMI:
+
+ /*
+ * We might have a normal semijoin, or a case where we don't have
+ * enough rels to do the semijoin but can unique-ify the RHS and
+ * then do an innerjoin (see comments in join_is_legal). In the
+ * latter case we can't apply JOIN_SEMI joining.
+ */
+ if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel2->relids))
+ {
+ if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
+ restriction_is_constant_false(restrictlist, joinrel, false))
+ {
+ mark_dummy_rel(joinrel);
+ break;
+ }
+ add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ JOIN_SEMI, sjinfo,
+ restrictlist);
+ }
+
+ /*
+ * If we know how to unique-ify the RHS and one input rel is
+ * exactly the RHS (not a superset) we can consider unique-ifying
+ * it and then doing a regular join. (The create_unique_path
+ * check here is probably redundant with what join_is_legal did,
+ * but if so the check is cheap because it's cached. So test
+ * anyway to be sure.)
+ */
+ if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
+ create_unique_path(root, rel2, rel2->cheapest_total_path,
+ sjinfo) != NULL)
+ {
+ if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
+ restriction_is_constant_false(restrictlist, joinrel, false))
+ {
+ mark_dummy_rel(joinrel);
+ break;
+ }
+ add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ JOIN_UNIQUE_INNER, sjinfo,
+ restrictlist);
+ add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ JOIN_UNIQUE_OUTER, sjinfo,
+ restrictlist);
+ }
+ break;
+ case JOIN_ANTI:
+ if (is_dummy_rel(rel1) ||
+ restriction_is_constant_false(restrictlist, joinrel, true))
+ {
+ mark_dummy_rel(joinrel);
+ break;
+ }
+ if (restriction_is_constant_false(restrictlist, joinrel, false) &&
+ bms_is_subset(rel2->relids, sjinfo->syn_righthand))
+ mark_dummy_rel(rel2);
+ add_paths_to_joinrel(root, joinrel, rel1, rel2,
+ JOIN_ANTI, sjinfo,
+ restrictlist);
+ break;
+ default:
+ /* other values not expected here */
+ elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
+ break;
+ }
+
+ /* Apply partitionwise join technique, if possible. */
+ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
+}
+
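The per-jointype dummy-input handling in the switch above reduces to a small table, and try_partitionwise_join() below applies the equivalent table to skip provably empty join segments. A standalone sketch of just that table (invented enum and function names, not part of the patch; the constant-false restriction checks are a separate rule and are not modeled here):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the planner's JoinType; names invented for illustration. */
typedef enum {INNER_J, LEFT_J, FULL_J, SEMI_J, ANTI_J} SketchJoinType;

/*
 * Is the join provably empty, given whether its outer (left) and inner
 * (right) inputs are known empty?  Mirrors the rules applied above.
 */
static bool
join_is_provably_empty(SketchJoinType jt, bool outer_empty, bool inner_empty)
{
	switch (jt)
	{
		case INNER_J:
		case SEMI_J:
			return outer_empty || inner_empty;
		case LEFT_J:
		case ANTI_J:
			return outer_empty;
		case FULL_J:
			return outer_empty && inner_empty;
	}
	return false;				/* keep compilers quiet */
}

int
main(void)
{
	/* A left join with only its inner side empty still returns rows. */
	printf("%d\n", join_is_provably_empty(LEFT_J, false, true));	/* 0 */
	printf("%d\n", join_is_provably_empty(INNER_J, false, true));	/* 1 */
	return 0;
}
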
+
+/*
+ * have_join_order_restriction
+ * Detect whether the two relations should be joined to satisfy
+ * a join-order restriction arising from special or lateral joins.
+ *
+ * In practice this is always used with have_relevant_joinclause(), and so
+ * could be merged with that function, but it seems clearer to separate the
+ * two concerns. We need this test because there are degenerate cases where
+ * a clauseless join must be performed to satisfy join-order restrictions.
+ * Also, if one rel has a lateral reference to the other, or both are needed
+ * to compute some PHV, we should consider joining them even if the join would
+ * be clauseless.
+ *
+ * Note: this is only a problem if one side of a degenerate outer join
+ * contains multiple rels, or a clauseless join is required within an
+ * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
+ * join_search_one_level(). We could dispense with this test if we were
+ * willing to try bushy plans in the "last ditch" case, but that seems much
+ * less efficient.
+ */
+bool
+have_join_order_restriction(PlannerInfo *root,
+ RelOptInfo *rel1, RelOptInfo *rel2)
+{
+ bool result = false;
+ ListCell *l;
+
+ /*
+ * If either side has a direct lateral reference to the other, attempt the
+ * join regardless of outer-join considerations.
+ */
+ if (bms_overlap(rel1->relids, rel2->direct_lateral_relids) ||
+ bms_overlap(rel2->relids, rel1->direct_lateral_relids))
+ return true;
+
+ /*
+ * Likewise, if both rels are needed to compute some PlaceHolderVar,
+ * attempt the join regardless of outer-join considerations. (This is not
+ * very desirable, because a PHV with a large eval_at set will cause a lot
+ * of probably-useless joins to be considered, but failing to do this can
+ * cause us to fail to construct a plan at all.)
+ */
+ foreach(l, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+
+ if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
+ bms_is_subset(rel2->relids, phinfo->ph_eval_at))
+ return true;
+ }
+
+ /*
+ * It's possible that the rels correspond to the left and right sides of a
+ * degenerate outer join, that is, one with no joinclause mentioning the
+ * non-nullable side; in which case we should force the join to occur.
+ *
+ * Also, the two rels could represent a clauseless join that has to be
+ * completed to build up the LHS or RHS of an outer join.
+ */
+ foreach(l, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+ /* ignore full joins --- other mechanisms handle them */
+ if (sjinfo->jointype == JOIN_FULL)
+ continue;
+
+ /* Can we perform the SJ with these rels? */
+ if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel2->relids))
+ {
+ result = true;
+ break;
+ }
+ if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel1->relids))
+ {
+ result = true;
+ break;
+ }
+
+ /*
+ * Might we need to join these rels to complete the RHS? We have to
+ * use "overlap" tests since either rel might include a lower SJ that
+ * has been proven to commute with this one.
+ */
+ if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
+ bms_overlap(sjinfo->min_righthand, rel2->relids))
+ {
+ result = true;
+ break;
+ }
+
+ /* Likewise for the LHS. */
+ if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
+ bms_overlap(sjinfo->min_lefthand, rel2->relids))
+ {
+ result = true;
+ break;
+ }
+ }
+
+ /*
+ * We do not force the join to occur if either input rel can legally be
+ * joined to anything else using joinclauses. This essentially means that
+ * clauseless bushy joins are put off as long as possible. The reason is
+ * that when there is a join order restriction high up in the join tree
+ * (that is, with many rels inside the LHS or RHS), we would otherwise
+ * expend lots of effort considering very stupid join combinations within
+ * its LHS or RHS.
+ */
+ if (result)
+ {
+ if (has_legal_joinclause(root, rel1) ||
+ has_legal_joinclause(root, rel2))
+ result = false;
+ }
+
+ return result;
+}
+
+
+/*
+ * has_join_restriction
+ * Detect whether the specified relation has join-order restrictions,
+ * due to being inside an outer join or an IN (sub-SELECT),
+ * or participating in any LATERAL references or multi-rel PHVs.
+ *
+ * Essentially, this tests whether have_join_order_restriction() could
+ * succeed with this rel and some other one. It's OK if we sometimes
+ * say "true" incorrectly. (Therefore, we don't bother with the relatively
+ * expensive has_legal_joinclause test.)
+ */
+static bool
+has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
+{
+ ListCell *l;
+
+ if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
+ return true;
+
+ foreach(l, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+
+ if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
+ !bms_equal(rel->relids, phinfo->ph_eval_at))
+ return true;
+ }
+
+ foreach(l, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+ /* ignore full joins --- other mechanisms preserve their ordering */
+ if (sjinfo->jointype == JOIN_FULL)
+ continue;
+
+ /* ignore if SJ is already contained in rel */
+ if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
+ bms_is_subset(sjinfo->min_righthand, rel->relids))
+ continue;
+
+ /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
+ if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
+ bms_overlap(sjinfo->min_righthand, rel->relids))
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * has_legal_joinclause
+ * Detect whether the specified relation can legally be joined
+ * to any other rels using join clauses.
+ *
+ * We consider only joins to single other relations in the current
+ * initial_rels list. This is sufficient to get a "true" result in most real
+ * queries, and an occasional erroneous "false" will only cost a bit more
+ * planning time. The reason for this limitation is that considering joins to
+ * other joins would require proving that the other join rel can legally be
+ * formed, which seems like too much trouble for something that's only a
+ * heuristic to save planning time. (Note: we must look at initial_rels
+ * and not all of the query, since when we are planning a sub-joinlist we
+ * may be forced to make clauseless joins within initial_rels even though
+ * there are join clauses linking to other parts of the query.)
+ */
+static bool
+has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
+{
+ ListCell *lc;
+
+ foreach(lc, root->initial_rels)
+ {
+ RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc);
+
+ /* ignore rels that are already in "rel" */
+ if (bms_overlap(rel->relids, rel2->relids))
+ continue;
+
+ if (have_relevant_joinclause(root, rel, rel2))
+ {
+ Relids joinrelids;
+ SpecialJoinInfo *sjinfo;
+ bool reversed;
+
+ /* join_is_legal needs relids of the union */
+ joinrelids = bms_union(rel->relids, rel2->relids);
+
+ if (join_is_legal(root, rel, rel2, joinrelids,
+ &sjinfo, &reversed))
+ {
+ /* Yes, this will work */
+ bms_free(joinrelids);
+ return true;
+ }
+
+ bms_free(joinrelids);
+ }
+ }
+
+ return false;
+}
+
+
+/*
+ * There's a pitfall for creating parameterized nestloops: suppose the inner
+ * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
+ * minimum eval_at set includes the outer rel (B) and some third rel (C).
+ * We might think we could create a B/A nestloop join that's parameterized by
+ * C. But we would end up with a plan in which the PHV's expression has to be
+ * evaluated as a nestloop parameter at the B/A join; and the executor is only
+ * set up to handle simple Vars as NestLoopParams. Rather than add complexity
+ * and overhead to the executor for such corner cases, it seems better to
+ * forbid the join. (Note that we can still make use of A's parameterized
+ * path with pre-joined B+C as the outer rel. have_join_order_restriction()
+ * ensures that we will consider making such a join even if there are not
+ * other reasons to do so.)
+ *
+ * So we check whether any PHVs used in the query could pose such a hazard.
+ * We don't have any simple way of checking whether a risky PHV would actually
+ * be used in the inner plan, and the case is so unusual that it doesn't seem
+ * worth working very hard on it.
+ *
+ * This needs to be checked in two places. If the inner rel's minimum
+ * parameterization would trigger the restriction, then join_is_legal() should
+ * reject the join altogether, because there will be no workable paths for it.
+ * But joinpath.c has to check again for every proposed nestloop path, because
+ * the inner path might have more than the minimum parameterization, causing
+ * some PHV to be dangerous for it that otherwise wouldn't be.
+ */
+bool
+have_dangerous_phv(PlannerInfo *root,
+ Relids outer_relids, Relids inner_params)
+{
+ ListCell *lc;
+
+ foreach(lc, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
+
+ if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
+ continue; /* ignore, could not be a nestloop param */
+ if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
+ continue; /* ignore, not relevant to this join */
+ if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
+ continue; /* safe, it can be eval'd within outerrel */
+ /* Otherwise, it's potentially unsafe, so reject the join */
+ return true;
+ }
+
+ /* OK to perform the join */
+ return false;
+}
+
+
+/*
+ * is_dummy_rel --- has relation been proven empty?
+ */
+bool
+is_dummy_rel(RelOptInfo *rel)
+{
+ Path *path;
+
+ /*
+ * A rel that is known dummy will have just one path that is a childless
+ * Append. (Even if somehow it has more paths, a childless Append will
+ * have cost zero and hence should be at the front of the pathlist.)
+ */
+ if (rel->pathlist == NIL)
+ return false;
+ path = (Path *) linitial(rel->pathlist);
+
+ /*
+ * Initially, a dummy path will just be a childless Append. But in later
+ * planning stages we might stick a ProjectSetPath and/or ProjectionPath
+ * on top, since Append can't project. Rather than make assumptions about
+ * which combinations can occur, just descend through whatever we find.
+ */
+ for (;;)
+ {
+ if (IsA(path, ProjectionPath))
+ path = ((ProjectionPath *) path)->subpath;
+ else if (IsA(path, ProjectSetPath))
+ path = ((ProjectSetPath *) path)->subpath;
+ else
+ break;
+ }
+ if (IS_DUMMY_APPEND(path))
+ return true;
+ return false;
+}
+
+/*
+ * Mark a relation as proven empty.
+ *
+ * During GEQO planning, this can get invoked more than once on the same
+ * baserel struct, so it's worth checking to see if the rel is already marked
+ * dummy.
+ *
+ * Also, when called during GEQO join planning, we are in a short-lived
+ * memory context. We must make sure that the dummy path attached to a
+ * baserel survives the GEQO cycle, else the baserel is trashed for future
+ * GEQO cycles. On the other hand, when we are marking a joinrel during GEQO,
+ * we don't want the dummy path to clutter the main planning context. Upshot
+ * is that the best solution is to explicitly make the dummy path in the same
+ * context the given RelOptInfo is in.
+ */
+void
+mark_dummy_rel(RelOptInfo *rel)
+{
+ MemoryContext oldcontext;
+
+ /* Already marked? */
+ if (is_dummy_rel(rel))
+ return;
+
+ /* No, so choose correct context to make the dummy path in */
+ oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+
+ /* Set dummy size estimate */
+ rel->rows = 0;
+
+ /* Evict any previously chosen paths */
+ rel->pathlist = NIL;
+ rel->partial_pathlist = NIL;
+
+ /* Set up the dummy path */
+ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
+ NIL, rel->lateral_relids,
+ 0, false, -1));
+
+ /* Set or update cheapest_total_path and related fields */
+ set_cheapest(rel);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+
+/*
+ * restriction_is_constant_false --- is a restrictlist just FALSE?
+ *
+ * In cases where a qual is provably constant FALSE, eval_const_expressions
+ * will generally have thrown away anything that's ANDed with it. In outer
+ * join situations this will leave us computing cartesian products only to
+ * decide there's no match for an outer row, which is pretty stupid. So,
+ * we need to detect the case.
+ *
+ * If only_pushed_down is true, then consider only quals that are pushed-down
+ * from the point of view of the joinrel.
+ */
+static bool
+restriction_is_constant_false(List *restrictlist,
+ RelOptInfo *joinrel,
+ bool only_pushed_down)
+{
+ ListCell *lc;
+
+ /*
+ * Despite the above comment, the restriction list we see here might
+ * possibly have other members besides the FALSE constant, since other
+ * quals could get "pushed down" to the outer join level. So we check
+ * each member of the list.
+ */
+ foreach(lc, restrictlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+ if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
+ continue;
+
+ if (rinfo->clause && IsA(rinfo->clause, Const))
+ {
+ Const *con = (Const *) rinfo->clause;
+
+ /* constant NULL is as good as constant FALSE for our purposes */
+ if (con->constisnull)
+ return true;
+ if (!DatumGetBool(con->constvalue))
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * Assess whether join between given two partitioned relations can be broken
+ * down into joins between matching partitions; a technique called
+ * "partitionwise join"
+ *
+ * Partitionwise join is possible when (a) the joining relations have the
+ * same partitioning scheme, and (b) there exists an equi-join between the
+ * partition keys of the two relations.
+ *
+ * Partitionwise join is planned as follows (details: optimizer/README.)
+ *
+ * 1. Create the RelOptInfos for joins between matching partitions, i.e.
+ * child-joins, and add paths to them.
+ *
+ * 2. Construct Append or MergeAppend paths across the set of child joins.
+ * This second phase is implemented by generate_partitionwise_join_paths().
+ *
+ * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
+ * obtained by translating the respective parent join structures.
+ */
+static void
+try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+ RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
+ List *parent_restrictlist)
+{
+ bool rel1_is_simple = IS_SIMPLE_REL(rel1);
+ bool rel2_is_simple = IS_SIMPLE_REL(rel2);
+ List *parts1 = NIL;
+ List *parts2 = NIL;
+ ListCell *lcr1 = NULL;
+ ListCell *lcr2 = NULL;
+ int cnt_parts;
+
+ /* Guard against stack overflow due to overly deep partition hierarchy. */
+ check_stack_depth();
+
+ /* Nothing to do, if the join relation is not partitioned. */
+ if (joinrel->part_scheme == NULL || joinrel->nparts == 0)
+ return;
+
+ /* The join relation should have consider_partitionwise_join set. */
+ Assert(joinrel->consider_partitionwise_join);
+
+ /*
+ * We can not perform partitionwise join if either of the joining
+ * relations is not partitioned.
+ */
+ if (!IS_PARTITIONED_REL(rel1) || !IS_PARTITIONED_REL(rel2))
+ return;
+
+ Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
+
+ /* The joining relations should have consider_partitionwise_join set. */
+ Assert(rel1->consider_partitionwise_join &&
+ rel2->consider_partitionwise_join);
+
+ /*
+ * The partition scheme of the join relation should match that of the
+ * joining relations.
+ */
+ Assert(joinrel->part_scheme == rel1->part_scheme &&
+ joinrel->part_scheme == rel2->part_scheme);
+
+ Assert(!(joinrel->partbounds_merged && (joinrel->nparts <= 0)));
+
+ compute_partition_bounds(root, rel1, rel2, joinrel, parent_sjinfo,
+ &parts1, &parts2);
+
+ if (joinrel->partbounds_merged)
+ {
+ lcr1 = list_head(parts1);
+ lcr2 = list_head(parts2);
+ }
+
+ /*
+ * Create child-join relations for this partitioned join, if those don't
+ * exist. Add paths to child-joins for a pair of child relations
+ * corresponding to the given pair of parent relations.
+ */
+ for (cnt_parts = 0; cnt_parts < joinrel->nparts; cnt_parts++)
+ {
+ RelOptInfo *child_rel1;
+ RelOptInfo *child_rel2;
+ bool rel1_empty;
+ bool rel2_empty;
+ SpecialJoinInfo *child_sjinfo;
+ List *child_restrictlist;
+ RelOptInfo *child_joinrel;
+ Relids child_joinrelids;
+ AppendRelInfo **appinfos;
+ int nappinfos;
+
+ if (joinrel->partbounds_merged)
+ {
+ child_rel1 = lfirst_node(RelOptInfo, lcr1);
+ child_rel2 = lfirst_node(RelOptInfo, lcr2);
+ lcr1 = lnext(parts1, lcr1);
+ lcr2 = lnext(parts2, lcr2);
+ }
+ else
+ {
+ child_rel1 = rel1->part_rels[cnt_parts];
+ child_rel2 = rel2->part_rels[cnt_parts];
+ }
+
+ rel1_empty = (child_rel1 == NULL || IS_DUMMY_REL(child_rel1));
+ rel2_empty = (child_rel2 == NULL || IS_DUMMY_REL(child_rel2));
+
+ /*
+ * Check for cases where we can prove that this segment of the join
+ * returns no rows, due to one or both inputs being empty (including
+ * inputs that have been pruned away entirely). If so just ignore it.
+ * These rules are equivalent to populate_joinrel_with_paths's rules
+ * for dummy input relations.
+ */
+ switch (parent_sjinfo->jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_SEMI:
+ if (rel1_empty || rel2_empty)
+ continue; /* ignore this join segment */
+ break;
+ case JOIN_LEFT:
+ case JOIN_ANTI:
+ if (rel1_empty)
+ continue; /* ignore this join segment */
+ break;
+ case JOIN_FULL:
+ if (rel1_empty && rel2_empty)
+ continue; /* ignore this join segment */
+ break;
+ default:
+ /* other values not expected here */
+ elog(ERROR, "unrecognized join type: %d",
+ (int) parent_sjinfo->jointype);
+ break;
+ }
+
+ /*
+ * If a child has been pruned entirely then we can't generate paths
+ * for it, so we have to reject partitionwise joining unless we were
+ * able to eliminate this partition above.
+ */
+ if (child_rel1 == NULL || child_rel2 == NULL)
+ {
+ /*
+ * Mark the joinrel as unpartitioned so that later functions treat
+ * it correctly.
+ */
+ joinrel->nparts = 0;
+ return;
+ }
+
+ /*
+ * If a leaf relation has consider_partitionwise_join=false, it means
+ * that it's a dummy relation for which we skipped setting up tlist
+ * expressions and adding EC members in set_append_rel_size(), so
+ * again we have to fail here.
+ */
+ if (rel1_is_simple && !child_rel1->consider_partitionwise_join)
+ {
+ Assert(child_rel1->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ Assert(IS_DUMMY_REL(child_rel1));
+ joinrel->nparts = 0;
+ return;
+ }
+ if (rel2_is_simple && !child_rel2->consider_partitionwise_join)
+ {
+ Assert(child_rel2->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ Assert(IS_DUMMY_REL(child_rel2));
+ joinrel->nparts = 0;
+ return;
+ }
+
+ /* We should never try to join two overlapping sets of rels. */
+ Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
+ child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
+ appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
+
+ /*
+ * Construct SpecialJoinInfo from the parent join relation's
+ * SpecialJoinInfo.
+ */
+ child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
+ child_rel1->relids,
+ child_rel2->relids);
+
+ /*
+ * Construct restrictions applicable to the child join from those
+ * applicable to the parent join.
+ */
+ child_restrictlist =
+ (List *) adjust_appendrel_attrs(root,
+ (Node *) parent_restrictlist,
+ nappinfos, appinfos);
+ pfree(appinfos);
+
+ child_joinrel = joinrel->part_rels[cnt_parts];
+ if (!child_joinrel)
+ {
+ child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
+ joinrel, child_restrictlist,
+ child_sjinfo,
+ child_sjinfo->jointype);
+ joinrel->part_rels[cnt_parts] = child_joinrel;
+ joinrel->live_parts = bms_add_member(joinrel->live_parts, cnt_parts);
+ joinrel->all_partrels = bms_add_members(joinrel->all_partrels,
+ child_joinrel->relids);
+ }
+
+ Assert(bms_equal(child_joinrel->relids, child_joinrelids));
+
+ populate_joinrel_with_paths(root, child_rel1, child_rel2,
+ child_joinrel, child_sjinfo,
+ child_restrictlist);
+ }
+}
+
+/*
+ * Construct the SpecialJoinInfo for a child-join by translating
+ * SpecialJoinInfo for the join between parents. left_relids and right_relids
+ * are the relids of left and right side of the join respectively.
+ */
+static SpecialJoinInfo *
+build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
+ Relids left_relids, Relids right_relids)
+{
+ SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
+ AppendRelInfo **left_appinfos;
+ int left_nappinfos;
+ AppendRelInfo **right_appinfos;
+ int right_nappinfos;
+
+ memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
+ left_appinfos = find_appinfos_by_relids(root, left_relids,
+ &left_nappinfos);
+ right_appinfos = find_appinfos_by_relids(root, right_relids,
+ &right_nappinfos);
+
+ sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
+ left_nappinfos, left_appinfos);
+ sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
+ right_nappinfos,
+ right_appinfos);
+ sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
+ left_nappinfos, left_appinfos);
+ sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
+ right_nappinfos,
+ right_appinfos);
+ sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
+ (Node *) sjinfo->semi_rhs_exprs,
+ right_nappinfos,
+ right_appinfos);
+
+ pfree(left_appinfos);
+ pfree(right_appinfos);
+
+ return sjinfo;
+}
+
+/*
+ * compute_partition_bounds
+ * Compute the partition bounds for a join rel from those for inputs
+ */
+static void
+compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1,
+ RelOptInfo *rel2, RelOptInfo *joinrel,
+ SpecialJoinInfo *parent_sjinfo,
+ List **parts1, List **parts2)
+{
+ /*
+ * If we don't have the partition bounds for the join rel yet, try to
+ * compute those along with pairs of partitions to be joined.
+ */
+ if (joinrel->nparts == -1)
+ {
+ PartitionScheme part_scheme = joinrel->part_scheme;
+ PartitionBoundInfo boundinfo = NULL;
+ int nparts = 0;
+
+ Assert(joinrel->boundinfo == NULL);
+ Assert(joinrel->part_rels == NULL);
+
+ /*
+ * See if the partition bounds for inputs are exactly the same, in
+ * which case we don't need to work hard: the join rel will have the
+ * same partition bounds as inputs, and the partitions with the same
+ * cardinal positions will form the pairs.
+ *
+ * Note: even in cases where one or both inputs have merged bounds, it
+ * would be possible for both the bounds to be exactly the same, but
+ * it seems unlikely to be worth the cycles to check.
+ */
+ if (!rel1->partbounds_merged &&
+ !rel2->partbounds_merged &&
+ rel1->nparts == rel2->nparts &&
+ partition_bounds_equal(part_scheme->partnatts,
+ part_scheme->parttyplen,
+ part_scheme->parttypbyval,
+ rel1->boundinfo, rel2->boundinfo))
+ {
+ boundinfo = rel1->boundinfo;
+ nparts = rel1->nparts;
+ }
+ else
+ {
+ /* Try merging the partition bounds for inputs. */
+ boundinfo = partition_bounds_merge(part_scheme->partnatts,
+ part_scheme->partsupfunc,
+ part_scheme->partcollation,
+ rel1, rel2,
+ parent_sjinfo->jointype,
+ parts1, parts2);
+ if (boundinfo == NULL)
+ {
+ joinrel->nparts = 0;
+ return;
+ }
+ nparts = list_length(*parts1);
+ joinrel->partbounds_merged = true;
+ }
+
+ Assert(nparts > 0);
+ joinrel->boundinfo = boundinfo;
+ joinrel->nparts = nparts;
+ joinrel->part_rels =
+ (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts);
+ }
+ else
+ {
+ Assert(joinrel->nparts > 0);
+ Assert(joinrel->boundinfo);
+ Assert(joinrel->part_rels);
+
+ /*
+ * If the join rel's partbounds_merged flag is true, it means inputs
+ * are not guaranteed to have the same partition bounds, therefore we
+ * can't assume that the partitions at the same cardinal positions
+ * form the pairs; let get_matching_part_pairs() generate the pairs.
+ * Otherwise, nothing to do since we can assume that.
+ */
+ if (joinrel->partbounds_merged)
+ {
+ get_matching_part_pairs(root, joinrel, rel1, rel2,
+ parts1, parts2);
+ Assert(list_length(*parts1) == joinrel->nparts);
+ Assert(list_length(*parts2) == joinrel->nparts);
+ }
+ }
+}
+
+/*
+ * get_matching_part_pairs
+ * Generate pairs of partitions to be joined from inputs
+ */
+static void
+get_matching_part_pairs(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *rel1, RelOptInfo *rel2,
+ List **parts1, List **parts2)
+{
+ bool rel1_is_simple = IS_SIMPLE_REL(rel1);
+ bool rel2_is_simple = IS_SIMPLE_REL(rel2);
+ int cnt_parts;
+
+ *parts1 = NIL;
+ *parts2 = NIL;
+
+ for (cnt_parts = 0; cnt_parts < joinrel->nparts; cnt_parts++)
+ {
+ RelOptInfo *child_joinrel = joinrel->part_rels[cnt_parts];
+ RelOptInfo *child_rel1;
+ RelOptInfo *child_rel2;
+ Relids child_relids1;
+ Relids child_relids2;
+
+ /*
+ * If this segment of the join is empty, it means that this segment
+ * was ignored when previously creating child-join paths for it in
+ * try_partitionwise_join() as it would not contribute to the join
+ * result, due to one or both inputs being empty; add NULL to each of
+ * the given lists so that this segment will be ignored again in that
+ * function.
+ */
+ if (!child_joinrel)
+ {
+ *parts1 = lappend(*parts1, NULL);
+ *parts2 = lappend(*parts2, NULL);
+ continue;
+ }
+
+ /*
+ * Get a relids set of partition(s) involved in this join segment that
+ * are from the rel1 side.
+ */
+ child_relids1 = bms_intersect(child_joinrel->relids,
+ rel1->all_partrels);
+ Assert(bms_num_members(child_relids1) == bms_num_members(rel1->relids));
+
+ /*
+ * Get a child rel for rel1 with the relids. Note that we should have
+ * the child rel even if rel1 is a join rel, because in that case the
+ * partitions specified in the relids would have matching/overlapping
+ * boundaries, so the specified partitions should be considered as
+ * ones to be joined when planning partitionwise joins of rel1,
+ * meaning that the child rel would have been built by the time we get
+ * here.
+ */
+ if (rel1_is_simple)
+ {
+ int varno = bms_singleton_member(child_relids1);
+
+ child_rel1 = find_base_rel(root, varno);
+ }
+ else
+ child_rel1 = find_join_rel(root, child_relids1);
+ Assert(child_rel1);
+
+ /*
+ * Get a relids set of partition(s) involved in this join segment that
+ * are from the rel2 side.
+ */
+ child_relids2 = bms_intersect(child_joinrel->relids,
+ rel2->all_partrels);
+ Assert(bms_num_members(child_relids2) == bms_num_members(rel2->relids));
+
+ /*
+ * Get a child rel for rel2 with the relids. See above comments.
+ */
+ if (rel2_is_simple)
+ {
+ int varno = bms_singleton_member(child_relids2);
+
+ child_rel2 = find_base_rel(root, varno);
+ }
+ else
+ child_rel2 = find_join_rel(root, child_relids2);
+ Assert(child_rel2);
+
+ /*
+ * The join of rel1 and rel2 is legal, so is the join of the child
+ * rels obtained above; add them to the given lists as a join pair
+ * producing this join segment.
+ */
+ *parts1 = lappend(*parts1, child_rel1);
+ *parts2 = lappend(*parts2, child_rel2);
+ }
+}
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
new file mode 100644
index 0000000..86a35cd
--- /dev/null
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -0,0 +1,1917 @@
+/*-------------------------------------------------------------------------
+ *
+ * pathkeys.c
+ * Utilities for matching and building path keys
+ *
+ * See src/backend/optimizer/README for a great deal of information about
+ * the nature and use of path keys.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/pathkeys.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/stratnum.h"
+#include "catalog/pg_opfamily.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/plannodes.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "partitioning/partbounds.h"
+#include "utils/lsyscache.h"
+
+
+static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
+ RelOptInfo *partrel,
+ int partkeycol);
+static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
+static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
+
+
+/****************************************************************************
+ * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
+ ****************************************************************************/
+
+/*
+ * make_canonical_pathkey
+ * Given the parameters for a PathKey, find any pre-existing matching
+ * pathkey in the query's list of "canonical" pathkeys. Make a new
+ * entry if there's not one already.
+ *
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.
+ */
+PathKey *
+make_canonical_pathkey(PlannerInfo *root,
+ EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first)
+{
+ PathKey *pk;
+ ListCell *lc;
+ MemoryContext oldcontext;
+
+ /* Can't make canonical pathkeys if the set of ECs might still change */
+ if (!root->ec_merging_done)
+ elog(ERROR, "too soon to build canonical pathkeys");
+
+ /* The passed eclass might be non-canonical, so chase up to the top */
+ while (eclass->ec_merged)
+ eclass = eclass->ec_merged;
+
+ foreach(lc, root->canon_pathkeys)
+ {
+ pk = (PathKey *) lfirst(lc);
+ if (eclass == pk->pk_eclass &&
+ opfamily == pk->pk_opfamily &&
+ strategy == pk->pk_strategy &&
+ nulls_first == pk->pk_nulls_first)
+ return pk;
+ }
+
+ /*
+ * Be sure canonical pathkeys are allocated in the main planning context.
+ * Not an issue in normal planning, but it is for GEQO.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ pk = makeNode(PathKey);
+ pk->pk_eclass = eclass;
+ pk->pk_opfamily = opfamily;
+ pk->pk_strategy = strategy;
+ pk->pk_nulls_first = nulls_first;
+
+ root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return pk;
+}
+
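make_canonical_pathkey() interns PathKeys: it scans root->canon_pathkeys for a node with the same eclass, opfamily, strategy and nulls_first, and only builds a new node (in the main planning context) when none exists. That is what later lets compare_pathkeys() test pathkey equality by plain pointer comparison. A standalone sketch of the interning pattern (simplified fields, invented names, not part of the patch):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for PathKey: two fields plus a list link. */
typedef struct SketchKey
{
	int			eclass_id;		/* stand-in for pk_eclass */
	bool		nulls_first;	/* stand-in for pk_nulls_first */
	struct SketchKey *next;
} SketchKey;

static SketchKey *canon_keys = NULL;	/* stand-in for root->canon_pathkeys */

/* Return the existing canonical key if there is one, else make and keep one. */
static SketchKey *
get_canonical_key(int eclass_id, bool nulls_first)
{
	SketchKey  *k;

	for (k = canon_keys; k != NULL; k = k->next)
	{
		if (k->eclass_id == eclass_id && k->nulls_first == nulls_first)
			return k;
	}

	k = malloc(sizeof(SketchKey));
	k->eclass_id = eclass_id;
	k->nulls_first = nulls_first;
	k->next = canon_keys;
	canon_keys = k;
	return k;
}

int
main(void)
{
	SketchKey  *a = get_canonical_key(1, false);
	SketchKey  *b = get_canonical_key(1, false);

	/* Equal inputs intern to the very same node, so pointers can be compared. */
	printf("same node? %d\n", a == b);	/* prints 1 */
	return 0;
}
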
+/*
+ * pathkey_is_redundant
+ * Is a pathkey redundant with one already in the given list?
+ *
+ * We detect two cases:
+ *
+ * 1. If the new pathkey's equivalence class contains a constant, and isn't
+ * below an outer join, then we can disregard it as a sort key. An example:
+ * SELECT ... WHERE x = 42 ORDER BY x, y;
+ * We may as well just sort by y. Note that because of opfamily matching,
+ * this is semantically correct: we know that the equality constraint is one
+ * that actually binds the variable to a single value in the terms of any
+ * ordering operator that might go with the eclass. This rule not only lets
+ * us simplify (or even skip) explicit sorts, but also allows matching index
+ * sort orders to a query when there are don't-care index columns.
+ *
+ * 2. If the new pathkey's equivalence class is the same as that of any
+ * existing member of the pathkey list, then it is redundant. Some examples:
+ * SELECT ... ORDER BY x, x;
+ * SELECT ... ORDER BY x, x DESC;
+ * SELECT ... WHERE x = y ORDER BY x, y;
+ * In all these cases the second sort key cannot distinguish values that are
+ * considered equal by the first, and so there's no point in using it.
+ * Note in particular that we need not compare opfamily (all the opfamilies
+ * of the EC have the same notion of equality) nor sort direction.
+ *
+ * Both the given pathkey and the list members must be canonical for this
+ * to work properly, but that's okay since we no longer ever construct any
+ * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
+ * canonical includes the additional requirement of no redundant entries,
+ * which is exactly what we are checking for here.)
+ *
+ * Because the equivclass.c machinery forms only one copy of any EC per query,
+ * pointer comparison is enough to decide whether canonical ECs are the same.
+ */
+static bool
+pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
+{
+ EquivalenceClass *new_ec = new_pathkey->pk_eclass;
+ ListCell *lc;
+
+ /* Check for EC containing a constant --- unconditionally redundant */
+ if (EC_MUST_BE_REDUNDANT(new_ec))
+ return true;
+
+ /* If same EC already used in list, then redundant */
+ foreach(lc, pathkeys)
+ {
+ PathKey *old_pathkey = (PathKey *) lfirst(lc);
+
+ if (new_ec == old_pathkey->pk_eclass)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * make_pathkey_from_sortinfo
+ * Given an expression and sort-order information, create a PathKey.
+ * The result is always a "canonical" PathKey, but it might be redundant.
+ *
+ * expr is the expression, and nullable_relids is the set of base relids
+ * that are potentially nullable below it.
+ *
+ * If the PathKey is being generated from a SortGroupClause, sortref should be
+ * the SortGroupClause's SortGroupRef; otherwise zero.
+ *
+ * If rel is not NULL, it identifies a specific relation we're considering
+ * a path for, and indicates that child EC members for that relation can be
+ * considered. Otherwise child members are ignored. (See the comments for
+ * get_eclass_for_sort_expr.)
+ *
+ * create_it is true if we should create any missing EquivalenceClass
+ * needed to represent the sort key. If it's false, we return NULL if the
+ * sort key isn't already present in any EquivalenceClass.
+ */
+static PathKey *
+make_pathkey_from_sortinfo(PlannerInfo *root,
+ Expr *expr,
+ Relids nullable_relids,
+ Oid opfamily,
+ Oid opcintype,
+ Oid collation,
+ bool reverse_sort,
+ bool nulls_first,
+ Index sortref,
+ Relids rel,
+ bool create_it)
+{
+ int16 strategy;
+ Oid equality_op;
+ List *opfamilies;
+ EquivalenceClass *eclass;
+
+ strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
+
+ /*
+ * EquivalenceClasses need to contain opfamily lists based on the family
+ * membership of mergejoinable equality operators, which could belong to
+ * more than one opfamily. So we have to look up the opfamily's equality
+ * operator and get its membership.
+ */
+ equality_op = get_opfamily_member(opfamily,
+ opcintype,
+ opcintype,
+ BTEqualStrategyNumber);
+ if (!OidIsValid(equality_op)) /* shouldn't happen */
+ elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+ BTEqualStrategyNumber, opcintype, opcintype, opfamily);
+ opfamilies = get_mergejoin_opfamilies(equality_op);
+ if (!opfamilies) /* certainly should find some */
+ elog(ERROR, "could not find opfamilies for equality operator %u",
+ equality_op);
+
+ /* Now find or (optionally) create a matching EquivalenceClass */
+ eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
+ opfamilies, opcintype, collation,
+ sortref, rel, create_it);
+
+ /* Fail if no EC and !create_it */
+ if (!eclass)
+ return NULL;
+
+ /* And finally we can find or create a PathKey node */
+ return make_canonical_pathkey(root, eclass, opfamily,
+ strategy, nulls_first);
+}
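+
+/*
+ * Parameter-mapping sketch (editor's addition, values purely illustrative):
+ * roughly speaking, an ORDER BY item like "expr DESC NULLS LAST" over a
+ * btree-sortable type corresponds to reverse_sort = true and
+ * nulls_first = false, while a plain "expr" (ASC NULLS LAST by default)
+ * corresponds to reverse_sort = false and nulls_first = false. See
+ * make_pathkey_from_sortop() and build_index_pathkeys() for actual callers.
+ */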
+
+/*
+ * make_pathkey_from_sortop
+ * Like make_pathkey_from_sortinfo, but work from a sort operator.
+ *
+ * This should eventually go away, but we need to restructure SortGroupClause
+ * first.
+ */
+static PathKey *
+make_pathkey_from_sortop(PlannerInfo *root,
+ Expr *expr,
+ Relids nullable_relids,
+ Oid ordering_op,
+ bool nulls_first,
+ Index sortref,
+ bool create_it)
+{
+ Oid opfamily,
+ opcintype,
+ collation;
+ int16 strategy;
+
+ /* Find the operator in pg_amop --- failure shouldn't happen */
+ if (!get_ordering_op_properties(ordering_op,
+ &opfamily, &opcintype, &strategy))
+ elog(ERROR, "operator %u is not a valid ordering operator",
+ ordering_op);
+
+ /* Because SortGroupClause doesn't carry collation, consult the expr */
+ collation = exprCollation((Node *) expr);
+
+ return make_pathkey_from_sortinfo(root,
+ expr,
+ nullable_relids,
+ opfamily,
+ opcintype,
+ collation,
+ (strategy == BTGreaterStrategyNumber),
+ nulls_first,
+ sortref,
+ NULL,
+ create_it);
+}
+
+
+/****************************************************************************
+ * PATHKEY COMPARISONS
+ ****************************************************************************/
+
+/*
+ * compare_pathkeys
+ * Compare two pathkeys to see if they are equivalent, and if not whether
+ * one is "better" than the other.
+ *
+ * We assume the pathkeys are canonical, and so they can be checked for
+ * equality by simple pointer comparison.
+ */
+PathKeysComparison
+compare_pathkeys(List *keys1, List *keys2)
+{
+ ListCell *key1,
+ *key2;
+
+ /*
+ * Fall out quickly if we are passed two identical lists. This mostly
+ * catches the case where both are NIL, but that's common enough to
+ * warrant the test.
+ */
+ if (keys1 == keys2)
+ return PATHKEYS_EQUAL;
+
+ forboth(key1, keys1, key2, keys2)
+ {
+ PathKey *pathkey1 = (PathKey *) lfirst(key1);
+ PathKey *pathkey2 = (PathKey *) lfirst(key2);
+
+ if (pathkey1 != pathkey2)
+ return PATHKEYS_DIFFERENT; /* no need to keep looking */
+ }
+
+ /*
+ * If we reached the end of only one list, the other is longer and
+ * therefore not a subset.
+ */
+ if (key1 != NULL)
+ return PATHKEYS_BETTER1; /* key1 is longer */
+ if (key2 != NULL)
+ return PATHKEYS_BETTER2; /* key2 is longer */
+ return PATHKEYS_EQUAL;
+}
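+
+/*
+ * Worked example (editor's addition): with distinct canonical pathkeys A
+ * and B,
+ *		compare_pathkeys(list(A), list(A, B)) yields PATHKEYS_BETTER2,
+ *		compare_pathkeys(list(A, B), list(A)) yields PATHKEYS_BETTER1,
+ *		compare_pathkeys(list(A), list(B))    yields PATHKEYS_DIFFERENT,
+ * i.e. one list is "better" only when the other is a proper prefix of it.
+ */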
+
+/*
+ * pathkeys_contained_in
+ * Common special case of compare_pathkeys: we just want to know
+ * if keys2 are at least as well sorted as keys1.
+ */
+bool
+pathkeys_contained_in(List *keys1, List *keys2)
+{
+ switch (compare_pathkeys(keys1, keys2))
+ {
+ case PATHKEYS_EQUAL:
+ case PATHKEYS_BETTER2:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/*
+ * pathkeys_count_contained_in
+ * Same as pathkeys_contained_in, but also sets length of longest
+ * common prefix of keys1 and keys2.
+ */
+bool
+pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
+{
+ int n = 0;
+ ListCell *key1,
+ *key2;
+
+ /*
+	 * See if we can avoid looping through both lists. This optimization
+ * gains us several percent in planning time in a worst-case test.
+ */
+ if (keys1 == keys2)
+ {
+ *n_common = list_length(keys1);
+ return true;
+ }
+ else if (keys1 == NIL)
+ {
+ *n_common = 0;
+ return true;
+ }
+ else if (keys2 == NIL)
+ {
+ *n_common = 0;
+ return false;
+ }
+
+ /*
+ * If both lists are non-empty, iterate through both to find out how many
+ * items are shared.
+ */
+ forboth(key1, keys1, key2, keys2)
+ {
+ PathKey *pathkey1 = (PathKey *) lfirst(key1);
+ PathKey *pathkey2 = (PathKey *) lfirst(key2);
+
+ if (pathkey1 != pathkey2)
+ {
+ *n_common = n;
+ return false;
+ }
+ n++;
+ }
+
+ /* If we ended with a null value, then we've processed the whole list. */
+ *n_common = n;
+ return (key1 == NULL);
+}
+
+/*
+ * get_cheapest_path_for_pathkeys
+ * Find the cheapest path (according to the specified criterion) that
+ * satisfies the given pathkeys and parameterization.
+ * Return NULL if no such path.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (in canonical form!)
+ * 'required_outer' denotes allowable outer relations for parameterized paths
+ * 'cost_criterion' is STARTUP_COST or TOTAL_COST
+ * 'require_parallel_safe' causes us to consider only parallel-safe paths
+ */
+Path *
+get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+ Relids required_outer,
+ CostSelector cost_criterion,
+ bool require_parallel_safe)
+{
+ Path *matched_path = NULL;
+ ListCell *l;
+
+ foreach(l, paths)
+ {
+ Path *path = (Path *) lfirst(l);
+
+ /*
+ * Since cost comparison is a lot cheaper than pathkey comparison, do
+ * that first. (XXX is that still true?)
+ */
+ if (matched_path != NULL &&
+ compare_path_costs(matched_path, path, cost_criterion) <= 0)
+ continue;
+
+ if (require_parallel_safe && !path->parallel_safe)
+ continue;
+
+ if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+ matched_path = path;
+ }
+ return matched_path;
+}
+
+/*
+ * get_cheapest_fractional_path_for_pathkeys
+ * Find the cheapest path (for retrieving a specified fraction of all
+ * the tuples) that satisfies the given pathkeys and parameterization.
+ * Return NULL if no such path.
+ *
+ * See compare_fractional_path_costs() for the interpretation of the fraction
+ * parameter.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (in canonical form!)
+ * 'required_outer' denotes allowable outer relations for parameterized paths
+ * 'fraction' is the fraction of the total tuples expected to be retrieved
+ */
+Path *
+get_cheapest_fractional_path_for_pathkeys(List *paths,
+ List *pathkeys,
+ Relids required_outer,
+ double fraction)
+{
+ Path *matched_path = NULL;
+ ListCell *l;
+
+ foreach(l, paths)
+ {
+ Path *path = (Path *) lfirst(l);
+
+ /*
+ * Since cost comparison is a lot cheaper than pathkey comparison, do
+ * that first. (XXX is that still true?)
+ */
+ if (matched_path != NULL &&
+ compare_fractional_path_costs(matched_path, path, fraction) <= 0)
+ continue;
+
+ if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+ matched_path = path;
+ }
+ return matched_path;
+}
+
+
+/*
+ * get_cheapest_parallel_safe_total_inner
+ * Find the unparameterized parallel-safe path with the least total cost.
+ */
+Path *
+get_cheapest_parallel_safe_total_inner(List *paths)
+{
+ ListCell *l;
+
+ foreach(l, paths)
+ {
+ Path *innerpath = (Path *) lfirst(l);
+
+ if (innerpath->parallel_safe &&
+ bms_is_empty(PATH_REQ_OUTER(innerpath)))
+ return innerpath;
+ }
+
+ return NULL;
+}
+
+/****************************************************************************
+ * NEW PATHKEY FORMATION
+ ****************************************************************************/
+
+/*
+ * build_index_pathkeys
+ * Build a pathkeys list that describes the ordering induced by an index
+ * scan using the given index. (Note that an unordered index doesn't
+ * induce any ordering, so we return NIL.)
+ *
+ * If 'scandir' is BackwardScanDirection, build pathkeys representing a
+ * backwards scan of the index.
+ *
+ * We iterate only key columns of covering indexes, since non-key columns
+ * don't influence index ordering. The result is canonical, meaning that
+ * redundant pathkeys are removed; it may therefore have fewer entries than
+ * there are key columns in the index.
+ *
+ * Another reason for stopping early is that we may be able to tell that
+ * an index column's sort order is uninteresting for this query. However,
+ * that test is just based on the existence of an EquivalenceClass and not
+ * on position in pathkey lists, so it's not complete. Caller should call
+ * truncate_useless_pathkeys() to possibly remove more pathkeys.
+ */
+List *
+build_index_pathkeys(PlannerInfo *root,
+ IndexOptInfo *index,
+ ScanDirection scandir)
+{
+ List *retval = NIL;
+ ListCell *lc;
+ int i;
+
+ if (index->sortopfamily == NULL)
+ return NIL; /* non-orderable index */
+
+ i = 0;
+ foreach(lc, index->indextlist)
+ {
+ TargetEntry *indextle = (TargetEntry *) lfirst(lc);
+ Expr *indexkey;
+ bool reverse_sort;
+ bool nulls_first;
+ PathKey *cpathkey;
+
+ /*
+		 * INCLUDE columns are stored in the index unordered, so they don't
+		 * support ordered index scans.
+ */
+ if (i >= index->nkeycolumns)
+ break;
+
+ /* We assume we don't need to make a copy of the tlist item */
+ indexkey = indextle->expr;
+
+ if (ScanDirectionIsBackward(scandir))
+ {
+ reverse_sort = !index->reverse_sort[i];
+ nulls_first = !index->nulls_first[i];
+ }
+ else
+ {
+ reverse_sort = index->reverse_sort[i];
+ nulls_first = index->nulls_first[i];
+ }
+
+ /*
+ * OK, try to make a canonical pathkey for this sort key. Note we're
+ * underneath any outer joins, so nullable_relids should be NULL.
+ */
+ cpathkey = make_pathkey_from_sortinfo(root,
+ indexkey,
+ NULL,
+ index->sortopfamily[i],
+ index->opcintype[i],
+ index->indexcollations[i],
+ reverse_sort,
+ nulls_first,
+ 0,
+ index->rel->relids,
+ false);
+
+ if (cpathkey)
+ {
+ /*
+ * We found the sort key in an EquivalenceClass, so it's relevant
+ * for this query. Add it to list, unless it's redundant.
+ */
+ if (!pathkey_is_redundant(cpathkey, retval))
+ retval = lappend(retval, cpathkey);
+ }
+ else
+ {
+ /*
+ * Boolean index keys might be redundant even if they do not
+ * appear in an EquivalenceClass, because of our special treatment
+ * of boolean equality conditions --- see the comment for
+ * indexcol_is_bool_constant_for_query(). If that applies, we can
+ * continue to examine lower-order index columns. Otherwise, the
+ * sort key is not an interesting sort order for this query, so we
+ * should stop considering index columns; any lower-order sort
+ * keys won't be useful either.
+ */
+ if (!indexcol_is_bool_constant_for_query(root, index, i))
+ break;
+ }
+
+ i++;
+ }
+
+ return retval;
+}
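+
+/*
+ * Illustrative example (editor's addition; assumes a table t(x int, y int)
+ * with a btree index on (x, y)): for
+ *		SELECT * FROM t ORDER BY x, y;
+ * a forward scan of the index yields the pathkeys (x, y), so the ORDER BY
+ * can be satisfied without an explicit sort; a backward scan of the same
+ * index yields (x DESC, y DESC). Whether an index scan is actually chosen
+ * is still a costing decision.
+ */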
+
+/*
+ * partkey_is_bool_constant_for_query
+ *
+ * If a partition key column is constrained to have a constant value by the
+ * query's WHERE conditions, then it's irrelevant for sort-order
+ * considerations. Usually that means we have a restriction clause
+ * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
+ * containing a constant, which is recognized as redundant by
+ * build_partition_pathkeys(). But if the partition key column is a
+ * boolean variable (or expression), then we are not going to see such a
+ * WHERE clause, because expression preprocessing will have simplified it
+ * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
+ * to have a matching EquivalenceClass (unless the query also contains
+ * "ORDER BY partkeycol"). To allow such cases to work the same as they would
+ * for non-boolean values, this function is provided to detect whether the
+ * specified partition key column matches a boolean restriction clause.
+ */
+static bool
+partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
+{
+ PartitionScheme partscheme = partrel->part_scheme;
+ ListCell *lc;
+
+ /* If the partkey isn't boolean, we can't possibly get a match */
+ if (!IsBooleanOpfamily(partscheme->partopfamily[partkeycol]))
+ return false;
+
+ /* Check each restriction clause for the partitioned rel */
+ foreach(lc, partrel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Ignore pseudoconstant quals, they won't match */
+ if (rinfo->pseudoconstant)
+ continue;
+
+ /* See if we can match the clause's expression to the partkey column */
+ if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
+ return true;
+ }
+
+ return false;
+}
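+
+/*
+ * Illustrative example (editor's addition; assumes, purely for illustration,
+ * a table p partitioned by RANGE (flag, ts) where "flag" is boolean): in
+ *		SELECT * FROM p WHERE flag ORDER BY ts;
+ * there is no EquivalenceClass for "flag", but this function recognizes the
+ * bare "flag" qual as pinning that partition key column, letting
+ * build_partition_pathkeys() continue to the "ts" column rather than
+ * stopping with only a partial set of keys.
+ */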
+
+/*
+ * matches_boolean_partition_clause
+ * Determine if the boolean clause described by rinfo matches
+ * partrel's partkeycol-th partition key column.
+ *
+ * "Matches" can be either an exact match (equivalent to partkey = true),
+ * or a NOT above an exact match (equivalent to partkey = false).
+ */
+static bool
+matches_boolean_partition_clause(RestrictInfo *rinfo,
+ RelOptInfo *partrel, int partkeycol)
+{
+ Node *clause = (Node *) rinfo->clause;
+ Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
+
+ /* Direct match? */
+ if (equal(partexpr, clause))
+ return true;
+ /* NOT clause? */
+ else if (is_notclause(clause))
+ {
+ Node *arg = (Node *) get_notclausearg((Expr *) clause);
+
+ if (equal(partexpr, arg))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * build_partition_pathkeys
+ * Build a pathkeys list that describes the ordering induced by the
+ * partitions of partrel, under either forward or backward scan
+ * as per scandir.
+ *
+ * Caller must have checked that the partitions are properly ordered,
+ * as detected by partitions_are_ordered().
+ *
+ * Sets *partialkeys to true if pathkeys were only built for a prefix of the
+ * partition key, or false if the pathkeys include all columns of the
+ * partition key.
+ */
+List *
+build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
+ ScanDirection scandir, bool *partialkeys)
+{
+ List *retval = NIL;
+ PartitionScheme partscheme = partrel->part_scheme;
+ int i;
+
+ Assert(partscheme != NULL);
+ Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts));
+ /* For now, we can only cope with baserels */
+ Assert(IS_SIMPLE_REL(partrel));
+
+ for (i = 0; i < partscheme->partnatts; i++)
+ {
+ PathKey *cpathkey;
+ Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
+
+ /*
+ * Try to make a canonical pathkey for this partkey.
+ *
+ * We're considering a baserel scan, so nullable_relids should be
+ * NULL. Also, we assume the PartitionDesc lists any NULL partition
+ * last, so we treat the scan like a NULLS LAST index: we have
+ * nulls_first for backwards scan only.
+ */
+ cpathkey = make_pathkey_from_sortinfo(root,
+ keyCol,
+ NULL,
+ partscheme->partopfamily[i],
+ partscheme->partopcintype[i],
+ partscheme->partcollation[i],
+ ScanDirectionIsBackward(scandir),
+ ScanDirectionIsBackward(scandir),
+ 0,
+ partrel->relids,
+ false);
+
+ if (cpathkey)
+ {
+ /*
+ * We found the sort key in an EquivalenceClass, so it's relevant
+ * for this query. Add it to list, unless it's redundant.
+ */
+ if (!pathkey_is_redundant(cpathkey, retval))
+ retval = lappend(retval, cpathkey);
+ }
+ else
+ {
+ /*
+ * Boolean partition keys might be redundant even if they do not
+ * appear in an EquivalenceClass, because of our special treatment
+ * of boolean equality conditions --- see the comment for
+ * partkey_is_bool_constant_for_query(). If that applies, we can
+ * continue to examine lower-order partition keys. Otherwise, the
+ * sort key is not an interesting sort order for this query, so we
+ * should stop considering partition columns; any lower-order sort
+ * keys won't be useful either.
+ */
+ if (!partkey_is_bool_constant_for_query(partrel, i))
+ {
+ *partialkeys = true;
+ return retval;
+ }
+ }
+ }
+
+ *partialkeys = false;
+ return retval;
+}
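+
+/*
+ * Illustrative example (editor's addition; assumes a table m partitioned by
+ * RANGE (ts) with non-overlapping, suitably ordered partitions): for
+ *		SELECT * FROM m ORDER BY ts;
+ * the pathkeys built here can let the planner append the partitions in
+ * bound order (an ordered Append or MergeAppend) rather than sorting the
+ * whole result. partitions_are_ordered() must accept the layout first, and
+ * costing decides the final plan.
+ */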
+
+/*
+ * build_expression_pathkey
+ * Build a pathkeys list that describes an ordering by a single expression
+ * using the given sort operator.
+ *
+ * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
+ * We deduce the other arguments assuming default sort order for the operator.
+ *
+ * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
+ * is false and the expression isn't already in some EquivalenceClass.
+ */
+List *
+build_expression_pathkey(PlannerInfo *root,
+ Expr *expr,
+ Relids nullable_relids,
+ Oid opno,
+ Relids rel,
+ bool create_it)
+{
+ List *pathkeys;
+ Oid opfamily,
+ opcintype;
+ int16 strategy;
+ PathKey *cpathkey;
+
+ /* Find the operator in pg_amop --- failure shouldn't happen */
+ if (!get_ordering_op_properties(opno,
+ &opfamily, &opcintype, &strategy))
+ elog(ERROR, "operator %u is not a valid ordering operator",
+ opno);
+
+ cpathkey = make_pathkey_from_sortinfo(root,
+ expr,
+ nullable_relids,
+ opfamily,
+ opcintype,
+ exprCollation((Node *) expr),
+ (strategy == BTGreaterStrategyNumber),
+ (strategy == BTGreaterStrategyNumber),
+ 0,
+ rel,
+ create_it);
+
+ if (cpathkey)
+ pathkeys = list_make1(cpathkey);
+ else
+ pathkeys = NIL;
+
+ return pathkeys;
+}
+
+/*
+ * convert_subquery_pathkeys
+ * Build a pathkeys list that describes the ordering of a subquery's
+ * result, in the terms of the outer query. This is essentially a
+ * task of conversion.
+ *
+ * 'rel': outer query's RelOptInfo for the subquery relation.
+ * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
+ * 'subquery_tlist': the subquery's output targetlist, in its terms.
+ *
+ * We intentionally don't do truncate_useless_pathkeys() here, because there
+ * are situations where seeing the raw ordering of the subquery is helpful.
+ * For example, if it returns ORDER BY x DESC, that may prompt us to
+ * construct a mergejoin using DESC order rather than ASC order; but the
+ * right_merge_direction heuristic would have us throw the knowledge away.
+ */
+List *
+convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
+ List *subquery_pathkeys,
+ List *subquery_tlist)
+{
+ List *retval = NIL;
+ int retvallen = 0;
+ int outer_query_keys = list_length(root->query_pathkeys);
+ ListCell *i;
+
+ foreach(i, subquery_pathkeys)
+ {
+ PathKey *sub_pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
+ PathKey *best_pathkey = NULL;
+
+ if (sub_eclass->ec_has_volatile)
+ {
+ /*
+ * If the sub_pathkey's EquivalenceClass is volatile, then it must
+ * have come from an ORDER BY clause, and we have to match it to
+ * that same targetlist entry.
+ */
+ TargetEntry *tle;
+ Var *outer_var;
+
+ if (sub_eclass->ec_sortref == 0) /* can't happen */
+ elog(ERROR, "volatile EquivalenceClass has no sortref");
+ tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
+ Assert(tle);
+ /* Is TLE actually available to the outer query? */
+ outer_var = find_var_for_subquery_tle(rel, tle);
+ if (outer_var)
+ {
+ /* We can represent this sub_pathkey */
+ EquivalenceMember *sub_member;
+ EquivalenceClass *outer_ec;
+
+ Assert(list_length(sub_eclass->ec_members) == 1);
+ sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
+
+ /*
+ * Note: it might look funny to be setting sortref = 0 for a
+ * reference to a volatile sub_eclass. However, the
+ * expression is *not* volatile in the outer query: it's just
+ * a Var referencing whatever the subquery emitted. (IOW, the
+ * outer query isn't going to re-execute the volatile
+ * expression itself.) So this is okay. Likewise, it's
+ * correct to pass nullable_relids = NULL, because we're
+ * underneath any outer joins appearing in the outer query.
+ */
+ outer_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) outer_var,
+ NULL,
+ sub_eclass->ec_opfamilies,
+ sub_member->em_datatype,
+ sub_eclass->ec_collation,
+ 0,
+ rel->relids,
+ false);
+
+ /*
+ * If we don't find a matching EC, sub-pathkey isn't
+ * interesting to the outer query
+ */
+ if (outer_ec)
+ best_pathkey =
+ make_canonical_pathkey(root,
+ outer_ec,
+ sub_pathkey->pk_opfamily,
+ sub_pathkey->pk_strategy,
+ sub_pathkey->pk_nulls_first);
+ }
+ }
+ else
+ {
+ /*
+ * Otherwise, the sub_pathkey's EquivalenceClass could contain
+ * multiple elements (representing knowledge that multiple items
+ * are effectively equal). Each element might match none, one, or
+ * more of the output columns that are visible to the outer query.
+ * This means we may have multiple possible representations of the
+ * sub_pathkey in the context of the outer query. Ideally we
+ * would generate them all and put them all into an EC of the
+ * outer query, thereby propagating equality knowledge up to the
+ * outer query. Right now we cannot do so, because the outer
+ * query's EquivalenceClasses are already frozen when this is
+ * called. Instead we prefer the one that has the highest "score"
+ * (number of EC peers, plus one if it matches the outer
+ * query_pathkeys). This is the most likely to be useful in the
+ * outer query.
+ */
+ int best_score = -1;
+ ListCell *j;
+
+ foreach(j, sub_eclass->ec_members)
+ {
+ EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
+ Expr *sub_expr = sub_member->em_expr;
+ Oid sub_expr_type = sub_member->em_datatype;
+ Oid sub_expr_coll = sub_eclass->ec_collation;
+ ListCell *k;
+
+ if (sub_member->em_is_child)
+ continue; /* ignore children here */
+
+ foreach(k, subquery_tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(k);
+ Var *outer_var;
+ Expr *tle_expr;
+ EquivalenceClass *outer_ec;
+ PathKey *outer_pk;
+ int score;
+
+ /* Is TLE actually available to the outer query? */
+ outer_var = find_var_for_subquery_tle(rel, tle);
+ if (!outer_var)
+ continue;
+
+ /*
+ * The targetlist entry is considered to match if it
+ * matches after sort-key canonicalization. That is
+ * needed since the sub_expr has been through the same
+ * process.
+ */
+ tle_expr = canonicalize_ec_expression(tle->expr,
+ sub_expr_type,
+ sub_expr_coll);
+ if (!equal(tle_expr, sub_expr))
+ continue;
+
+ /* See if we have a matching EC for the TLE */
+ outer_ec = get_eclass_for_sort_expr(root,
+ (Expr *) outer_var,
+ NULL,
+ sub_eclass->ec_opfamilies,
+ sub_expr_type,
+ sub_expr_coll,
+ 0,
+ rel->relids,
+ false);
+
+ /*
+ * If we don't find a matching EC, this sub-pathkey isn't
+ * interesting to the outer query
+ */
+ if (!outer_ec)
+ continue;
+
+ outer_pk = make_canonical_pathkey(root,
+ outer_ec,
+ sub_pathkey->pk_opfamily,
+ sub_pathkey->pk_strategy,
+ sub_pathkey->pk_nulls_first);
+ /* score = # of equivalence peers */
+ score = list_length(outer_ec->ec_members) - 1;
+ /* +1 if it matches the proper query_pathkeys item */
+ if (retvallen < outer_query_keys &&
+ list_nth(root->query_pathkeys, retvallen) == outer_pk)
+ score++;
+ if (score > best_score)
+ {
+ best_pathkey = outer_pk;
+ best_score = score;
+ }
+ }
+ }
+ }
+
+ /*
+ * If we couldn't find a representation of this sub_pathkey, we're
+ * done (we can't use the ones to its right, either).
+ */
+ if (!best_pathkey)
+ break;
+
+ /*
+		 * Eliminate redundant ordering info; this could happen if the outer
+		 * query equates some of the subquery's sort keys.
+ */
+ if (!pathkey_is_redundant(best_pathkey, retval))
+ {
+ retval = lappend(retval, best_pathkey);
+ retvallen++;
+ }
+ }
+
+ return retval;
+}
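+
+/*
+ * Illustrative example (editor's addition; assumes tables t(x int, y int)
+ * and u(x int)): in
+ *		SELECT * FROM (SELECT x, y FROM t ORDER BY x DESC) ss
+ *		JOIN u ON ss.x = u.x;
+ * the subquery's "x DESC" pathkey is translated into a pathkey on ss.x in
+ * the outer query's terms, which may let the planner build a DESC-ordered
+ * mergejoin against u instead of re-sorting ss, as discussed in the header
+ * comment above.
+ */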
+
+/*
+ * find_var_for_subquery_tle
+ *
+ * If the given subquery tlist entry is due to be emitted by the subquery's
+ * scan node, return a Var for it, else return NULL.
+ *
+ * We need this to ensure that we don't return pathkeys describing values
+ * that are unavailable above the level of the subquery scan.
+ */
+static Var *
+find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
+{
+ ListCell *lc;
+
+ /* If the TLE is resjunk, it's certainly not visible to the outer query */
+ if (tle->resjunk)
+ return NULL;
+
+ /* Search the rel's targetlist to see what it will return */
+ foreach(lc, rel->reltarget->exprs)
+ {
+ Var *var = (Var *) lfirst(lc);
+
+ /* Ignore placeholders */
+ if (!IsA(var, Var))
+ continue;
+ Assert(var->varno == rel->relid);
+
+ /* If we find a Var referencing this TLE, we're good */
+ if (var->varattno == tle->resno)
+ return copyObject(var); /* Make a copy for safety */
+ }
+ return NULL;
+}
+
+/*
+ * build_join_pathkeys
+ * Build the path keys for a join relation constructed by mergejoin or
+ * nestloop join. This is normally the same as the outer path's keys.
+ *
+ * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
+ * having the outer path's path keys, because null lefthand rows may be
+ * inserted at random points. It must be treated as unsorted.
+ *
+ * We truncate away any pathkeys that are uninteresting for higher joins.
+ *
+ * 'joinrel' is the join relation that paths are being formed for
+ * 'jointype' is the join type (inner, left, full, etc)
+ * 'outer_pathkeys' is the list of the current outer path's path keys
+ *
+ * Returns the list of new path keys.
+ */
+List *
+build_join_pathkeys(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ List *outer_pathkeys)
+{
+ if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
+ return NIL;
+
+ /*
+ * This used to be quite a complex bit of code, but now that all pathkey
+ * sublists start out life canonicalized, we don't have to do a darn thing
+ * here!
+ *
+ * We do, however, need to truncate the pathkeys list, since it may
+ * contain pathkeys that were useful for forming this joinrel but are
+ * uninteresting to higher levels.
+ */
+ return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
+}
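+
+/*
+ * Illustrative note (editor's addition; assumes tables a(x int), b(x int)):
+ * in
+ *		SELECT * FROM a LEFT JOIN b ON a.x = b.x ORDER BY a.x;
+ * a mergejoin or nestloop whose outer side is sorted by a.x produces output
+ * ordered by a.x, so the ORDER BY may need no further sort. Were the join
+ * FULL or RIGHT, null-extended rows could appear at arbitrary points, so
+ * the result must be treated as unsorted, per the rule above.
+ */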
+
+/****************************************************************************
+ * PATHKEYS AND SORT CLAUSES
+ ****************************************************************************/
+
+/*
+ * make_pathkeys_for_sortclauses
+ * Generate a pathkeys list that represents the sort order specified
+ * by a list of SortGroupClauses
+ *
+ * The resulting PathKeys are always in canonical form. (Actually, there
+ * is no longer any code anywhere that creates non-canonical PathKeys.)
+ *
+ * We assume that root->nullable_baserels is the set of base relids that could
+ * have gone to NULL below the SortGroupClause expressions. This is okay if
+ * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
+ * and if this function is only invoked after deconstruct_jointree. In the
+ * future we might have to make callers pass in the appropriate
+ * nullable-relids set, but for now it seems unnecessary.
+ *
+ * 'sortclauses' is a list of SortGroupClause nodes
+ * 'tlist' is the targetlist to find the referenced tlist entries in
+ */
+List *
+make_pathkeys_for_sortclauses(PlannerInfo *root,
+ List *sortclauses,
+ List *tlist)
+{
+ List *pathkeys = NIL;
+ ListCell *l;
+
+ foreach(l, sortclauses)
+ {
+ SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
+ Expr *sortkey;
+ PathKey *pathkey;
+
+ sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+ Assert(OidIsValid(sortcl->sortop));
+ pathkey = make_pathkey_from_sortop(root,
+ sortkey,
+ root->nullable_baserels,
+ sortcl->sortop,
+ sortcl->nulls_first,
+ sortcl->tleSortGroupRef,
+ true);
+
+ /* Canonical form eliminates redundant ordering keys */
+ if (!pathkey_is_redundant(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+ return pathkeys;
+}
+
+/****************************************************************************
+ * PATHKEYS AND MERGECLAUSES
+ ****************************************************************************/
+
+/*
+ * initialize_mergeclause_eclasses
+ * Set the EquivalenceClass links in a mergeclause restrictinfo.
+ *
+ * RestrictInfo contains fields in which we may cache pointers to
+ * EquivalenceClasses for the left and right inputs of the mergeclause.
+ * (If the mergeclause is a true equivalence clause these will be the
+ * same EquivalenceClass, otherwise not.) If the mergeclause is either
+ * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
+ * then it's easy to set up the left_ec and right_ec members --- otherwise,
+ * this function should be called to set them up. We will generate new
+ * EquivalenceClauses if necessary to represent the mergeclause's left and
+ * right sides.
+ *
+ * Note this is called before EC merging is complete, so the links won't
+ * necessarily point to canonical ECs. Before they are actually used for
+ * anything, update_mergeclause_eclasses must be called to ensure that
+ * they've been updated to point to canonical ECs.
+ */
+void
+initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
+{
+ Expr *clause = restrictinfo->clause;
+ Oid lefttype,
+ righttype;
+
+ /* Should be a mergeclause ... */
+ Assert(restrictinfo->mergeopfamilies != NIL);
+ /* ... with links not yet set */
+ Assert(restrictinfo->left_ec == NULL);
+ Assert(restrictinfo->right_ec == NULL);
+
+ /* Need the declared input types of the operator */
+ op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
+
+ /* Find or create a matching EquivalenceClass for each side */
+ restrictinfo->left_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_leftop(clause),
+ restrictinfo->nullable_relids,
+ restrictinfo->mergeopfamilies,
+ lefttype,
+ ((OpExpr *) clause)->inputcollid,
+ 0,
+ NULL,
+ true);
+ restrictinfo->right_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_rightop(clause),
+ restrictinfo->nullable_relids,
+ restrictinfo->mergeopfamilies,
+ righttype,
+ ((OpExpr *) clause)->inputcollid,
+ 0,
+ NULL,
+ true);
+}
+
+/*
+ * update_mergeclause_eclasses
+ * Make the cached EquivalenceClass links valid in a mergeclause
+ * restrictinfo.
+ *
+ * These pointers should have been set by process_equivalence or
+ * initialize_mergeclause_eclasses, but they might have been set to
+ * non-canonical ECs that got merged later. Chase up to the canonical
+ * merged parent if so.
+ */
+void
+update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
+{
+ /* Should be a merge clause ... */
+ Assert(restrictinfo->mergeopfamilies != NIL);
+ /* ... with pointers already set */
+ Assert(restrictinfo->left_ec != NULL);
+ Assert(restrictinfo->right_ec != NULL);
+
+ /* Chase up to the top as needed */
+ while (restrictinfo->left_ec->ec_merged)
+ restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
+ while (restrictinfo->right_ec->ec_merged)
+ restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
+}
+
+/*
+ * find_mergeclauses_for_outer_pathkeys
+ * This routine attempts to find a list of mergeclauses that can be
+ * used with a specified ordering for the join's outer relation.
+ * If successful, it returns a list of mergeclauses.
+ *
+ * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
+ * 'restrictinfos' is a list of mergejoinable restriction clauses for the
+ * join relation being formed, in no particular order.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
+ * The result is NIL if no merge can be done, else a maximal list of
+ * usable mergeclauses (represented as a list of their restrictinfo nodes).
+ * The list is ordered to match the pathkeys, as required for execution.
+ */
+List *
+find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
+ List *pathkeys,
+ List *restrictinfos)
+{
+ List *mergeclauses = NIL;
+ ListCell *i;
+
+ /* make sure we have eclasses cached in the clauses */
+ foreach(i, restrictinfos)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
+
+ update_mergeclause_eclasses(root, rinfo);
+ }
+
+ foreach(i, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+ List *matched_restrictinfos = NIL;
+ ListCell *j;
+
+ /*----------
+ * A mergejoin clause matches a pathkey if it has the same EC.
+ * If there are multiple matching clauses, take them all. In plain
+ * inner-join scenarios we expect only one match, because
+ * equivalence-class processing will have removed any redundant
+ * mergeclauses. However, in outer-join scenarios there might be
+ * multiple matches. An example is
+ *
+ * select * from a full join b
+ * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
+ *
+ * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
+ * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
+ * we *must* do so or we will be unable to form a valid plan.
+ *
+ * We expect that the given pathkeys list is canonical, which means
+ * no two members have the same EC, so it's not possible for this
+ * code to enter the same mergeclause into the result list twice.
+ *
+ * It's possible that multiple matching clauses might have different
+ * ECs on the other side, in which case the order we put them into our
+ * result makes a difference in the pathkeys required for the inner
+ * input rel. However this routine hasn't got any info about which
+ * order would be best, so we don't worry about that.
+ *
+ * It's also possible that the selected mergejoin clauses produce
+ * a noncanonical ordering of pathkeys for the inner side, ie, we
+ * might select clauses that reference b.v1, b.v2, b.v1 in that
+ * order. This is not harmful in itself, though it suggests that
+ * the clauses are partially redundant. Since the alternative is
+ * to omit mergejoin clauses and thereby possibly fail to generate a
+ * plan altogether, we live with it. make_inner_pathkeys_for_merge()
+ * has to delete duplicates when it constructs the inner pathkeys
+ * list, and we also have to deal with such cases specially in
+ * create_mergejoin_plan().
+ *----------
+ */
+ foreach(j, restrictinfos)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
+ EquivalenceClass *clause_ec;
+
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->left_ec : rinfo->right_ec;
+ if (clause_ec == pathkey_ec)
+ matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
+ }
+
+ /*
+ * If we didn't find a mergeclause, we're done --- any additional
+ * sort-key positions in the pathkeys are useless. (But we can still
+ * mergejoin if we found at least one mergeclause.)
+ */
+ if (matched_restrictinfos == NIL)
+ break;
+
+ /*
+ * If we did find usable mergeclause(s) for this sort-key position,
+ * add them to result list.
+ */
+ mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
+ }
+
+ return mergeclauses;
+}
+
+/*
+ * select_outer_pathkeys_for_merge
+ * Builds a pathkey list representing a possible sort ordering
+ * that can be used with the given mergeclauses.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ * that will be used in a merge join.
+ * 'joinrel' is the join relation we are trying to construct.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
+ * Returns a pathkeys list that can be applied to the outer relation.
+ *
+ * Since we assume here that a sort is required, there is no particular use
+ * in matching any available ordering of the outerrel. (joinpath.c has an
+ * entirely separate code path for considering sort-free mergejoins.) Rather,
+ * it's interesting to try to match the requested query_pathkeys so that a
+ * second output sort may be avoided; and failing that, we try to list "more
+ * popular" keys (those with the most unmatched EquivalenceClass peers)
+ * earlier, in hopes of making the resulting ordering useful for as many
+ * higher-level mergejoins as possible.
+ */
+List *
+select_outer_pathkeys_for_merge(PlannerInfo *root,
+ List *mergeclauses,
+ RelOptInfo *joinrel)
+{
+ List *pathkeys = NIL;
+ int nClauses = list_length(mergeclauses);
+ EquivalenceClass **ecs;
+ int *scores;
+ int necs;
+ ListCell *lc;
+ int j;
+
+ /* Might have no mergeclauses */
+ if (nClauses == 0)
+ return NIL;
+
+ /*
+ * Make arrays of the ECs used by the mergeclauses (dropping any
+ * duplicates) and their "popularity" scores.
+ */
+ ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
+ scores = (int *) palloc(nClauses * sizeof(int));
+ necs = 0;
+
+ foreach(lc, mergeclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ EquivalenceClass *oeclass;
+ int score;
+ ListCell *lc2;
+
+ /* get the outer eclass */
+ update_mergeclause_eclasses(root, rinfo);
+
+ if (rinfo->outer_is_left)
+ oeclass = rinfo->left_ec;
+ else
+ oeclass = rinfo->right_ec;
+
+ /* reject duplicates */
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == oeclass)
+ break;
+ }
+ if (j < necs)
+ continue;
+
+ /* compute score */
+ score = 0;
+ foreach(lc2, oeclass->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+
+ /* Potential future join partner? */
+ if (!em->em_is_const && !em->em_is_child &&
+ !bms_overlap(em->em_relids, joinrel->relids))
+ score++;
+ }
+
+ ecs[necs] = oeclass;
+ scores[necs] = score;
+ necs++;
+ }
+
+ /*
+ * Find out if we have all the ECs mentioned in query_pathkeys; if so we
+ * can generate a sort order that's also useful for final output. There is
+ * no percentage in a partial match, though, so we have to have 'em all.
+ */
+ if (root->query_pathkeys)
+ {
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *query_ec = query_pathkey->pk_eclass;
+
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == query_ec)
+ break; /* found match */
+ }
+ if (j >= necs)
+ break; /* didn't find match */
+ }
+ /* if we got to the end of the list, we have them all */
+ if (lc == NULL)
+ {
+ /* copy query_pathkeys as starting point for our output */
+ pathkeys = list_copy(root->query_pathkeys);
+ /* mark their ECs as already-emitted */
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *query_ec = query_pathkey->pk_eclass;
+
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == query_ec)
+ {
+ scores[j] = -1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Add remaining ECs to the list in popularity order, using a default sort
+ * ordering. (We could use qsort() here, but the list length is usually
+ * so small it's not worth it.)
+ */
+ for (;;)
+ {
+ int best_j;
+ int best_score;
+ EquivalenceClass *ec;
+ PathKey *pathkey;
+
+ best_j = 0;
+ best_score = scores[0];
+ for (j = 1; j < necs; j++)
+ {
+ if (scores[j] > best_score)
+ {
+ best_j = j;
+ best_score = scores[j];
+ }
+ }
+ if (best_score < 0)
+ break; /* all done */
+ ec = ecs[best_j];
+ scores[best_j] = -1;
+ pathkey = make_canonical_pathkey(root,
+ ec,
+ linitial_oid(ec->ec_opfamilies),
+ BTLessStrategyNumber,
+ false);
+ /* can't be redundant because no duplicate ECs */
+ Assert(!pathkey_is_redundant(pathkey, pathkeys));
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+
+ pfree(ecs);
+ pfree(scores);
+
+ return pathkeys;
+}
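+
+/*
+ * Worked example (editor's addition): suppose the mergeclauses reference
+ * outer-side ECs {a.x} and {a.y} and the query has ORDER BY a.y. Every
+ * query_pathkeys EC is then covered, so the result starts with the a.y
+ * pathkey (hoping to avoid a final output sort) and appends a.x afterward.
+ * Had the ORDER BY mentioned some uncovered column, the ECs would simply be
+ * emitted in descending popularity-score order with ASC/NULLS LAST defaults.
+ */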
+
+/*
+ * make_inner_pathkeys_for_merge
+ * Builds a pathkey list representing the explicit sort order that
+ * must be applied to an inner path to make it usable with the
+ * given mergeclauses.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
+ * that will be used in a merge join, in order.
+ * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
+ * side of the join.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
+ * Returns a pathkeys list that can be applied to the inner relation.
+ *
+ * Note that it is not this routine's job to decide whether sorting is
+ * actually needed for a particular input path. Assume a sort is necessary;
+ * just make the keys, eh?
+ */
+List *
+make_inner_pathkeys_for_merge(PlannerInfo *root,
+ List *mergeclauses,
+ List *outer_pathkeys)
+{
+ List *pathkeys = NIL;
+ EquivalenceClass *lastoeclass;
+ PathKey *opathkey;
+ ListCell *lc;
+ ListCell *lop;
+
+ lastoeclass = NULL;
+ opathkey = NULL;
+ lop = list_head(outer_pathkeys);
+
+ foreach(lc, mergeclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ EquivalenceClass *oeclass;
+ EquivalenceClass *ieclass;
+ PathKey *pathkey;
+
+ update_mergeclause_eclasses(root, rinfo);
+
+ if (rinfo->outer_is_left)
+ {
+ oeclass = rinfo->left_ec;
+ ieclass = rinfo->right_ec;
+ }
+ else
+ {
+ oeclass = rinfo->right_ec;
+ ieclass = rinfo->left_ec;
+ }
+
+ /* outer eclass should match current or next pathkeys */
+ /* we check this carefully for debugging reasons */
+ if (oeclass != lastoeclass)
+ {
+ if (!lop)
+ elog(ERROR, "too few pathkeys for mergeclauses");
+ opathkey = (PathKey *) lfirst(lop);
+ lop = lnext(outer_pathkeys, lop);
+ lastoeclass = opathkey->pk_eclass;
+ if (oeclass != lastoeclass)
+ elog(ERROR, "outer pathkeys do not match mergeclause");
+ }
+
+ /*
+ * Often, we'll have same EC on both sides, in which case the outer
+ * pathkey is also canonical for the inner side, and we can skip a
+ * useless search.
+ */
+ if (ieclass == oeclass)
+ pathkey = opathkey;
+ else
+ pathkey = make_canonical_pathkey(root,
+ ieclass,
+ opathkey->pk_opfamily,
+ opathkey->pk_strategy,
+ opathkey->pk_nulls_first);
+
+ /*
+ * Don't generate redundant pathkeys (which can happen if multiple
+ * mergeclauses refer to the same EC). Because we do this, the output
+ * pathkey list isn't necessarily ordered like the mergeclauses, which
+ * complicates life for create_mergejoin_plan(). But if we didn't,
+ * we'd have a noncanonical sort key list, which would be bad; for one
+ * reason, it certainly wouldn't match any available sort order for
+ * the input relation.
+ */
+ if (!pathkey_is_redundant(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+
+ return pathkeys;
+}
+
+/*
+ * trim_mergeclauses_for_inner_pathkeys
+ * This routine trims a list of mergeclauses to include just those that
+ * work with a specified ordering for the join's inner relation.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
+ * join relation being formed, in an order known to work for the
+ * currently-considered sort ordering of the join's outer rel.
+ * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
+ * it should be equal to, or a truncation of, the result of
+ * make_inner_pathkeys_for_merge for these mergeclauses.
+ *
+ * What we return will be a prefix of the given mergeclauses list.
+ *
+ * We need this logic because make_inner_pathkeys_for_merge's result isn't
+ * necessarily in the same order as the mergeclauses. That means that if we
+ * consider an inner-rel pathkey list that is a truncation of that result,
+ * we might need to drop mergeclauses even though they match a surviving inner
+ * pathkey. This happens when they are to the right of a mergeclause that
+ * matches a removed inner pathkey.
+ *
+ * The mergeclauses must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ */
+List *
+trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root,
+ List *mergeclauses,
+ List *pathkeys)
+{
+ List *new_mergeclauses = NIL;
+ PathKey *pathkey;
+ EquivalenceClass *pathkey_ec;
+ bool matched_pathkey;
+ ListCell *lip;
+ ListCell *i;
+
+ /* No pathkeys => no mergeclauses (though we don't expect this case) */
+ if (pathkeys == NIL)
+ return NIL;
+ /* Initialize to consider first pathkey */
+ lip = list_head(pathkeys);
+ pathkey = (PathKey *) lfirst(lip);
+ pathkey_ec = pathkey->pk_eclass;
+ lip = lnext(pathkeys, lip);
+ matched_pathkey = false;
+
+ /* Scan mergeclauses to see how many we can use */
+ foreach(i, mergeclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
+ EquivalenceClass *clause_ec;
+
+ /* Assume we needn't do update_mergeclause_eclasses again here */
+
+ /* Check clause's inner-rel EC against current pathkey */
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->right_ec : rinfo->left_ec;
+
+ /* If we don't have a match, attempt to advance to next pathkey */
+ if (clause_ec != pathkey_ec)
+ {
+ /* If we had no clauses matching this inner pathkey, must stop */
+ if (!matched_pathkey)
+ break;
+
+ /* Advance to next inner pathkey, if any */
+ if (lip == NULL)
+ break;
+ pathkey = (PathKey *) lfirst(lip);
+ pathkey_ec = pathkey->pk_eclass;
+ lip = lnext(pathkeys, lip);
+ matched_pathkey = false;
+ }
+
+ /* If mergeclause matches current inner pathkey, we can use it */
+ if (clause_ec == pathkey_ec)
+ {
+ new_mergeclauses = lappend(new_mergeclauses, rinfo);
+ matched_pathkey = true;
+ }
+ else
+ {
+ /* Else, no hope of adding any more mergeclauses */
+ break;
+ }
+ }
+
+ return new_mergeclauses;
+}
+
+
+/****************************************************************************
+ * PATHKEY USEFULNESS CHECKS
+ *
+ * We only want to remember as many of the pathkeys of a path as have some
+ * potential use, either for subsequent mergejoins or for meeting the query's
+ * requested output ordering. This ensures that add_path() won't consider
+ * a path to have a usefully different ordering unless it really is useful.
+ * These routines check for usefulness of given pathkeys.
+ ****************************************************************************/
+
+/*
+ * pathkeys_useful_for_merging
+ * Count the number of pathkeys that may be useful for mergejoins
+ * above the given relation.
+ *
+ * We consider a pathkey potentially useful if it corresponds to the merge
+ * ordering of either side of any joinclause for the rel. This might be
+ * overoptimistic, since joinclauses that require different other relations
+ * might never be usable at the same time, but trying to be exact is likely
+ * to be more trouble than it's worth.
+ *
+ * To avoid doubling the number of mergejoin paths considered, we would like
+ * to consider only one of the two scan directions (ASC or DESC) as useful
+ * for merging for any given target column. The choice is arbitrary unless
+ * one of the directions happens to match an ORDER BY key, in which case
+ * that direction should be preferred, in hopes of avoiding a final sort step.
+ * right_merge_direction() implements this heuristic.
+ */
+static int
+pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
+{
+ int useful = 0;
+ ListCell *i;
+
+ foreach(i, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(i);
+ bool matched = false;
+ ListCell *j;
+
+ /* If "wrong" direction, not useful for merging */
+ if (!right_merge_direction(root, pathkey))
+ break;
+
+ /*
+ * First look into the EquivalenceClass of the pathkey, to see if
+ * there are any members not yet joined to the rel. If so, it's
+ * surely possible to generate a mergejoin clause using them.
+ */
+ if (rel->has_eclass_joins &&
+ eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
+ matched = true;
+ else
+ {
+ /*
+ * Otherwise search the rel's joininfo list, which contains
+ * non-EquivalenceClass-derivable join clauses that might
+ * nonetheless be mergejoinable.
+ */
+ foreach(j, rel->joininfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+ if (restrictinfo->mergeopfamilies == NIL)
+ continue;
+ update_mergeclause_eclasses(root, restrictinfo);
+
+ if (pathkey->pk_eclass == restrictinfo->left_ec ||
+ pathkey->pk_eclass == restrictinfo->right_ec)
+ {
+ matched = true;
+ break;
+ }
+ }
+ }
+
+ /*
+ * If we didn't find a mergeclause, we're done --- any additional
+ * sort-key positions in the pathkeys are useless. (But we can still
+ * mergejoin if we found at least one mergeclause.)
+ */
+ if (matched)
+ useful++;
+ else
+ break;
+ }
+
+ return useful;
+}
+
+/*
+ * right_merge_direction
+ * Check whether the pathkey embodies the preferred sort direction
+ * for merging its target column.
+ */
+static bool
+right_merge_direction(PlannerInfo *root, PathKey *pathkey)
+{
+ ListCell *l;
+
+ foreach(l, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(l);
+
+ if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
+ pathkey->pk_opfamily == query_pathkey->pk_opfamily)
+ {
+ /*
+ * Found a matching query sort column. Prefer this pathkey's
+ * direction iff it matches. Note that we ignore pk_nulls_first,
+ * which means that a sort might be needed anyway ... but we still
+ * want to prefer only one of the two possible directions, and we
+ * might as well use this one.
+ */
+ return (pathkey->pk_strategy == query_pathkey->pk_strategy);
+ }
+ }
+
+ /* If no matching ORDER BY request, prefer the ASC direction */
+ return (pathkey->pk_strategy == BTLessStrategyNumber);
+}
+
+/*
+ * pathkeys_useful_for_ordering
+ * Count the number of pathkeys that are useful for meeting the
+ * query's requested output ordering.
+ *
+ * Because we have the possibility of incremental sort, a prefix list of
+ * keys is potentially useful for improving the performance of the requested
+ * ordering. Thus we return 0 if no useful keys are found, or the number of
+ * leading keys shared by the list and the requested ordering.
+ */
+static int
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+{
+ int n_common_pathkeys;
+
+ if (root->query_pathkeys == NIL)
+ return 0; /* no special ordering requested */
+
+ if (pathkeys == NIL)
+ return 0; /* unordered path */
+
+ (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys,
+ &n_common_pathkeys);
+
+ return n_common_pathkeys;
+}
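+
+/*
+ * Illustrative example (editor's addition; assumes a table t(x int, y int)
+ * with a btree index on (x) and incremental sort enabled): for
+ *		SELECT * FROM t ORDER BY x, y;
+ * a path sorted only by x shares one leading key with query_pathkeys, so
+ * this function returns 1 rather than 0; that single useful key is what
+ * lets an Incremental Sort on (x, y) exploit the presorted-by-x input.
+ */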
+
+/*
+ * truncate_useless_pathkeys
+ * Shorten the given pathkey list to just the useful pathkeys.
+ */
+List *
+truncate_useless_pathkeys(PlannerInfo *root,
+ RelOptInfo *rel,
+ List *pathkeys)
+{
+ int nuseful;
+ int nuseful2;
+
+ nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
+ nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
+
+ /*
+ * Note: not safe to modify input list destructively, but we can avoid
+ * copying the list if we're not actually going to change it
+ */
+ if (nuseful == 0)
+ return NIL;
+ else if (nuseful == list_length(pathkeys))
+ return pathkeys;
+ else
+ return list_truncate(list_copy(pathkeys), nuseful);
+}
+
+/*
+ * has_useful_pathkeys
+ * Detect whether the specified rel could have any pathkeys that are
+ * useful according to truncate_useless_pathkeys().
+ *
+ * This is a cheap test that lets us skip building pathkeys at all in very
+ * simple queries. It's OK to err in the direction of returning "true" when
+ * there really aren't any usable pathkeys, but erring in the other direction
+ * is bad --- so keep this in sync with the routines above!
+ *
+ * We could make the test more complex, for example checking to see if any of
+ * the joinclauses are really mergejoinable, but that likely wouldn't win
+ * often enough to repay the extra cycles. Queries with neither a join nor
+ * a sort are reasonably common, though, so this much work seems worthwhile.
+ */
+bool
+has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
+{
+ if (rel->joininfo != NIL || rel->has_eclass_joins)
+ return true; /* might be able to use pathkeys for merging */
+ if (root->query_pathkeys != NIL)
+ return true; /* might be able to use them for ordering */
+ return false; /* definitely useless */
+}
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
new file mode 100644
index 0000000..279ca1f
--- /dev/null
+++ b/src/backend/optimizer/path/tidpath.c
@@ -0,0 +1,528 @@
+/*-------------------------------------------------------------------------
+ *
+ * tidpath.c
+ * Routines to determine which TID conditions are usable for scanning
+ * a given relation, and create TidPaths and TidRangePaths accordingly.
+ *
+ * For TidPaths, we look for WHERE conditions of the form
+ * "CTID = pseudoconstant", which can be implemented by just fetching
+ * the tuple directly via heap_fetch(). We can also handle OR'd conditions
+ * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr
+ * conditions of the form CTID = ANY(pseudoconstant_array). In particular
+ * this allows
+ * WHERE ctid IN (tid1, tid2, ...)
+ *
+ * As with indexscans, our definition of "pseudoconstant" is pretty liberal:
+ * we allow anything that doesn't involve a volatile function or a Var of
+ * the relation under consideration. Vars belonging to other relations of
+ * the query are allowed, giving rise to parameterized TID scans.
+ *
+ * We also support "WHERE CURRENT OF cursor" conditions (CurrentOfExpr),
+ * which amount to "CTID = run-time-determined-TID". These could in
+ * theory be translated to a simple comparison of CTID to the result of
+ * a function, but in practice it works better to keep the special node
+ * representation all the way through to execution.
+ *
+ * Additionally, TidRangePaths may be created for conditions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and
+ * AND-clauses composed of such conditions.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/tidpath.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/restrictinfo.h"
+
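+/*
+ * Illustrative queries (editor's addition; assumes a table t):
+ *		SELECT * FROM t WHERE ctid = '(0,1)';
+ *		SELECT * FROM t WHERE ctid = ANY (ARRAY['(0,1)'::tid, '(0,2)']);
+ *		SELECT * FROM t WHERE ctid >= '(10,0)' AND ctid < '(20,0)';
+ * The first two are candidates for a TidPath, the last for a TidRangePath;
+ * whether such paths are chosen is still a costing decision.
+ */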
+
+/*
+ * Does this Var represent the CTID column of the specified baserel?
+ */
+static inline bool
+IsCTIDVar(Var *var, RelOptInfo *rel)
+{
+ /* The vartype check is strictly paranoia */
+ if (var->varattno == SelfItemPointerAttributeNumber &&
+ var->vartype == TIDOID &&
+ var->varno == rel->relid &&
+ var->varlevelsup == 0)
+ return true;
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ * CTID OP pseudoconstant
+ * or
+ * pseudoconstant OP CTID
+ * where OP is a binary operation, the CTID Var belongs to relation "rel",
+ * and nothing on the other side of the clause does.
+ */
+static bool
+IsBinaryTidClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ OpExpr *node;
+ Node *arg1,
+ *arg2,
+ *other;
+ Relids other_relids;
+
+ /* Must be an OpExpr */
+ if (!is_opclause(rinfo->clause))
+ return false;
+ node = (OpExpr *) rinfo->clause;
+
+ /* OpExpr must have two arguments */
+ if (list_length(node->args) != 2)
+ return false;
+ arg1 = linitial(node->args);
+ arg2 = lsecond(node->args);
+
+ /* Look for CTID as either argument */
+ other = NULL;
+ other_relids = NULL;
+ if (arg1 && IsA(arg1, Var) &&
+ IsCTIDVar((Var *) arg1, rel))
+ {
+ other = arg2;
+ other_relids = rinfo->right_relids;
+ }
+ if (!other && arg2 && IsA(arg2, Var) &&
+ IsCTIDVar((Var *) arg2, rel))
+ {
+ other = arg1;
+ other_relids = rinfo->left_relids;
+ }
+ if (!other)
+ return false;
+
+ /* The other argument must be a pseudoconstant */
+ if (bms_is_member(rel->relid, other_relids) ||
+ contain_volatile_functions(other))
+ return false;
+
+ return true; /* success */
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ * CTID = pseudoconstant
+ * or
+ * pseudoconstant = CTID
+ * where the CTID Var belongs to relation "rel", and nothing on the
+ * other side of the clause does.
+ */
+static bool
+IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ if (!IsBinaryTidClause(rinfo, rel))
+ return false;
+
+ if (((OpExpr *) rinfo->clause)->opno == TIDEqualOperator)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ * CTID OP pseudoconstant
+ * or
+ * pseudoconstant OP CTID
+ * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs
+ * to relation "rel", and nothing on the other side of the clause does.
+ */
+static bool
+IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ Oid opno;
+
+ if (!IsBinaryTidClause(rinfo, rel))
+ return false;
+ opno = ((OpExpr *) rinfo->clause)->opno;
+
+ if (opno == TIDLessOperator || opno == TIDLessEqOperator ||
+ opno == TIDGreaterOperator || opno == TIDGreaterEqOperator)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ * CTID = ANY (pseudoconstant_array)
+ * where the CTID Var belongs to relation "rel", and nothing on the
+ * other side of the clause does.
+ */
+static bool
+IsTidEqualAnyClause(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ ScalarArrayOpExpr *node;
+ Node *arg1,
+ *arg2;
+
+ /* Must be a ScalarArrayOpExpr */
+ if (!(rinfo->clause && IsA(rinfo->clause, ScalarArrayOpExpr)))
+ return false;
+ node = (ScalarArrayOpExpr *) rinfo->clause;
+
+ /* Operator must be tideq */
+ if (node->opno != TIDEqualOperator)
+ return false;
+ if (!node->useOr)
+ return false;
+ Assert(list_length(node->args) == 2);
+ arg1 = linitial(node->args);
+ arg2 = lsecond(node->args);
+
+ /* CTID must be first argument */
+ if (arg1 && IsA(arg1, Var) &&
+ IsCTIDVar((Var *) arg1, rel))
+ {
+ /* The other argument must be a pseudoconstant */
+ if (bms_is_member(rel->relid, pull_varnos(root, arg2)) ||
+ contain_volatile_functions(arg2))
+ return false;
+
+ return true; /* success */
+ }
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is a CurrentOfExpr referencing "rel".
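+ *
+ * This is the form that a qual like "WHERE CURRENT OF cur" (for some
+ * illustrative cursor name "cur") takes by the time it reaches the planner.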
+ */
+static bool
+IsCurrentOfClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ CurrentOfExpr *node;
+
+ /* Must be a CurrentOfExpr */
+ if (!(rinfo->clause && IsA(rinfo->clause, CurrentOfExpr)))
+ return false;
+ node = (CurrentOfExpr *) rinfo->clause;
+
+ /* If it references this rel, we're good */
+ if (node->cvarno == rel->relid)
+ return true;
+
+ return false;
+}
+
+/*
+ * Extract a set of CTID conditions from the given RestrictInfo
+ *
+ * Returns a List of CTID qual RestrictInfos for the specified rel (with
+ * implicit OR semantics across the list), or NIL if there are no usable
+ * conditions.
+ *
+ * This function considers only base cases; AND/OR combination is handled
+ * below. Therefore the returned List never has more than one element.
+ * (Using a List may seem a bit weird, but it simplifies the caller.)
+ */
+static List *
+TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ /*
+ * We may ignore pseudoconstant clauses (they can't contain Vars, so could
+ * not match anyway).
+ */
+ if (rinfo->pseudoconstant)
+ return NIL;
+
+ /*
+ * If clause must wait till after some lower-security-level restriction
+ * clause, reject it.
+ */
+ if (!restriction_is_securely_promotable(rinfo, rel))
+ return NIL;
+
+ /*
+ * Check all base cases. If we get a match, return the clause.
+ */
+ if (IsTidEqualClause(rinfo, rel) ||
+ IsTidEqualAnyClause(root, rinfo, rel) ||
+ IsCurrentOfClause(rinfo, rel))
+ return list_make1(rinfo);
+
+ return NIL;
+}
+
+/*
+ * Extract a set of CTID conditions from implicit-AND List of RestrictInfos
+ *
+ * Returns a List of CTID qual RestrictInfos for the specified rel (with
+ * implicit OR semantics across the list), or NIL if there are no usable
+ * conditions.
+ *
+ * This function is just concerned with handling AND/OR recursion.
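+ *
+ * For illustration only (hypothetical table "t" with an ordinary column "x"):
+ * a restriction list containing the single OR clause
+ *     t.ctid = '(0,1)' OR (x = 1 AND t.ctid = '(0,7)')
+ * yields a two-element implicitly-OR'ed result, whereas
+ *     t.ctid = '(0,1)' OR x = 1
+ * yields NIL, because the second OR arm provides no usable CTID condition.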
+ */
+static List *
+TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel)
+{
+ List *rlst = NIL;
+ ListCell *l;
+
+ foreach(l, rlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (restriction_is_or_clause(rinfo))
+ {
+ ListCell *j;
+
+ /*
+ * We must be able to extract a CTID condition from every
+ * sub-clause of an OR, or we can't use it.
+ */
+ foreach(j, ((BoolExpr *) rinfo->orclause)->args)
+ {
+ Node *orarg = (Node *) lfirst(j);
+ List *sublist;
+
+ /* OR arguments should be ANDs or sub-RestrictInfos */
+ if (is_andclause(orarg))
+ {
+ List *andargs = ((BoolExpr *) orarg)->args;
+
+ /* Recurse in case there are sub-ORs */
+ sublist = TidQualFromRestrictInfoList(root, andargs, rel);
+ }
+ else
+ {
+ RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
+
+ Assert(!restriction_is_or_clause(rinfo));
+ sublist = TidQualFromRestrictInfo(root, rinfo, rel);
+ }
+
+ /*
+ * If nothing found in this arm, we can't do anything with
+ * this OR clause.
+ */
+ if (sublist == NIL)
+ {
+ rlst = NIL; /* forget anything we had */
+ break; /* out of loop over OR args */
+ }
+
+ /*
+ * OK, continue constructing implicitly-OR'ed result list.
+ */
+ rlst = list_concat(rlst, sublist);
+ }
+ }
+ else
+ {
+ /* Not an OR clause, so handle base cases */
+ rlst = TidQualFromRestrictInfo(root, rinfo, rel);
+ }
+
+ /*
+ * Stop as soon as we find any usable CTID condition. In theory we
+ * could get CTID equality conditions from different AND'ed clauses,
+ * in which case we could try to pick the most efficient one. In
+ * practice, such usage seems very unlikely, so we don't bother; we
+ * just exit as soon as we find the first candidate.
+ */
+ if (rlst)
+ break;
+ }
+
+ return rlst;
+}
+
+/*
+ * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos
+ *
+ * Returns a List of CTID range qual RestrictInfos for the specified rel
+ * (with implicit AND semantics across the list), or NIL if there are no
+ * usable range conditions or if the rel's table AM does not support TID range
+ * scans.
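+ *
+ * For illustration only: a baserestrict list built from
+ *     WHERE t.ctid > '(10,0)' AND t.ctid <= '(20,0)'
+ * (hypothetical heap table "t") arrives here as two separate RestrictInfos
+ * and produces a two-element implicitly-AND'ed result.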
+ */
+static List *
+TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel)
+{
+ List *rlst = NIL;
+ ListCell *l;
+
+ if ((rel->amflags & AMFLAG_HAS_TID_RANGE) == 0)
+ return NIL;
+
+ foreach(l, rlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (IsTidRangeClause(rinfo, rel))
+ rlst = lappend(rlst, rinfo);
+ }
+
+ return rlst;
+}
+
+/*
+ * Given a list of join clauses involving our rel, create a parameterized
+ * TidPath for each one that is a suitable TidEqual clause.
+ *
+ * In principle we could combine clauses that reference the same outer rels,
+ * but it doesn't seem like such cases would arise often enough to be worth
+ * troubling over.
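+ *
+ * For illustration only: with hypothetical tables "t1" and "t2", where t2
+ * carries a tid column pointing into t1, a join clause such as
+ *     t1.ctid = t2.t1_ctid
+ * can yield a TID scan on t1 parameterized by t2, typically placed on the
+ * inside of a nestloop join.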
+ */
+static void
+BuildParameterizedTidPaths(PlannerInfo *root, RelOptInfo *rel, List *clauses)
+{
+ ListCell *l;
+
+ foreach(l, clauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+ List *tidquals;
+ Relids required_outer;
+
+ /*
+ * Validate whether each clause is actually usable; we must check this
+ * even when examining clauses generated from an EquivalenceClass,
+ * since they might not satisfy the restriction on not having Vars of
+ * our rel on the other side, or somebody might've built an operator
+ * class that accepts type "tid" but has other operators in it.
+ *
+ * We currently consider only TidEqual join clauses. In principle we
+ * might find a suitable ScalarArrayOpExpr in the rel's joininfo list,
+ * but it seems unlikely to be worth expending the cycles to check.
+ * And we definitely won't find a CurrentOfExpr here. Hence, we don't
+ * use TidQualFromRestrictInfo; but this must match that function
+ * otherwise.
+ */
+ if (rinfo->pseudoconstant ||
+ !restriction_is_securely_promotable(rinfo, rel) ||
+ !IsTidEqualClause(rinfo, rel))
+ continue;
+
+ /*
+ * Check if clause can be moved to this rel; this is probably
+ * redundant when considering EC-derived clauses, but we must check it
+ * for "loose" join clauses.
+ */
+ if (!join_clause_is_movable_to(rinfo, rel))
+ continue;
+
+ /* OK, make list of clauses for this path */
+ tidquals = list_make1(rinfo);
+
+ /* Compute required outer rels for this path */
+ required_outer = bms_union(rinfo->required_relids, rel->lateral_relids);
+ required_outer = bms_del_member(required_outer, rel->relid);
+
+ add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals,
+ required_outer));
+ }
+}
+
+/*
+ * Test whether an EquivalenceClass member matches our rel's CTID Var.
+ *
+ * This is a callback for use by generate_implied_equalities_for_column.
+ */
+static bool
+ec_member_matches_ctid(PlannerInfo *root, RelOptInfo *rel,
+ EquivalenceClass *ec, EquivalenceMember *em,
+ void *arg)
+{
+ if (em->em_expr && IsA(em->em_expr, Var) &&
+ IsCTIDVar((Var *) em->em_expr, rel))
+ return true;
+ return false;
+}
+
+/*
+ * create_tidscan_paths
+ * Create paths corresponding to direct TID scans of the given rel.
+ *
+ * Candidate paths are added to the rel's pathlist (using add_path).
+ */
+void
+create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ List *tidquals;
+ List *tidrangequals;
+
+ /*
+ * If any suitable quals exist in the rel's baserestrict list, generate a
+ * plain (unparameterized) TidPath with them.
+ */
+ tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel);
+
+ if (tidquals != NIL)
+ {
+ /*
+ * This path uses no join clauses, but it could still have required
+ * parameterization due to LATERAL refs in its tlist.
+ */
+ Relids required_outer = rel->lateral_relids;
+
+ add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals,
+ required_outer));
+ }
+
+ /*
+ * If there are range quals in the baserestrict list, generate a
+ * TidRangePath.
+ */
+ tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo,
+ rel);
+
+ if (tidrangequals != NIL)
+ {
+ /*
+ * This path uses no join clauses, but it could still have required
+ * parameterization due to LATERAL refs in its tlist.
+ */
+ Relids required_outer = rel->lateral_relids;
+
+ add_path(rel, (Path *) create_tidrangescan_path(root, rel,
+ tidrangequals,
+ required_outer));
+ }
+
+ /*
+ * Try to generate parameterized TidPaths using equality clauses extracted
+ * from EquivalenceClasses. (This is important since simple "t1.ctid =
+ * t2.ctid" clauses will turn into ECs.)
+ */
+ if (rel->has_eclass_joins)
+ {
+ List *clauses;
+
+ /* Generate clauses, skipping any that join to lateral_referencers */
+ clauses = generate_implied_equalities_for_column(root,
+ rel,
+ ec_member_matches_ctid,
+ NULL,
+ rel->lateral_referencers);
+
+ /* Generate a path for each usable join clause */
+ BuildParameterizedTidPaths(root, rel, clauses);
+ }
+
+ /*
+ * Also consider parameterized TidPaths using "loose" join quals. Quals
+ * of the form "t1.ctid = t2.ctid" would turn into these if they are outer
+ * join quals, for example.
+ */
+ BuildParameterizedTidPaths(root, rel, rel->joininfo);
+}