summaryrefslogtreecommitdiffstats
path: root/src/backend/optimizer/prep
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/backend/optimizer/prep/Makefile22
-rw-r--r--src/backend/optimizer/prep/prepagg.c674
-rw-r--r--src/backend/optimizer/prep/prepjointree.c3633
-rw-r--r--src/backend/optimizer/prep/prepqual.c676
-rw-r--r--src/backend/optimizer/prep/preptlist.c437
-rw-r--r--src/backend/optimizer/prep/prepunion.c1420
6 files changed, 6862 insertions, 0 deletions
diff --git a/src/backend/optimizer/prep/Makefile b/src/backend/optimizer/prep/Makefile
new file mode 100644
index 0000000..6f8c6c8
--- /dev/null
+++ b/src/backend/optimizer/prep/Makefile
@@ -0,0 +1,22 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for optimizer/prep
+#
+# IDENTIFICATION
+# src/backend/optimizer/prep/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/optimizer/prep
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = \
+ prepagg.o \
+ prepjointree.o \
+ prepqual.o \
+ preptlist.o \
+ prepunion.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c
new file mode 100644
index 0000000..e1c5257
--- /dev/null
+++ b/src/backend/optimizer/prep/prepagg.c
@@ -0,0 +1,674 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepagg.c
+ * Routines to preprocess aggregate function calls
+ *
+ * If there are identical aggregate calls in the query, they only need to
+ * be computed once. Also, some aggregate functions can share the same
+ * transition state, so that we only need to call the final function for
+ * them separately. These optimizations are independent of how the
+ * aggregates are executed.
+ *
+ * preprocess_aggrefs() detects those cases, creates AggInfo and
+ * AggTransInfo structs for each aggregate and transition state that needs
+ * to be computed, and sets the 'aggno' and 'transno' fields in the Aggrefs
+ * accordingly. It also resolves polymorphic transition types, and sets
+ * the 'aggtranstype' fields accordingly.
+ *
+ * XXX: The AggInfo and AggTransInfo structs are thrown away after
+ * planning, so executor startup has to perform some of the same lookups
+ * of transition functions and initial values that we do here. One day, we
+ * might want to carry that information to the Agg nodes to save the effort
+ * at executor startup. The Agg nodes are constructed much later in the
+ * planning, however, so it's not trivial.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/prep/prepagg.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_type.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
+#include "optimizer/prep.h"
+#include "parser/parse_agg.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+static bool preprocess_aggrefs_walker(Node *node, PlannerInfo *root);
+static int find_compatible_agg(PlannerInfo *root, Aggref *newagg,
+ List **same_input_transnos);
+static int find_compatible_trans(PlannerInfo *root, Aggref *newagg,
+ bool shareable,
+ Oid aggtransfn, Oid aggtranstype,
+ int transtypeLen, bool transtypeByVal,
+ Oid aggcombinefn,
+ Oid aggserialfn, Oid aggdeserialfn,
+ Datum initValue, bool initValueIsNull,
+ List *transnos);
+static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
+
+/* -----------------
+ * Resolve the transition type of all Aggrefs, and determine which Aggrefs
+ * can share aggregate or transition state.
+ *
+ * Information about the aggregates and transition functions are collected
+ * in the root->agginfos and root->aggtransinfos lists. The 'aggtranstype',
+ * 'aggno', and 'aggtransno' fields of each Aggref are filled in.
+ *
+ * NOTE: This modifies the Aggrefs in the input expression in-place!
+ *
+ * We try to optimize by detecting duplicate aggregate functions so that
+ * their state and final values are re-used, rather than needlessly being
+ * re-calculated independently. We also detect aggregates that are not
+ * the same, but which can share the same transition state.
+ *
+ * Scenarios:
+ *
+ * 1. Identical aggregate function calls appear in the query:
+ *
+ * SELECT SUM(x) FROM ... HAVING SUM(x) > 0
+ *
+ * Since these aggregates are identical, we only need to calculate
+ * the value once. Both aggregates will share the same 'aggno' value.
+ *
+ * 2. Two different aggregate functions appear in the query, but the
+ * aggregates have the same arguments, transition functions and
+ * initial values (and, presumably, different final functions):
+ *
+ * SELECT AVG(x), STDDEV(x) FROM ...
+ *
+ * In this case we must create a new AggInfo for the varying aggregate,
+ * and we need to call the final functions separately, but we need
+ * only run the transition function once. (This requires that the
+ * final functions be nondestructive of the transition state, but
+ * that's required anyway for other reasons.)
+ *
+ * For either of these optimizations to be valid, all aggregate properties
+ * used in the transition phase must be the same, including any modifiers
+ * such as ORDER BY, DISTINCT and FILTER, and the arguments mustn't
+ * contain any volatile functions.
+ * -----------------
+ */
+void
+preprocess_aggrefs(PlannerInfo *root, Node *clause)
+{
+ (void) preprocess_aggrefs_walker(clause, root);
+}
+
+static void
+preprocess_aggref(Aggref *aggref, PlannerInfo *root)
+{
+ HeapTuple aggTuple;
+ Form_pg_aggregate aggform;
+ Oid aggtransfn;
+ Oid aggfinalfn;
+ Oid aggcombinefn;
+ Oid aggserialfn;
+ Oid aggdeserialfn;
+ Oid aggtranstype;
+ int32 aggtranstypmod;
+ int32 aggtransspace;
+ bool shareable;
+ int aggno;
+ int transno;
+ List *same_input_transnos;
+ int16 resulttypeLen;
+ bool resulttypeByVal;
+ Datum textInitVal;
+ Datum initValue;
+ bool initValueIsNull;
+ bool transtypeByVal;
+ int16 transtypeLen;
+ Oid inputTypes[FUNC_MAX_ARGS];
+ int numArguments;
+
+ Assert(aggref->agglevelsup == 0);
+
+ /*
+ * Fetch info about the aggregate from pg_aggregate. Note it's correct to
+ * ignore the moving-aggregate variant, since what we're concerned with
+ * here is aggregates not window functions.
+ */
+ aggTuple = SearchSysCache1(AGGFNOID,
+ ObjectIdGetDatum(aggref->aggfnoid));
+ if (!HeapTupleIsValid(aggTuple))
+ elog(ERROR, "cache lookup failed for aggregate %u",
+ aggref->aggfnoid);
+ aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+ aggtransfn = aggform->aggtransfn;
+ aggfinalfn = aggform->aggfinalfn;
+ aggcombinefn = aggform->aggcombinefn;
+ aggserialfn = aggform->aggserialfn;
+ aggdeserialfn = aggform->aggdeserialfn;
+ aggtranstype = aggform->aggtranstype;
+ aggtransspace = aggform->aggtransspace;
+
+ /*
+ * Resolve the possibly-polymorphic aggregate transition type.
+ */
+
+ /* extract argument types (ignoring any ORDER BY expressions) */
+ numArguments = get_aggregate_argtypes(aggref, inputTypes);
+
+ /* resolve actual type of transition state, if polymorphic */
+ aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid,
+ aggtranstype,
+ inputTypes,
+ numArguments);
+ aggref->aggtranstype = aggtranstype;
+
+ /*
+ * If transition state is of same type as first aggregated input, assume
+ * it's the same typmod (same width) as well. This works for cases like
+ * MAX/MIN and is probably somewhat reasonable otherwise.
+ */
+ aggtranstypmod = -1;
+ if (aggref->args)
+ {
+ TargetEntry *tle = (TargetEntry *) linitial(aggref->args);
+
+ if (aggtranstype == exprType((Node *) tle->expr))
+ aggtranstypmod = exprTypmod((Node *) tle->expr);
+ }
+
+ /*
+ * If finalfn is marked read-write, we can't share transition states; but
+ * it is okay to share states for AGGMODIFY_SHAREABLE aggs.
+ *
+ * In principle, in a partial aggregate, we could share the transition
+ * state even if the final function is marked as read-write, because the
+ * partial aggregate doesn't execute the final function. But it's too
+ * early to know whether we're going perform a partial aggregate.
+ */
+ shareable = (aggform->aggfinalmodify != AGGMODIFY_READ_WRITE);
+
+ /* get info about the output value's datatype */
+ get_typlenbyval(aggref->aggtype,
+ &resulttypeLen,
+ &resulttypeByVal);
+
+ /* get initial value */
+ textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
+ Anum_pg_aggregate_agginitval,
+ &initValueIsNull);
+ if (initValueIsNull)
+ initValue = (Datum) 0;
+ else
+ initValue = GetAggInitVal(textInitVal, aggtranstype);
+
+ ReleaseSysCache(aggTuple);
+
+ /*
+ * 1. See if this is identical to another aggregate function call that
+ * we've seen already.
+ */
+ aggno = find_compatible_agg(root, aggref, &same_input_transnos);
+ if (aggno != -1)
+ {
+ AggInfo *agginfo = list_nth(root->agginfos, aggno);
+
+ transno = agginfo->transno;
+ }
+ else
+ {
+ AggInfo *agginfo = palloc(sizeof(AggInfo));
+
+ agginfo->finalfn_oid = aggfinalfn;
+ agginfo->representative_aggref = aggref;
+ agginfo->shareable = shareable;
+
+ aggno = list_length(root->agginfos);
+ root->agginfos = lappend(root->agginfos, agginfo);
+
+ /*
+ * Count it, and check for cases requiring ordered input. Note that
+ * ordered-set aggs always have nonempty aggorder. Any ordered-input
+ * case also defeats partial aggregation.
+ */
+ if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
+ {
+ root->numOrderedAggs++;
+ root->hasNonPartialAggs = true;
+ }
+
+ get_typlenbyval(aggtranstype,
+ &transtypeLen,
+ &transtypeByVal);
+
+ /*
+ * 2. See if this aggregate can share transition state with another
+ * aggregate that we've initialized already.
+ */
+ transno = find_compatible_trans(root, aggref, shareable,
+ aggtransfn, aggtranstype,
+ transtypeLen, transtypeByVal,
+ aggcombinefn,
+ aggserialfn, aggdeserialfn,
+ initValue, initValueIsNull,
+ same_input_transnos);
+ if (transno == -1)
+ {
+ AggTransInfo *transinfo = palloc(sizeof(AggTransInfo));
+
+ transinfo->args = aggref->args;
+ transinfo->aggfilter = aggref->aggfilter;
+ transinfo->transfn_oid = aggtransfn;
+ transinfo->combinefn_oid = aggcombinefn;
+ transinfo->serialfn_oid = aggserialfn;
+ transinfo->deserialfn_oid = aggdeserialfn;
+ transinfo->aggtranstype = aggtranstype;
+ transinfo->aggtranstypmod = aggtranstypmod;
+ transinfo->transtypeLen = transtypeLen;
+ transinfo->transtypeByVal = transtypeByVal;
+ transinfo->aggtransspace = aggtransspace;
+ transinfo->initValue = initValue;
+ transinfo->initValueIsNull = initValueIsNull;
+
+ transno = list_length(root->aggtransinfos);
+ root->aggtransinfos = lappend(root->aggtransinfos, transinfo);
+
+ /*
+ * Check whether partial aggregation is feasible, unless we
+ * already found out that we can't do it.
+ */
+ if (!root->hasNonPartialAggs)
+ {
+ /*
+ * If there is no combine function, then partial aggregation
+ * is not possible.
+ */
+ if (!OidIsValid(transinfo->combinefn_oid))
+ root->hasNonPartialAggs = true;
+
+ /*
+ * If we have any aggs with transtype INTERNAL then we must
+ * check whether they have serialization/deserialization
+ * functions; if not, we can't serialize partial-aggregation
+ * results.
+ */
+ else if (transinfo->aggtranstype == INTERNALOID &&
+ (!OidIsValid(transinfo->serialfn_oid) ||
+ !OidIsValid(transinfo->deserialfn_oid)))
+ root->hasNonSerialAggs = true;
+ }
+ }
+ agginfo->transno = transno;
+ }
+
+ /*
+ * Fill in the fields in the Aggref (aggtranstype was set above already)
+ */
+ aggref->aggno = aggno;
+ aggref->aggtransno = transno;
+}
+
+static bool
+preprocess_aggrefs_walker(Node *node, PlannerInfo *root)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, Aggref))
+ {
+ Aggref *aggref = (Aggref *) node;
+
+ preprocess_aggref(aggref, root);
+
+ /*
+ * We assume that the parser checked that there are no aggregates (of
+ * this level anyway) in the aggregated arguments, direct arguments,
+ * or filter clause. Hence, we need not recurse into any of them.
+ */
+ return false;
+ }
+ Assert(!IsA(node, SubLink));
+ return expression_tree_walker(node, preprocess_aggrefs_walker,
+ (void *) root);
+}
+
+
+/*
+ * find_compatible_agg - search for a previously initialized per-Agg struct
+ *
+ * Searches the previously looked at aggregates to find one which is compatible
+ * with this one, with the same input parameters. If no compatible aggregate
+ * can be found, returns -1.
+ *
+ * As a side-effect, this also collects a list of existing, shareable per-Trans
+ * structs with matching inputs. If no identical Aggref is found, the list is
+ * passed later to find_compatible_trans, to see if we can at least reuse
+ * the state value of another aggregate.
+ */
+static int
+find_compatible_agg(PlannerInfo *root, Aggref *newagg,
+ List **same_input_transnos)
+{
+ ListCell *lc;
+ int aggno;
+
+ *same_input_transnos = NIL;
+
+ /* we mustn't reuse the aggref if it contains volatile function calls */
+ if (contain_volatile_functions((Node *) newagg))
+ return -1;
+
+ /*
+ * Search through the list of already seen aggregates. If we find an
+ * existing identical aggregate call, then we can re-use that one. While
+ * searching, we'll also collect a list of Aggrefs with the same input
+ * parameters. If no matching Aggref is found, the caller can potentially
+ * still re-use the transition state of one of them. (At this stage we
+ * just compare the parsetrees; whether different aggregates share the
+ * same transition function will be checked later.)
+ */
+ aggno = -1;
+ foreach(lc, root->agginfos)
+ {
+ AggInfo *agginfo = (AggInfo *) lfirst(lc);
+ Aggref *existingRef;
+
+ aggno++;
+
+ existingRef = agginfo->representative_aggref;
+
+ /* all of the following must be the same or it's no match */
+ if (newagg->inputcollid != existingRef->inputcollid ||
+ newagg->aggtranstype != existingRef->aggtranstype ||
+ newagg->aggstar != existingRef->aggstar ||
+ newagg->aggvariadic != existingRef->aggvariadic ||
+ newagg->aggkind != existingRef->aggkind ||
+ !equal(newagg->args, existingRef->args) ||
+ !equal(newagg->aggorder, existingRef->aggorder) ||
+ !equal(newagg->aggdistinct, existingRef->aggdistinct) ||
+ !equal(newagg->aggfilter, existingRef->aggfilter))
+ continue;
+
+ /* if it's the same aggregate function then report exact match */
+ if (newagg->aggfnoid == existingRef->aggfnoid &&
+ newagg->aggtype == existingRef->aggtype &&
+ newagg->aggcollid == existingRef->aggcollid &&
+ equal(newagg->aggdirectargs, existingRef->aggdirectargs))
+ {
+ list_free(*same_input_transnos);
+ *same_input_transnos = NIL;
+ return aggno;
+ }
+
+ /*
+ * Not identical, but it had the same inputs. If the final function
+ * permits sharing, return its transno to the caller, in case we can
+ * re-use its per-trans state. (If there's already sharing going on,
+ * we might report a transno more than once. find_compatible_trans is
+ * cheap enough that it's not worth spending cycles to avoid that.)
+ */
+ if (agginfo->shareable)
+ *same_input_transnos = lappend_int(*same_input_transnos,
+ agginfo->transno);
+ }
+
+ return -1;
+}
+
+/*
+ * find_compatible_trans - search for a previously initialized per-Trans
+ * struct
+ *
+ * Searches the list of transnos for a per-Trans struct with the same
+ * transition function and initial condition. (The inputs have already been
+ * verified to match.)
+ */
+static int
+find_compatible_trans(PlannerInfo *root, Aggref *newagg, bool shareable,
+ Oid aggtransfn, Oid aggtranstype,
+ int transtypeLen, bool transtypeByVal,
+ Oid aggcombinefn,
+ Oid aggserialfn, Oid aggdeserialfn,
+ Datum initValue, bool initValueIsNull,
+ List *transnos)
+{
+ ListCell *lc;
+
+ /* If this aggregate can't share transition states, give up */
+ if (!shareable)
+ return -1;
+
+ foreach(lc, transnos)
+ {
+ int transno = lfirst_int(lc);
+ AggTransInfo *pertrans = (AggTransInfo *) list_nth(root->aggtransinfos, transno);
+
+ /*
+ * if the transfns or transition state types are not the same then the
+ * state can't be shared.
+ */
+ if (aggtransfn != pertrans->transfn_oid ||
+ aggtranstype != pertrans->aggtranstype)
+ continue;
+
+ /*
+ * The serialization and deserialization functions must match, if
+ * present, as we're unable to share the trans state for aggregates
+ * which will serialize or deserialize into different formats.
+ * Remember that these will be InvalidOid if they're not required for
+ * this agg node.
+ */
+ if (aggserialfn != pertrans->serialfn_oid ||
+ aggdeserialfn != pertrans->deserialfn_oid)
+ continue;
+
+ /*
+ * Combine function must also match. We only care about the combine
+ * function with partial aggregates, but it's too early in the
+ * planning to know if we will do partial aggregation, so be
+ * conservative.
+ */
+ if (aggcombinefn != pertrans->combinefn_oid)
+ continue;
+
+ /*
+ * Check that the initial condition matches, too.
+ */
+ if (initValueIsNull && pertrans->initValueIsNull)
+ return transno;
+
+ if (!initValueIsNull && !pertrans->initValueIsNull &&
+ datumIsEqual(initValue, pertrans->initValue,
+ transtypeByVal, transtypeLen))
+ return transno;
+ }
+ return -1;
+}
+
+static Datum
+GetAggInitVal(Datum textInitVal, Oid transtype)
+{
+ Oid typinput,
+ typioparam;
+ char *strInitVal;
+ Datum initVal;
+
+ getTypeInputInfo(transtype, &typinput, &typioparam);
+ strInitVal = TextDatumGetCString(textInitVal);
+ initVal = OidInputFunctionCall(typinput, strInitVal,
+ typioparam, -1);
+ pfree(strInitVal);
+ return initVal;
+}
+
+
+/*
+ * get_agg_clause_costs
+ * Process the PlannerInfo's 'aggtransinfos' and 'agginfos' lists
+ * accumulating the cost information about them.
+ *
+ * 'aggsplit' tells us the expected partial-aggregation mode, which affects
+ * the cost estimates.
+ *
+ * NOTE that the costs are ADDED to those already in *costs ... so the caller
+ * is responsible for zeroing the struct initially.
+ *
+ * For each AggTransInfo, we add the cost of an aggregate transition using
+ * either the transfn or combinefn depending on the 'aggsplit' value. We also
+ * account for the costs of any aggfilters and any serializations and
+ * deserializations of the transition state and also estimate the total space
+ * needed for the transition states as if each aggregate's state was stored in
+ * memory concurrently (as would be done in a HashAgg plan).
+ *
+ * For each AggInfo in the 'agginfos' list we add the cost of running the
+ * final function and the direct args, if any.
+ */
+void
+get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs)
+{
+ ListCell *lc;
+
+ foreach(lc, root->aggtransinfos)
+ {
+ AggTransInfo *transinfo = (AggTransInfo *) lfirst(lc);
+
+ /*
+ * Add the appropriate component function execution costs to
+ * appropriate totals.
+ */
+ if (DO_AGGSPLIT_COMBINE(aggsplit))
+ {
+ /* charge for combining previously aggregated states */
+ add_function_cost(root, transinfo->combinefn_oid, NULL,
+ &costs->transCost);
+ }
+ else
+ add_function_cost(root, transinfo->transfn_oid, NULL,
+ &costs->transCost);
+ if (DO_AGGSPLIT_DESERIALIZE(aggsplit) &&
+ OidIsValid(transinfo->deserialfn_oid))
+ add_function_cost(root, transinfo->deserialfn_oid, NULL,
+ &costs->transCost);
+ if (DO_AGGSPLIT_SERIALIZE(aggsplit) &&
+ OidIsValid(transinfo->serialfn_oid))
+ add_function_cost(root, transinfo->serialfn_oid, NULL,
+ &costs->finalCost);
+
+ /*
+ * These costs are incurred only by the initial aggregate node, so we
+ * mustn't include them again at upper levels.
+ */
+ if (!DO_AGGSPLIT_COMBINE(aggsplit))
+ {
+ /* add the input expressions' cost to per-input-row costs */
+ QualCost argcosts;
+
+ cost_qual_eval_node(&argcosts, (Node *) transinfo->args, root);
+ costs->transCost.startup += argcosts.startup;
+ costs->transCost.per_tuple += argcosts.per_tuple;
+
+ /*
+ * Add any filter's cost to per-input-row costs.
+ *
+ * XXX Ideally we should reduce input expression costs according
+ * to filter selectivity, but it's not clear it's worth the
+ * trouble.
+ */
+ if (transinfo->aggfilter)
+ {
+ cost_qual_eval_node(&argcosts, (Node *) transinfo->aggfilter,
+ root);
+ costs->transCost.startup += argcosts.startup;
+ costs->transCost.per_tuple += argcosts.per_tuple;
+ }
+ }
+
+ /*
+ * If the transition type is pass-by-value then it doesn't add
+ * anything to the required size of the hashtable. If it is
+ * pass-by-reference then we have to add the estimated size of the
+ * value itself, plus palloc overhead.
+ */
+ if (!transinfo->transtypeByVal)
+ {
+ int32 avgwidth;
+
+ /* Use average width if aggregate definition gave one */
+ if (transinfo->aggtransspace > 0)
+ avgwidth = transinfo->aggtransspace;
+ else if (transinfo->transfn_oid == F_ARRAY_APPEND)
+ {
+ /*
+ * If the transition function is array_append(), it'll use an
+ * expanded array as transvalue, which will occupy at least
+ * ALLOCSET_SMALL_INITSIZE and possibly more. Use that as the
+ * estimate for lack of a better idea.
+ */
+ avgwidth = ALLOCSET_SMALL_INITSIZE;
+ }
+ else
+ {
+ avgwidth = get_typavgwidth(transinfo->aggtranstype, transinfo->aggtranstypmod);
+ }
+
+ avgwidth = MAXALIGN(avgwidth);
+ costs->transitionSpace += avgwidth + 2 * sizeof(void *);
+ }
+ else if (transinfo->aggtranstype == INTERNALOID)
+ {
+ /*
+ * INTERNAL transition type is a special case: although INTERNAL
+ * is pass-by-value, it's almost certainly being used as a pointer
+ * to some large data structure. The aggregate definition can
+ * provide an estimate of the size. If it doesn't, then we assume
+ * ALLOCSET_DEFAULT_INITSIZE, which is a good guess if the data is
+ * being kept in a private memory context, as is done by
+ * array_agg() for instance.
+ */
+ if (transinfo->aggtransspace > 0)
+ costs->transitionSpace += transinfo->aggtransspace;
+ else
+ costs->transitionSpace += ALLOCSET_DEFAULT_INITSIZE;
+ }
+ }
+
+ foreach(lc, root->agginfos)
+ {
+ AggInfo *agginfo = (AggInfo *) lfirst(lc);
+ Aggref *aggref = agginfo->representative_aggref;
+
+ /*
+ * Add the appropriate component function execution costs to
+ * appropriate totals.
+ */
+ if (!DO_AGGSPLIT_SKIPFINAL(aggsplit) &&
+ OidIsValid(agginfo->finalfn_oid))
+ add_function_cost(root, agginfo->finalfn_oid, NULL,
+ &costs->finalCost);
+
+ /*
+ * If there are direct arguments, treat their evaluation cost like the
+ * cost of the finalfn.
+ */
+ if (aggref->aggdirectargs)
+ {
+ QualCost argcosts;
+
+ cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
+ root);
+ costs->finalCost.startup += argcosts.startup;
+ costs->finalCost.per_tuple += argcosts.per_tuple;
+ }
+ }
+}
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
new file mode 100644
index 0000000..b164866
--- /dev/null
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -0,0 +1,3633 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepjointree.c
+ * Planner preprocessing for subqueries and join tree manipulation.
+ *
+ * NOTE: the intended sequence for invoking these operations is
+ * replace_empty_jointree
+ * pull_up_sublinks
+ * preprocess_function_rtes
+ * pull_up_subqueries
+ * flatten_simple_union_all
+ * do expression preprocessing (including flattening JOIN alias vars)
+ * reduce_outer_joins
+ * remove_useless_result_rtes
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/prep/prepjointree.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/placeholder.h"
+#include "optimizer/prep.h"
+#include "optimizer/subselect.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_relation.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
+
+
+typedef struct pullup_replace_vars_context
+{
+ PlannerInfo *root;
+ List *targetlist; /* tlist of subquery being pulled up */
+ RangeTblEntry *target_rte; /* RTE of subquery */
+ Relids relids; /* relids within subquery, as numbered after
+ * pullup (set only if target_rte->lateral) */
+ bool *outer_hasSubLinks; /* -> outer query's hasSubLinks */
+ int varno; /* varno of subquery */
+ bool need_phvs; /* do we need PlaceHolderVars? */
+ bool wrap_non_vars; /* do we need 'em on *all* non-Vars? */
+ Node **rv_cache; /* cache for results with PHVs */
+} pullup_replace_vars_context;
+
+typedef struct reduce_outer_joins_state
+{
+ Relids relids; /* base relids within this subtree */
+ bool contains_outer; /* does subtree contain outer join(s)? */
+ List *sub_states; /* List of states for subtree components */
+} reduce_outer_joins_state;
+
+static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
+ Relids *relids);
+static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
+ Node **jtlink1, Relids available_rels1,
+ Node **jtlink2, Relids available_rels2);
+static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
+ JoinExpr *lowest_outer_join,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel);
+static Node *pull_up_simple_subquery(PlannerInfo *root, Node *jtnode,
+ RangeTblEntry *rte,
+ JoinExpr *lowest_outer_join,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel);
+static Node *pull_up_simple_union_all(PlannerInfo *root, Node *jtnode,
+ RangeTblEntry *rte);
+static void pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root,
+ int parentRTindex, Query *setOpQuery,
+ int childRToffset);
+static void make_setop_translation_list(Query *query, Index newvarno,
+ AppendRelInfo *appinfo);
+static bool is_simple_subquery(PlannerInfo *root, Query *subquery,
+ RangeTblEntry *rte,
+ JoinExpr *lowest_outer_join);
+static Node *pull_up_simple_values(PlannerInfo *root, Node *jtnode,
+ RangeTblEntry *rte);
+static bool is_simple_values(PlannerInfo *root, RangeTblEntry *rte);
+static Node *pull_up_constant_function(PlannerInfo *root, Node *jtnode,
+ RangeTblEntry *rte,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel);
+static bool is_simple_union_all(Query *subquery);
+static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery,
+ List *colTypes);
+static bool is_safe_append_member(Query *subquery);
+static bool jointree_contains_lateral_outer_refs(PlannerInfo *root,
+ Node *jtnode, bool restricted,
+ Relids safe_upper_varnos);
+static void perform_pullup_replace_vars(PlannerInfo *root,
+ pullup_replace_vars_context *rvcontext,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel);
+static void replace_vars_in_jointree(Node *jtnode,
+ pullup_replace_vars_context *context,
+ JoinExpr *lowest_nulling_outer_join);
+static Node *pullup_replace_vars(Node *expr,
+ pullup_replace_vars_context *context);
+static Node *pullup_replace_vars_callback(Var *var,
+ replace_rte_variables_context *context);
+static Query *pullup_replace_vars_subquery(Query *query,
+ pullup_replace_vars_context *context);
+static reduce_outer_joins_state *reduce_outer_joins_pass1(Node *jtnode);
+static void reduce_outer_joins_pass2(Node *jtnode,
+ reduce_outer_joins_state *state,
+ PlannerInfo *root,
+ Relids nonnullable_rels,
+ List *nonnullable_vars,
+ List *forced_null_vars);
+static Node *remove_useless_results_recurse(PlannerInfo *root, Node *jtnode);
+static int get_result_relid(PlannerInfo *root, Node *jtnode);
+static void remove_result_refs(PlannerInfo *root, int varno, Node *newjtloc);
+static bool find_dependent_phvs(PlannerInfo *root, int varno);
+static bool find_dependent_phvs_in_jointree(PlannerInfo *root,
+ Node *node, int varno);
+static void substitute_phv_relids(Node *node,
+ int varno, Relids subrelids);
+static void fix_append_rel_relids(List *append_rel_list, int varno,
+ Relids subrelids);
+static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
+
+
+/*
+ * replace_empty_jointree
+ * If the Query's jointree is empty, replace it with a dummy RTE_RESULT
+ * relation.
+ *
+ * By doing this, we can avoid a bunch of corner cases that formerly existed
+ * for SELECTs with omitted FROM clauses. An example is that a subquery
+ * with empty jointree previously could not be pulled up, because that would
+ * have resulted in an empty relid set, making the subquery not uniquely
+ * identifiable for join or PlaceHolderVar processing.
+ *
+ * Unlike most other functions in this file, this function doesn't recurse;
+ * we rely on other processing to invoke it on sub-queries at suitable times.
+ */
+void
+replace_empty_jointree(Query *parse)
+{
+ RangeTblEntry *rte;
+ Index rti;
+ RangeTblRef *rtr;
+
+ /* Nothing to do if jointree is already nonempty */
+ if (parse->jointree->fromlist != NIL)
+ return;
+
+ /* We mustn't change it in the top level of a setop tree, either */
+ if (parse->setOperations)
+ return;
+
+ /* Create suitable RTE */
+ rte = makeNode(RangeTblEntry);
+ rte->rtekind = RTE_RESULT;
+ rte->eref = makeAlias("*RESULT*", NIL);
+
+ /* Add it to rangetable */
+ parse->rtable = lappend(parse->rtable, rte);
+ rti = list_length(parse->rtable);
+
+ /* And jam a reference into the jointree */
+ rtr = makeNode(RangeTblRef);
+ rtr->rtindex = rti;
+ parse->jointree->fromlist = list_make1(rtr);
+}
+
+/*
+ * pull_up_sublinks
+ * Attempt to pull up ANY and EXISTS SubLinks to be treated as
+ * semijoins or anti-semijoins.
+ *
+ * A clause "foo op ANY (sub-SELECT)" can be processed by pulling the
+ * sub-SELECT up to become a rangetable entry and treating the implied
+ * comparisons as quals of a semijoin. However, this optimization *only*
+ * works at the top level of WHERE or a JOIN/ON clause, because we cannot
+ * distinguish whether the ANY ought to return FALSE or NULL in cases
+ * involving NULL inputs. Also, in an outer join's ON clause we can only
+ * do this if the sublink is degenerate (ie, references only the nullable
+ * side of the join). In that case it is legal to push the semijoin
+ * down into the nullable side of the join. If the sublink references any
+ * nonnullable-side variables then it would have to be evaluated as part
+ * of the outer join, which makes things way too complicated.
+ *
+ * Under similar conditions, EXISTS and NOT EXISTS clauses can be handled
+ * by pulling up the sub-SELECT and creating a semijoin or anti-semijoin.
+ *
+ * This routine searches for such clauses and does the necessary parsetree
+ * transformations if any are found.
+ *
+ * This routine has to run before preprocess_expression(), so the quals
+ * clauses are not yet reduced to implicit-AND format, and are not guaranteed
+ * to be AND/OR-flat either. That means we need to recursively search through
+ * explicit AND clauses. We stop as soon as we hit a non-AND item.
+ */
+void
+pull_up_sublinks(PlannerInfo *root)
+{
+ Node *jtnode;
+ Relids relids;
+
+ /* Begin recursion through the jointree */
+ jtnode = pull_up_sublinks_jointree_recurse(root,
+ (Node *) root->parse->jointree,
+ &relids);
+
+ /*
+ * root->parse->jointree must always be a FromExpr, so insert a dummy one
+ * if we got a bare RangeTblRef or JoinExpr out of the recursion.
+ */
+ if (IsA(jtnode, FromExpr))
+ root->parse->jointree = (FromExpr *) jtnode;
+ else
+ root->parse->jointree = makeFromExpr(list_make1(jtnode), NULL);
+}
+
+/*
+ * Recurse through jointree nodes for pull_up_sublinks()
+ *
+ * In addition to returning the possibly-modified jointree node, we return
+ * a relids set of the contained rels into *relids.
+ */
+static Node *
+pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
+ Relids *relids)
+{
+ if (jtnode == NULL)
+ {
+ *relids = NULL;
+ }
+ else if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ *relids = bms_make_singleton(varno);
+ /* jtnode is returned unmodified */
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ List *newfromlist = NIL;
+ Relids frelids = NULL;
+ FromExpr *newf;
+ Node *jtlink;
+ ListCell *l;
+
+ /* First, recurse to process children and collect their relids */
+ foreach(l, f->fromlist)
+ {
+ Node *newchild;
+ Relids childrelids;
+
+ newchild = pull_up_sublinks_jointree_recurse(root,
+ lfirst(l),
+ &childrelids);
+ newfromlist = lappend(newfromlist, newchild);
+ frelids = bms_join(frelids, childrelids);
+ }
+ /* Build the replacement FromExpr; no quals yet */
+ newf = makeFromExpr(newfromlist, NULL);
+ /* Set up a link representing the rebuilt jointree */
+ jtlink = (Node *) newf;
+ /* Now process qual --- all children are available for use */
+ newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
+ &jtlink, frelids,
+ NULL, NULL);
+
+ /*
+ * Note that the result will be either newf, or a stack of JoinExprs
+ * with newf at the base. We rely on subsequent optimization steps to
+ * flatten this and rearrange the joins as needed.
+ *
+ * Although we could include the pulled-up subqueries in the returned
+ * relids, there's no need since upper quals couldn't refer to their
+ * outputs anyway.
+ */
+ *relids = frelids;
+ jtnode = jtlink;
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j;
+ Relids leftrelids;
+ Relids rightrelids;
+ Node *jtlink;
+
+ /*
+ * Make a modifiable copy of join node, but don't bother copying its
+ * subnodes (yet).
+ */
+ j = (JoinExpr *) palloc(sizeof(JoinExpr));
+ memcpy(j, jtnode, sizeof(JoinExpr));
+ jtlink = (Node *) j;
+
+ /* Recurse to process children and collect their relids */
+ j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
+ &leftrelids);
+ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
+ &rightrelids);
+
+ /*
+ * Now process qual, showing appropriate child relids as available,
+ * and attach any pulled-up jointree items at the right place. In the
+ * inner-join case we put new JoinExprs above the existing one (much
+ * as for a FromExpr-style join). In outer-join cases the new
+ * JoinExprs must go into the nullable side of the outer join. The
+ * point of the available_rels machinations is to ensure that we only
+ * pull up quals for which that's okay.
+ *
+ * We don't expect to see any pre-existing JOIN_SEMI or JOIN_ANTI
+ * nodes here.
+ */
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+ j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
+ &jtlink,
+ bms_union(leftrelids,
+ rightrelids),
+ NULL, NULL);
+ break;
+ case JOIN_LEFT:
+ j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
+ &j->rarg,
+ rightrelids,
+ NULL, NULL);
+ break;
+ case JOIN_FULL:
+ /* can't do anything with full-join quals */
+ break;
+ case JOIN_RIGHT:
+ j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
+ &j->larg,
+ leftrelids,
+ NULL, NULL);
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ break;
+ }
+
+ /*
+ * Although we could include the pulled-up subqueries in the returned
+ * relids, there's no need since upper quals couldn't refer to their
+ * outputs anyway. But we *do* need to include the join's own rtindex
+ * because we haven't yet collapsed join alias variables, so upper
+ * levels would mistakenly think they couldn't use references to this
+ * join.
+ */
+ *relids = bms_join(leftrelids, rightrelids);
+ if (j->rtindex)
+ *relids = bms_add_member(*relids, j->rtindex);
+ jtnode = jtlink;
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return jtnode;
+}
+
+/*
+ * Recurse through top-level qual nodes for pull_up_sublinks()
+ *
+ * jtlink1 points to the link in the jointree where any new JoinExprs should
+ * be inserted if they reference available_rels1 (i.e., available_rels1
+ * denotes the relations present underneath jtlink1). Optionally, jtlink2 can
+ * point to a second link where new JoinExprs should be inserted if they
+ * reference available_rels2 (pass NULL for both those arguments if not used).
+ * Note that SubLinks referencing both sets of variables cannot be optimized.
+ * If we find multiple pull-up-able SubLinks, they'll get stacked onto jtlink1
+ * and/or jtlink2 in the order we encounter them. We rely on subsequent
+ * optimization to rearrange the stack if appropriate.
+ *
+ * Returns the replacement qual node, or NULL if the qual should be removed.
+ */
+static Node *
+pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
+ Node **jtlink1, Relids available_rels1,
+ Node **jtlink2, Relids available_rels2)
+{
+ if (node == NULL)
+ return NULL;
+ if (IsA(node, SubLink))
+ {
+ SubLink *sublink = (SubLink *) node;
+ JoinExpr *j;
+ Relids child_rels;
+
+ /* Is it a convertible ANY or EXISTS clause? */
+ if (sublink->subLinkType == ANY_SUBLINK)
+ {
+ if ((j = convert_ANY_sublink_to_join(root, sublink,
+ available_rels1)) != NULL)
+ {
+ /* Yes; insert the new join node into the join tree */
+ j->larg = *jtlink1;
+ *jtlink1 = (Node *) j;
+ /* Recursively process pulled-up jointree nodes */
+ j->rarg = pull_up_sublinks_jointree_recurse(root,
+ j->rarg,
+ &child_rels);
+
+ /*
+ * Now recursively process the pulled-up quals. Any inserted
+ * joins can get stacked onto either j->larg or j->rarg,
+ * depending on which rels they reference.
+ */
+ j->quals = pull_up_sublinks_qual_recurse(root,
+ j->quals,
+ &j->larg,
+ available_rels1,
+ &j->rarg,
+ child_rels);
+ /* Return NULL representing constant TRUE */
+ return NULL;
+ }
+ if (available_rels2 != NULL &&
+ (j = convert_ANY_sublink_to_join(root, sublink,
+ available_rels2)) != NULL)
+ {
+ /* Yes; insert the new join node into the join tree */
+ j->larg = *jtlink2;
+ *jtlink2 = (Node *) j;
+ /* Recursively process pulled-up jointree nodes */
+ j->rarg = pull_up_sublinks_jointree_recurse(root,
+ j->rarg,
+ &child_rels);
+
+ /*
+ * Now recursively process the pulled-up quals. Any inserted
+ * joins can get stacked onto either j->larg or j->rarg,
+ * depending on which rels they reference.
+ */
+ j->quals = pull_up_sublinks_qual_recurse(root,
+ j->quals,
+ &j->larg,
+ available_rels2,
+ &j->rarg,
+ child_rels);
+ /* Return NULL representing constant TRUE */
+ return NULL;
+ }
+ }
+ else if (sublink->subLinkType == EXISTS_SUBLINK)
+ {
+ if ((j = convert_EXISTS_sublink_to_join(root, sublink, false,
+ available_rels1)) != NULL)
+ {
+ /* Yes; insert the new join node into the join tree */
+ j->larg = *jtlink1;
+ *jtlink1 = (Node *) j;
+ /* Recursively process pulled-up jointree nodes */
+ j->rarg = pull_up_sublinks_jointree_recurse(root,
+ j->rarg,
+ &child_rels);
+
+ /*
+ * Now recursively process the pulled-up quals. Any inserted
+ * joins can get stacked onto either j->larg or j->rarg,
+ * depending on which rels they reference.
+ */
+ j->quals = pull_up_sublinks_qual_recurse(root,
+ j->quals,
+ &j->larg,
+ available_rels1,
+ &j->rarg,
+ child_rels);
+ /* Return NULL representing constant TRUE */
+ return NULL;
+ }
+ if (available_rels2 != NULL &&
+ (j = convert_EXISTS_sublink_to_join(root, sublink, false,
+ available_rels2)) != NULL)
+ {
+ /* Yes; insert the new join node into the join tree */
+ j->larg = *jtlink2;
+ *jtlink2 = (Node *) j;
+ /* Recursively process pulled-up jointree nodes */
+ j->rarg = pull_up_sublinks_jointree_recurse(root,
+ j->rarg,
+ &child_rels);
+
+ /*
+ * Now recursively process the pulled-up quals. Any inserted
+ * joins can get stacked onto either j->larg or j->rarg,
+ * depending on which rels they reference.
+ */
+ j->quals = pull_up_sublinks_qual_recurse(root,
+ j->quals,
+ &j->larg,
+ available_rels2,
+ &j->rarg,
+ child_rels);
+ /* Return NULL representing constant TRUE */
+ return NULL;
+ }
+ }
+ /* Else return it unmodified */
+ return node;
+ }
+ if (is_notclause(node))
+ {
+ /* If the immediate argument of NOT is EXISTS, try to convert */
+ SubLink *sublink = (SubLink *) get_notclausearg((Expr *) node);
+ JoinExpr *j;
+ Relids child_rels;
+
+ if (sublink && IsA(sublink, SubLink))
+ {
+ if (sublink->subLinkType == EXISTS_SUBLINK)
+ {
+ if ((j = convert_EXISTS_sublink_to_join(root, sublink, true,
+ available_rels1)) != NULL)
+ {
+ /* Yes; insert the new join node into the join tree */
+ j->larg = *jtlink1;
+ *jtlink1 = (Node *) j;
+ /* Recursively process pulled-up jointree nodes */
+ j->rarg = pull_up_sublinks_jointree_recurse(root,
+ j->rarg,
+ &child_rels);
+
+ /*
+ * Now recursively process the pulled-up quals. Because
+ * we are underneath a NOT, we can't pull up sublinks that
+ * reference the left-hand stuff, but it's still okay to
+ * pull up sublinks referencing j->rarg.
+ */
+ j->quals = pull_up_sublinks_qual_recurse(root,
+ j->quals,
+ &j->rarg,
+ child_rels,
+ NULL, NULL);
+ /* Return NULL representing constant TRUE */
+ return NULL;
+ }
+ if (available_rels2 != NULL &&
+ (j = convert_EXISTS_sublink_to_join(root, sublink, true,
+ available_rels2)) != NULL)
+ {
+ /* Yes; insert the new join node into the join tree */
+ j->larg = *jtlink2;
+ *jtlink2 = (Node *) j;
+ /* Recursively process pulled-up jointree nodes */
+ j->rarg = pull_up_sublinks_jointree_recurse(root,
+ j->rarg,
+ &child_rels);
+
+ /*
+ * Now recursively process the pulled-up quals. Because
+ * we are underneath a NOT, we can't pull up sublinks that
+ * reference the left-hand stuff, but it's still okay to
+ * pull up sublinks referencing j->rarg.
+ */
+ j->quals = pull_up_sublinks_qual_recurse(root,
+ j->quals,
+ &j->rarg,
+ child_rels,
+ NULL, NULL);
+ /* Return NULL representing constant TRUE */
+ return NULL;
+ }
+ }
+ }
+ /* Else return it unmodified */
+ return node;
+ }
+ if (is_andclause(node))
+ {
+ /* Recurse into AND clause */
+ List *newclauses = NIL;
+ ListCell *l;
+
+ foreach(l, ((BoolExpr *) node)->args)
+ {
+ Node *oldclause = (Node *) lfirst(l);
+ Node *newclause;
+
+ newclause = pull_up_sublinks_qual_recurse(root,
+ oldclause,
+ jtlink1,
+ available_rels1,
+ jtlink2,
+ available_rels2);
+ if (newclause)
+ newclauses = lappend(newclauses, newclause);
+ }
+ /* We might have got back fewer clauses than we started with */
+ if (newclauses == NIL)
+ return NULL;
+ else if (list_length(newclauses) == 1)
+ return (Node *) linitial(newclauses);
+ else
+ return (Node *) make_andclause(newclauses);
+ }
+ /* Stop if not an AND */
+ return node;
+}
+
+/*
+ * preprocess_function_rtes
+ * Constant-simplify any FUNCTION RTEs in the FROM clause, and then
+ * attempt to "inline" any that are set-returning functions.
+ *
+ * If an RTE_FUNCTION rtable entry invokes a set-returning function that
+ * contains just a simple SELECT, we can convert the rtable entry to an
+ * RTE_SUBQUERY entry exposing the SELECT directly. This is especially
+ * useful if the subquery can then be "pulled up" for further optimization,
+ * but we do it even if not, to reduce executor overhead.
+ *
+ * This has to be done before we have started to do any optimization of
+ * subqueries, else any such steps wouldn't get applied to subqueries
+ * obtained via inlining. However, we do it after pull_up_sublinks
+ * so that we can inline any functions used in SubLink subselects.
+ *
+ * The reason for applying const-simplification at this stage is that
+ * (a) we'd need to do it anyway to inline a SRF, and (b) by doing it now,
+ * we can be sure that pull_up_constant_function() will see constants
+ * if there are constants to be seen. This approach also guarantees
+ * that every FUNCTION RTE has been const-simplified, allowing planner.c's
+ * preprocess_expression() to skip doing it again.
+ *
+ * Like most of the planner, this feels free to scribble on its input data
+ * structure.
+ */
+void
+preprocess_function_rtes(PlannerInfo *root)
+{
+ ListCell *rt;
+
+ foreach(rt, root->parse->rtable)
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
+
+ if (rte->rtekind == RTE_FUNCTION)
+ {
+ Query *funcquery;
+
+ /* Apply const-simplification */
+ rte->functions = (List *)
+ eval_const_expressions(root, (Node *) rte->functions);
+
+ /* Check safety of expansion, and expand if possible */
+ funcquery = inline_set_returning_function(root, rte);
+ if (funcquery)
+ {
+ /* Successful expansion, convert the RTE to a subquery */
+ rte->rtekind = RTE_SUBQUERY;
+ rte->subquery = funcquery;
+ rte->security_barrier = false;
+ /* Clear fields that should not be set in a subquery RTE */
+ rte->functions = NIL;
+ rte->funcordinality = false;
+ }
+ }
+ }
+}
+
+/*
+ * pull_up_subqueries
+ * Look for subqueries in the rangetable that can be pulled up into
+ * the parent query. If the subquery has no special features like
+ * grouping/aggregation then we can merge it into the parent's jointree.
+ * Also, subqueries that are simple UNION ALL structures can be
+ * converted into "append relations".
+ */
+void
+pull_up_subqueries(PlannerInfo *root)
+{
+ /* Top level of jointree must always be a FromExpr */
+ Assert(IsA(root->parse->jointree, FromExpr));
+ /* Recursion starts with no containing join nor appendrel */
+ root->parse->jointree = (FromExpr *)
+ pull_up_subqueries_recurse(root, (Node *) root->parse->jointree,
+ NULL, NULL, NULL);
+ /* We should still have a FromExpr */
+ Assert(IsA(root->parse->jointree, FromExpr));
+}
+
+/*
+ * pull_up_subqueries_recurse
+ * Recursive guts of pull_up_subqueries.
+ *
+ * This recursively processes the jointree and returns a modified jointree.
+ *
+ * If this jointree node is within either side of an outer join, then
+ * lowest_outer_join references the lowest such JoinExpr node; otherwise
+ * it is NULL. We use this to constrain the effects of LATERAL subqueries.
+ *
+ * If this jointree node is within the nullable side of an outer join, then
+ * lowest_nulling_outer_join references the lowest such JoinExpr node;
+ * otherwise it is NULL. This forces use of the PlaceHolderVar mechanism for
+ * references to non-nullable targetlist items, but only for references above
+ * that join.
+ *
+ * If we are looking at a member subquery of an append relation,
+ * containing_appendrel describes that relation; else it is NULL.
+ * This forces use of the PlaceHolderVar mechanism for all non-Var targetlist
+ * items, and puts some additional restrictions on what can be pulled up.
+ *
+ * A tricky aspect of this code is that if we pull up a subquery we have
+ * to replace Vars that reference the subquery's outputs throughout the
+ * parent query, including quals attached to jointree nodes above the one
+ * we are currently processing! We handle this by being careful to maintain
+ * validity of the jointree structure while recursing, in the following sense:
+ * whenever we recurse, all qual expressions in the tree must be reachable
+ * from the top level, in case the recursive call needs to modify them.
+ *
+ * Notice also that we can't turn pullup_replace_vars loose on the whole
+ * jointree, because it'd return a mutated copy of the tree; we have to
+ * invoke it just on the quals, instead. This behavior is what makes it
+ * reasonable to pass lowest_outer_join and lowest_nulling_outer_join as
+ * pointers rather than some more-indirect way of identifying the lowest
+ * OJs. Likewise, we don't replace append_rel_list members but only their
+ * substructure, so the containing_appendrel reference is safe to use.
+ */
+static Node *
+pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
+ JoinExpr *lowest_outer_join,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel)
+{
+ Assert(jtnode != NULL);
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+ RangeTblEntry *rte = rt_fetch(varno, root->parse->rtable);
+
+ /*
+ * Is this a subquery RTE, and if so, is the subquery simple enough to
+ * pull up?
+ *
+ * If we are looking at an append-relation member, we can't pull it up
+ * unless is_safe_append_member says so.
+ */
+ if (rte->rtekind == RTE_SUBQUERY &&
+ is_simple_subquery(root, rte->subquery, rte, lowest_outer_join) &&
+ (containing_appendrel == NULL ||
+ is_safe_append_member(rte->subquery)))
+ return pull_up_simple_subquery(root, jtnode, rte,
+ lowest_outer_join,
+ lowest_nulling_outer_join,
+ containing_appendrel);
+
+ /*
+ * Alternatively, is it a simple UNION ALL subquery? If so, flatten
+ * into an "append relation".
+ *
+ * It's safe to do this regardless of whether this query is itself an
+ * appendrel member. (If you're thinking we should try to flatten the
+ * two levels of appendrel together, you're right; but we handle that
+ * in set_append_rel_pathlist, not here.)
+ */
+ if (rte->rtekind == RTE_SUBQUERY &&
+ is_simple_union_all(rte->subquery))
+ return pull_up_simple_union_all(root, jtnode, rte);
+
+ /*
+ * Or perhaps it's a simple VALUES RTE?
+ *
+ * We don't allow VALUES pullup below an outer join nor into an
+ * appendrel (such cases are impossible anyway at the moment).
+ */
+ if (rte->rtekind == RTE_VALUES &&
+ lowest_outer_join == NULL &&
+ containing_appendrel == NULL &&
+ is_simple_values(root, rte))
+ return pull_up_simple_values(root, jtnode, rte);
+
+ /*
+ * Or perhaps it's a FUNCTION RTE that we could inline?
+ */
+ if (rte->rtekind == RTE_FUNCTION)
+ return pull_up_constant_function(root, jtnode, rte,
+ lowest_nulling_outer_join,
+ containing_appendrel);
+
+ /* Otherwise, do nothing at this node. */
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ Assert(containing_appendrel == NULL);
+ /* Recursively transform all the child nodes */
+ foreach(l, f->fromlist)
+ {
+ lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
+ lowest_outer_join,
+ lowest_nulling_outer_join,
+ NULL);
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+
+ Assert(containing_appendrel == NULL);
+ /* Recurse, being careful to tell myself when inside outer join */
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+ j->larg = pull_up_subqueries_recurse(root, j->larg,
+ lowest_outer_join,
+ lowest_nulling_outer_join,
+ NULL);
+ j->rarg = pull_up_subqueries_recurse(root, j->rarg,
+ lowest_outer_join,
+ lowest_nulling_outer_join,
+ NULL);
+ break;
+ case JOIN_LEFT:
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+ j->larg = pull_up_subqueries_recurse(root, j->larg,
+ j,
+ lowest_nulling_outer_join,
+ NULL);
+ j->rarg = pull_up_subqueries_recurse(root, j->rarg,
+ j,
+ j,
+ NULL);
+ break;
+ case JOIN_FULL:
+ j->larg = pull_up_subqueries_recurse(root, j->larg,
+ j,
+ j,
+ NULL);
+ j->rarg = pull_up_subqueries_recurse(root, j->rarg,
+ j,
+ j,
+ NULL);
+ break;
+ case JOIN_RIGHT:
+ j->larg = pull_up_subqueries_recurse(root, j->larg,
+ j,
+ j,
+ NULL);
+ j->rarg = pull_up_subqueries_recurse(root, j->rarg,
+ j,
+ lowest_nulling_outer_join,
+ NULL);
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ break;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return jtnode;
+}
+
+/*
+ * pull_up_simple_subquery
+ * Attempt to pull up a single simple subquery.
+ *
+ * jtnode is a RangeTblRef that has been tentatively identified as a simple
+ * subquery by pull_up_subqueries. We return the replacement jointree node,
+ * or jtnode itself if we determine that the subquery can't be pulled up
+ * after all.
+ *
+ * rte is the RangeTblEntry referenced by jtnode. Remaining parameters are
+ * as for pull_up_subqueries_recurse.
+ */
+static Node *
+pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
+ JoinExpr *lowest_outer_join,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel)
+{
+ Query *parse = root->parse;
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+ Query *subquery;
+ PlannerInfo *subroot;
+ int rtoffset;
+ pullup_replace_vars_context rvcontext;
+ ListCell *lc;
+
+ /*
+ * Need a modifiable copy of the subquery to hack on. Even if we didn't
+ * sometimes choose not to pull up below, we must do this to avoid
+ * problems if the same subquery is referenced from multiple jointree
+ * items (which can't happen normally, but might after rule rewriting).
+ */
+ subquery = copyObject(rte->subquery);
+
+ /*
+ * Create a PlannerInfo data structure for this subquery.
+ *
+ * NOTE: the next few steps should match the first processing in
+ * subquery_planner(). Can we refactor to avoid code duplication, or
+ * would that just make things uglier?
+ */
+ subroot = makeNode(PlannerInfo);
+ subroot->parse = subquery;
+ subroot->glob = root->glob;
+ subroot->query_level = root->query_level;
+ subroot->parent_root = root->parent_root;
+ subroot->plan_params = NIL;
+ subroot->outer_params = NULL;
+ subroot->planner_cxt = CurrentMemoryContext;
+ subroot->init_plans = NIL;
+ subroot->cte_plan_ids = NIL;
+ subroot->multiexpr_params = NIL;
+ subroot->eq_classes = NIL;
+ subroot->ec_merging_done = false;
+ subroot->all_result_relids = NULL;
+ subroot->leaf_result_relids = NULL;
+ subroot->append_rel_list = NIL;
+ subroot->row_identity_vars = NIL;
+ subroot->rowMarks = NIL;
+ memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
+ memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
+ subroot->processed_tlist = NIL;
+ subroot->update_colnos = NIL;
+ subroot->grouping_map = NULL;
+ subroot->minmax_aggs = NIL;
+ subroot->qual_security_level = 0;
+ subroot->hasRecursion = false;
+ subroot->wt_param_id = -1;
+ subroot->non_recursive_path = NULL;
+
+ /* No CTEs to worry about */
+ Assert(subquery->cteList == NIL);
+
+ /*
+ * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
+ * that we don't need so many special cases to deal with that situation.
+ */
+ replace_empty_jointree(subquery);
+
+ /*
+ * Pull up any SubLinks within the subquery's quals, so that we don't
+ * leave unoptimized SubLinks behind.
+ */
+ if (subquery->hasSubLinks)
+ pull_up_sublinks(subroot);
+
+ /*
+ * Similarly, preprocess its function RTEs to inline any set-returning
+ * functions in its rangetable.
+ */
+ preprocess_function_rtes(subroot);
+
+ /*
+ * Recursively pull up the subquery's subqueries, so that
+ * pull_up_subqueries' processing is complete for its jointree and
+ * rangetable.
+ *
+ * Note: it's okay that the subquery's recursion starts with NULL for
+ * containing-join info, even if we are within an outer join in the upper
+ * query; the lower query starts with a clean slate for outer-join
+ * semantics. Likewise, we needn't pass down appendrel state.
+ */
+ pull_up_subqueries(subroot);
+
+ /*
+ * Now we must recheck whether the subquery is still simple enough to pull
+ * up. If not, abandon processing it.
+ *
+ * We don't really need to recheck all the conditions involved, but it's
+ * easier just to keep this "if" looking the same as the one in
+ * pull_up_subqueries_recurse.
+ */
+ if (is_simple_subquery(root, subquery, rte, lowest_outer_join) &&
+ (containing_appendrel == NULL || is_safe_append_member(subquery)))
+ {
+ /* good to go */
+ }
+ else
+ {
+ /*
+ * Give up, return unmodified RangeTblRef.
+ *
+ * Note: The work we just did will be redone when the subquery gets
+ * planned on its own. Perhaps we could avoid that by storing the
+ * modified subquery back into the rangetable, but I'm not gonna risk
+ * it now.
+ */
+ return jtnode;
+ }
+
+ /*
+ * We must flatten any join alias Vars in the subquery's targetlist,
+ * because pulling up the subquery's subqueries might have changed their
+ * expansions into arbitrary expressions, which could affect
+ * pullup_replace_vars' decisions about whether PlaceHolderVar wrappers
+ * are needed for tlist entries. (Likely it'd be better to do
+ * flatten_join_alias_vars on the whole query tree at some earlier stage,
+ * maybe even in the rewriter; but for now let's just fix this case here.)
+ */
+ subquery->targetList = (List *)
+ flatten_join_alias_vars(subroot->parse, (Node *) subquery->targetList);
+
+ /*
+ * Adjust level-0 varnos in subquery so that we can append its rangetable
+ * to upper query's. We have to fix the subquery's append_rel_list as
+ * well.
+ */
+ rtoffset = list_length(parse->rtable);
+ OffsetVarNodes((Node *) subquery, rtoffset, 0);
+ OffsetVarNodes((Node *) subroot->append_rel_list, rtoffset, 0);
+
+ /*
+ * Upper-level vars in subquery are now one level closer to their parent
+ * than before.
+ */
+ IncrementVarSublevelsUp((Node *) subquery, -1, 1);
+ IncrementVarSublevelsUp((Node *) subroot->append_rel_list, -1, 1);
+
+ /*
+ * The subquery's targetlist items are now in the appropriate form to
+ * insert into the top query, except that we may need to wrap them in
+ * PlaceHolderVars. Set up required context data for pullup_replace_vars.
+ */
+ rvcontext.root = root;
+ rvcontext.targetlist = subquery->targetList;
+ rvcontext.target_rte = rte;
+ if (rte->lateral)
+ rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree,
+ true);
+ else /* won't need relids */
+ rvcontext.relids = NULL;
+ rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
+ rvcontext.varno = varno;
+ /* these flags will be set below, if needed */
+ rvcontext.need_phvs = false;
+ rvcontext.wrap_non_vars = false;
+ /* initialize cache array with indexes 0 .. length(tlist) */
+ rvcontext.rv_cache = palloc0((list_length(subquery->targetList) + 1) *
+ sizeof(Node *));
+
+ /*
+ * If we are under an outer join then non-nullable items and lateral
+ * references may have to be turned into PlaceHolderVars.
+ */
+ if (lowest_nulling_outer_join != NULL)
+ rvcontext.need_phvs = true;
+
+ /*
+ * If we are dealing with an appendrel member then anything that's not a
+ * simple Var has to be turned into a PlaceHolderVar. We force this to
+ * ensure that what we pull up doesn't get merged into a surrounding
+ * expression during later processing and then fail to match the
+ * expression actually available from the appendrel.
+ */
+ if (containing_appendrel != NULL)
+ {
+ rvcontext.need_phvs = true;
+ rvcontext.wrap_non_vars = true;
+ }
+
+ /*
+ * If the parent query uses grouping sets, we need a PlaceHolderVar for
+ * anything that's not a simple Var. Again, this ensures that expressions
+ * retain their separate identity so that they will match grouping set
+ * columns when appropriate. (It'd be sufficient to wrap values used in
+ * grouping set columns, and do so only in non-aggregated portions of the
+ * tlist and havingQual, but that would require a lot of infrastructure
+ * that pullup_replace_vars hasn't currently got.)
+ */
+ if (parse->groupingSets)
+ {
+ rvcontext.need_phvs = true;
+ rvcontext.wrap_non_vars = true;
+ }
+
+ /*
+ * Replace all of the top query's references to the subquery's outputs
+ * with copies of the adjusted subtlist items, being careful not to
+ * replace any of the jointree structure.
+ */
+ perform_pullup_replace_vars(root, &rvcontext,
+ lowest_nulling_outer_join,
+ containing_appendrel);
+
+ /*
+ * If the subquery had a LATERAL marker, propagate that to any of its
+ * child RTEs that could possibly now contain lateral cross-references.
+ * The children might or might not contain any actual lateral
+ * cross-references, but we have to mark the pulled-up child RTEs so that
+ * later planner stages will check for such.
+ */
+ if (rte->lateral)
+ {
+ foreach(lc, subquery->rtable)
+ {
+ RangeTblEntry *child_rte = (RangeTblEntry *) lfirst(lc);
+
+ switch (child_rte->rtekind)
+ {
+ case RTE_RELATION:
+ if (child_rte->tablesample)
+ child_rte->lateral = true;
+ break;
+ case RTE_SUBQUERY:
+ case RTE_FUNCTION:
+ case RTE_VALUES:
+ case RTE_TABLEFUNC:
+ child_rte->lateral = true;
+ break;
+ case RTE_JOIN:
+ case RTE_CTE:
+ case RTE_NAMEDTUPLESTORE:
+ case RTE_RESULT:
+ /* these can't contain any lateral references */
+ break;
+ }
+ }
+ }
+
+ /*
+ * Now append the adjusted rtable entries to upper query. (We hold off
+ * until after fixing the upper rtable entries; no point in running that
+ * code on the subquery ones too.)
+ */
+ parse->rtable = list_concat(parse->rtable, subquery->rtable);
+
+ /*
+ * Pull up any FOR UPDATE/SHARE markers, too. (OffsetVarNodes already
+ * adjusted the marker rtindexes, so just concat the lists.)
+ */
+ parse->rowMarks = list_concat(parse->rowMarks, subquery->rowMarks);
+
+ /*
+ * We also have to fix the relid sets of any PlaceHolderVar nodes in the
+ * parent query. (This could perhaps be done by pullup_replace_vars(),
+ * but it seems cleaner to use two passes.) Note in particular that any
+ * PlaceHolderVar nodes just created by pullup_replace_vars() will be
+ * adjusted, so having created them with the subquery's varno is correct.
+ *
+ * Likewise, relids appearing in AppendRelInfo nodes have to be fixed. We
+ * already checked that this won't require introducing multiple subrelids
+ * into the single-slot AppendRelInfo structs.
+ */
+ if (parse->hasSubLinks || root->glob->lastPHId != 0 ||
+ root->append_rel_list)
+ {
+ Relids subrelids;
+
+ subrelids = get_relids_in_jointree((Node *) subquery->jointree, false);
+ substitute_phv_relids((Node *) parse, varno, subrelids);
+ fix_append_rel_relids(root->append_rel_list, varno, subrelids);
+ }
+
+ /*
+ * And now add subquery's AppendRelInfos to our list.
+ */
+ root->append_rel_list = list_concat(root->append_rel_list,
+ subroot->append_rel_list);
+
+ /*
+ * We don't have to do the equivalent bookkeeping for outer-join info,
+ * because that hasn't been set up yet. placeholder_list likewise.
+ */
+ Assert(root->join_info_list == NIL);
+ Assert(subroot->join_info_list == NIL);
+ Assert(root->placeholder_list == NIL);
+ Assert(subroot->placeholder_list == NIL);
+
+ /*
+ * Miscellaneous housekeeping.
+ *
+ * Although replace_rte_variables() faithfully updated parse->hasSubLinks
+ * if it copied any SubLinks out of the subquery's targetlist, we still
+ * could have SubLinks added to the query in the expressions of FUNCTION
+ * and VALUES RTEs copied up from the subquery. So it's necessary to copy
+ * subquery->hasSubLinks anyway. Perhaps this can be improved someday.
+ */
+ parse->hasSubLinks |= subquery->hasSubLinks;
+
+ /* If subquery had any RLS conditions, now main query does too */
+ parse->hasRowSecurity |= subquery->hasRowSecurity;
+
+ /*
+ * subquery won't be pulled up if it hasAggs, hasWindowFuncs, or
+ * hasTargetSRFs, so no work needed on those flags
+ */
+
+ /*
+ * Return the adjusted subquery jointree to replace the RangeTblRef entry
+ * in parent's jointree; or, if the FromExpr is degenerate, just return
+ * its single member.
+ */
+ Assert(IsA(subquery->jointree, FromExpr));
+ Assert(subquery->jointree->fromlist != NIL);
+ if (subquery->jointree->quals == NULL &&
+ list_length(subquery->jointree->fromlist) == 1)
+ return (Node *) linitial(subquery->jointree->fromlist);
+
+ return (Node *) subquery->jointree;
+}
+
+/*
+ * pull_up_simple_union_all
+ * Pull up a single simple UNION ALL subquery.
+ *
+ * jtnode is a RangeTblRef that has been identified as a simple UNION ALL
+ * subquery by pull_up_subqueries. We pull up the leaf subqueries and
+ * build an "append relation" for the union set. The result value is just
+ * jtnode, since we don't actually need to change the query jointree.
+ */
+static Node *
+pull_up_simple_union_all(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
+{
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+ Query *subquery = rte->subquery;
+ int rtoffset = list_length(root->parse->rtable);
+ List *rtable;
+
+ /*
+ * Make a modifiable copy of the subquery's rtable, so we can adjust
+ * upper-level Vars in it. There are no such Vars in the setOperations
+ * tree proper, so fixing the rtable should be sufficient.
+ */
+ rtable = copyObject(subquery->rtable);
+
+ /*
+ * Upper-level vars in subquery are now one level closer to their parent
+ * than before. We don't have to worry about offsetting varnos, though,
+ * because the UNION leaf queries can't cross-reference each other.
+ */
+ IncrementVarSublevelsUp_rtable(rtable, -1, 1);
+
+ /*
+ * If the UNION ALL subquery had a LATERAL marker, propagate that to all
+ * its children. The individual children might or might not contain any
+ * actual lateral cross-references, but we have to mark the pulled-up
+ * child RTEs so that later planner stages will check for such.
+ */
+ if (rte->lateral)
+ {
+ ListCell *rt;
+
+ foreach(rt, rtable)
+ {
+ RangeTblEntry *child_rte = (RangeTblEntry *) lfirst(rt);
+
+ Assert(child_rte->rtekind == RTE_SUBQUERY);
+ child_rte->lateral = true;
+ }
+ }
+
+ /*
+ * Append child RTEs to parent rtable.
+ */
+ root->parse->rtable = list_concat(root->parse->rtable, rtable);
+
+ /*
+ * Recursively scan the subquery's setOperations tree and add
+ * AppendRelInfo nodes for leaf subqueries to the parent's
+ * append_rel_list. Also apply pull_up_subqueries to the leaf subqueries.
+ */
+ Assert(subquery->setOperations);
+ pull_up_union_leaf_queries(subquery->setOperations, root, varno, subquery,
+ rtoffset);
+
+ /*
+ * Mark the parent as an append relation.
+ */
+ rte->inh = true;
+
+ return jtnode;
+}
+
+/*
+ * pull_up_union_leaf_queries -- recursive guts of pull_up_simple_union_all
+ *
+ * Build an AppendRelInfo for each leaf query in the setop tree, and then
+ * apply pull_up_subqueries to the leaf query.
+ *
+ * Note that setOpQuery is the Query containing the setOp node, whose tlist
+ * contains references to all the setop output columns. When called from
+ * pull_up_simple_union_all, this is *not* the same as root->parse, which is
+ * the parent Query we are pulling up into.
+ *
+ * parentRTindex is the appendrel parent's index in root->parse->rtable.
+ *
+ * The child RTEs have already been copied to the parent. childRToffset
+ * tells us where in the parent's range table they were copied. When called
+ * from flatten_simple_union_all, childRToffset is 0 since the child RTEs
+ * were already in root->parse->rtable and no RT index adjustment is needed.
+ */
+static void
+pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root, int parentRTindex,
+ Query *setOpQuery, int childRToffset)
+{
+ if (IsA(setOp, RangeTblRef))
+ {
+ RangeTblRef *rtr = (RangeTblRef *) setOp;
+ int childRTindex;
+ AppendRelInfo *appinfo;
+
+ /*
+ * Calculate the index in the parent's range table
+ */
+ childRTindex = childRToffset + rtr->rtindex;
+
+ /*
+ * Build a suitable AppendRelInfo, and attach to parent's list.
+ */
+ appinfo = makeNode(AppendRelInfo);
+ appinfo->parent_relid = parentRTindex;
+ appinfo->child_relid = childRTindex;
+ appinfo->parent_reltype = InvalidOid;
+ appinfo->child_reltype = InvalidOid;
+ make_setop_translation_list(setOpQuery, childRTindex, appinfo);
+ appinfo->parent_reloid = InvalidOid;
+ root->append_rel_list = lappend(root->append_rel_list, appinfo);
+
+ /*
+ * Recursively apply pull_up_subqueries to the new child RTE. (We
+ * must build the AppendRelInfo first, because this will modify it.)
+ * Note that we can pass NULL for containing-join info even if we're
+ * actually under an outer join, because the child's expressions
+ * aren't going to propagate up to the join. Also, we ignore the
+ * possibility that pull_up_subqueries_recurse() returns a different
+ * jointree node than what we pass it; if it does, the important thing
+ * is that it replaced the child relid in the AppendRelInfo node.
+ */
+ rtr = makeNode(RangeTblRef);
+ rtr->rtindex = childRTindex;
+ (void) pull_up_subqueries_recurse(root, (Node *) rtr,
+ NULL, NULL, appinfo);
+ }
+ else if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+ /* Recurse to reach leaf queries */
+ pull_up_union_leaf_queries(op->larg, root, parentRTindex, setOpQuery,
+ childRToffset);
+ pull_up_union_leaf_queries(op->rarg, root, parentRTindex, setOpQuery,
+ childRToffset);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(setOp));
+ }
+}
+
+/*
+ * make_setop_translation_list
+ * Build the list of translations from parent Vars to child Vars for
+ * a UNION ALL member. (At this point it's just a simple list of
+ * referencing Vars, but if we succeed in pulling up the member
+ * subquery, the Vars will get replaced by pulled-up expressions.)
+ * Also create the rather trivial reverse-translation array.
+ */
+static void
+make_setop_translation_list(Query *query, Index newvarno,
+ AppendRelInfo *appinfo)
+{
+ List *vars = NIL;
+ AttrNumber *pcolnos;
+ ListCell *l;
+
+ /* Initialize reverse-translation array with all entries zero */
+ /* (entries for resjunk columns will stay that way) */
+ appinfo->num_child_cols = list_length(query->targetList);
+ appinfo->parent_colnos = pcolnos =
+ (AttrNumber *) palloc0(appinfo->num_child_cols * sizeof(AttrNumber));
+
+ foreach(l, query->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk)
+ continue;
+
+ vars = lappend(vars, makeVarFromTargetEntry(newvarno, tle));
+ pcolnos[tle->resno - 1] = tle->resno;
+ }
+
+ appinfo->translated_vars = vars;
+}
+
+/*
+ * is_simple_subquery
+ * Check a subquery in the range table to see if it's simple enough
+ * to pull up into the parent query.
+ *
+ * rte is the RTE_SUBQUERY RangeTblEntry that contained the subquery.
+ * (Note subquery is not necessarily equal to rte->subquery; it could be a
+ * processed copy of that.)
+ * lowest_outer_join is the lowest outer join above the subquery, or NULL.
+ */
+static bool
+is_simple_subquery(PlannerInfo *root, Query *subquery, RangeTblEntry *rte,
+ JoinExpr *lowest_outer_join)
+{
+ /*
+ * Let's just make sure it's a valid subselect ...
+ */
+ if (!IsA(subquery, Query) ||
+ subquery->commandType != CMD_SELECT)
+ elog(ERROR, "subquery is bogus");
+
+ /*
+ * Can't currently pull up a query with setops (unless it's simple UNION
+ * ALL, which is handled by a different code path). Maybe after querytree
+ * redesign...
+ */
+ if (subquery->setOperations)
+ return false;
+
+ /*
+ * Can't pull up a subquery involving grouping, aggregation, SRFs,
+ * sorting, limiting, or WITH. (XXX WITH could possibly be allowed later)
+ *
+ * We also don't pull up a subquery that has explicit FOR UPDATE/SHARE
+ * clauses, because pullup would cause the locking to occur semantically
+ * higher than it should. Implicit FOR UPDATE/SHARE is okay because in
+ * that case the locking was originally declared in the upper query
+ * anyway.
+ */
+ if (subquery->hasAggs ||
+ subquery->hasWindowFuncs ||
+ subquery->hasTargetSRFs ||
+ subquery->groupClause ||
+ subquery->groupingSets ||
+ subquery->havingQual ||
+ subquery->sortClause ||
+ subquery->distinctClause ||
+ subquery->limitOffset ||
+ subquery->limitCount ||
+ subquery->hasForUpdate ||
+ subquery->cteList)
+ return false;
+
+ /*
+ * Don't pull up if the RTE represents a security-barrier view; we
+ * couldn't prevent information leakage once the RTE's Vars are scattered
+ * about in the upper query.
+ */
+ if (rte->security_barrier)
+ return false;
+
+ /*
+ * If the subquery is LATERAL, check for pullup restrictions from that.
+ */
+ if (rte->lateral)
+ {
+ bool restricted;
+ Relids safe_upper_varnos;
+
+ /*
+ * The subquery's WHERE and JOIN/ON quals mustn't contain any lateral
+ * references to rels outside a higher outer join (including the case
+ * where the outer join is within the subquery itself). In such a
+ * case, pulling up would result in a situation where we need to
+ * postpone quals from below an outer join to above it, which is
+ * probably completely wrong and in any case is a complication that
+ * doesn't seem worth addressing at the moment.
+ */
+ if (lowest_outer_join != NULL)
+ {
+ restricted = true;
+ safe_upper_varnos = get_relids_in_jointree((Node *) lowest_outer_join,
+ true);
+ }
+ else
+ {
+ restricted = false;
+ safe_upper_varnos = NULL; /* doesn't matter */
+ }
+
+ if (jointree_contains_lateral_outer_refs(root,
+ (Node *) subquery->jointree,
+ restricted, safe_upper_varnos))
+ return false;
+
+ /*
+ * If there's an outer join above the LATERAL subquery, also disallow
+ * pullup if the subquery's targetlist has any references to rels
+ * outside the outer join, since these might get pulled into quals
+ * above the subquery (but in or below the outer join) and then lead
+ * to qual-postponement issues similar to the case checked for above.
+ * (We wouldn't need to prevent pullup if no such references appear in
+ * outer-query quals, but we don't have enough info here to check
+ * that. Also, maybe this restriction could be removed if we forced
+ * such refs to be wrapped in PlaceHolderVars, even when they're below
+ * the nearest outer join? But it's a pretty hokey usage, so not
+ * clear this is worth sweating over.)
+ */
+ if (lowest_outer_join != NULL)
+ {
+ Relids lvarnos = pull_varnos_of_level(root,
+ (Node *) subquery->targetList,
+ 1);
+
+ if (!bms_is_subset(lvarnos, safe_upper_varnos))
+ return false;
+ }
+ }
+
+ /*
+ * Don't pull up a subquery that has any volatile functions in its
+ * targetlist. Otherwise we might introduce multiple evaluations of these
+ * functions, if they get copied to multiple places in the upper query,
+ * leading to surprising results. (Note: the PlaceHolderVar mechanism
+ * doesn't quite guarantee single evaluation; else we could pull up anyway
+ * and just wrap such items in PlaceHolderVars ...)
+ */
+ if (contain_volatile_functions((Node *) subquery->targetList))
+ return false;
+
+ return true;
+}
+
+/*
+ * pull_up_simple_values
+ * Pull up a single simple VALUES RTE.
+ *
+ * jtnode is a RangeTblRef that has been identified as a simple VALUES RTE
+ * by pull_up_subqueries. We always return a RangeTblRef representing a
+ * RESULT RTE to replace it (all failure cases should have been detected by
+ * is_simple_values()). Actually, what we return is just jtnode, because
+ * we replace the VALUES RTE in the rangetable with the RESULT RTE.
+ *
+ * rte is the RangeTblEntry referenced by jtnode. Because of the limited
+ * possible usage of VALUES RTEs, we do not need the remaining parameters
+ * of pull_up_subqueries_recurse.
+ */
+static Node *
+pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
+{
+ Query *parse = root->parse;
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+ List *values_list;
+ List *tlist;
+ AttrNumber attrno;
+ pullup_replace_vars_context rvcontext;
+ ListCell *lc;
+
+ Assert(rte->rtekind == RTE_VALUES);
+ Assert(list_length(rte->values_lists) == 1);
+
+ /*
+ * Need a modifiable copy of the VALUES list to hack on, just in case it's
+ * multiply referenced.
+ */
+ values_list = copyObject(linitial(rte->values_lists));
+
+ /*
+ * The VALUES RTE can't contain any Vars of level zero, let alone any that
+ * are join aliases, so no need to flatten join alias Vars.
+ */
+ Assert(!contain_vars_of_level((Node *) values_list, 0));
+
+ /*
+ * Set up required context data for pullup_replace_vars. In particular,
+ * we have to make the VALUES list look like a subquery targetlist.
+ */
+ tlist = NIL;
+ attrno = 1;
+ foreach(lc, values_list)
+ {
+ tlist = lappend(tlist,
+ makeTargetEntry((Expr *) lfirst(lc),
+ attrno,
+ NULL,
+ false));
+ attrno++;
+ }
+ rvcontext.root = root;
+ rvcontext.targetlist = tlist;
+ rvcontext.target_rte = rte;
+ rvcontext.relids = NULL;
+ rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
+ rvcontext.varno = varno;
+ rvcontext.need_phvs = false;
+ rvcontext.wrap_non_vars = false;
+ /* initialize cache array with indexes 0 .. length(tlist) */
+ rvcontext.rv_cache = palloc0((list_length(tlist) + 1) *
+ sizeof(Node *));
+
+ /*
+ * Replace all of the top query's references to the RTE's outputs with
+ * copies of the adjusted VALUES expressions, being careful not to replace
+ * any of the jointree structure. We can assume there's no outer joins or
+ * appendrels in the dummy Query that surrounds a VALUES RTE.
+ */
+ perform_pullup_replace_vars(root, &rvcontext, NULL, NULL);
+
+ /*
+ * There should be no appendrels to fix, nor any outer joins and hence no
+ * PlaceHolderVars.
+ */
+ Assert(root->append_rel_list == NIL);
+ Assert(root->join_info_list == NIL);
+ Assert(root->placeholder_list == NIL);
+
+ /*
+ * Replace the VALUES RTE with a RESULT RTE. The VALUES RTE is the only
+ * rtable entry in the current query level, so this is easy.
+ */
+ Assert(list_length(parse->rtable) == 1);
+
+ /* Create suitable RTE */
+ rte = makeNode(RangeTblEntry);
+ rte->rtekind = RTE_RESULT;
+ rte->eref = makeAlias("*RESULT*", NIL);
+
+ /* Replace rangetable */
+ parse->rtable = list_make1(rte);
+
+ /* We could manufacture a new RangeTblRef, but the one we have is fine */
+ Assert(varno == 1);
+
+ return jtnode;
+}
+
+/*
+ * is_simple_values
+ * Check a VALUES RTE in the range table to see if it's simple enough
+ * to pull up into the parent query.
+ *
+ * rte is the RTE_VALUES RangeTblEntry to check.
+ */
+static bool
+is_simple_values(PlannerInfo *root, RangeTblEntry *rte)
+{
+ Assert(rte->rtekind == RTE_VALUES);
+
+ /*
+ * There must be exactly one VALUES list, else it's not semantically
+ * correct to replace the VALUES RTE with a RESULT RTE, nor would we have
+ * a unique set of expressions to substitute into the parent query.
+ */
+ if (list_length(rte->values_lists) != 1)
+ return false;
+
+ /*
+ * Because VALUES can't appear under an outer join (or at least, we won't
+ * try to pull it up if it does), we need not worry about LATERAL, nor
+ * about validity of PHVs for the VALUES' outputs.
+ */
+
+ /*
+ * Don't pull up a VALUES that contains any set-returning or volatile
+ * functions. The considerations here are basically identical to the
+ * restrictions on a pull-able subquery's targetlist.
+ */
+ if (expression_returns_set((Node *) rte->values_lists) ||
+ contain_volatile_functions((Node *) rte->values_lists))
+ return false;
+
+ /*
+ * Do not pull up a VALUES that's not the only RTE in its parent query.
+ * This is actually the only case that the parser will generate at the
+ * moment, and assuming this is true greatly simplifies
+ * pull_up_simple_values().
+ */
+ if (list_length(root->parse->rtable) != 1 ||
+ rte != (RangeTblEntry *) linitial(root->parse->rtable))
+ return false;
+
+ return true;
+}
+
+/*
+ * pull_up_constant_function
+ * Pull up an RTE_FUNCTION expression that was simplified to a constant.
+ *
+ * jtnode is a RangeTblRef that has been identified as a FUNCTION RTE by
+ * pull_up_subqueries. If its expression is just a Const, hoist that value
+ * up into the parent query, and replace the RTE_FUNCTION with RTE_RESULT.
+ *
+ * In principle we could pull up any immutable expression, but we don't.
+ * That might result in multiple evaluations of the expression, which could
+ * be costly if it's not just a Const. Also, the main value of this is
+ * to let the constant participate in further const-folding, and of course
+ * that won't happen for a non-Const.
+ *
+ * The pulled-up value might need to be wrapped in a PlaceHolderVar if the
+ * RTE is below an outer join or is part of an appendrel; the extra
+ * parameters show whether that's needed.
+ */
+static Node *
+pull_up_constant_function(PlannerInfo *root, Node *jtnode,
+ RangeTblEntry *rte,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel)
+{
+ Query *parse = root->parse;
+ RangeTblFunction *rtf;
+ TypeFuncClass functypclass;
+ Oid funcrettype;
+ TupleDesc tupdesc;
+ pullup_replace_vars_context rvcontext;
+
+ /* Fail if the RTE has ORDINALITY - we don't implement that here. */
+ if (rte->funcordinality)
+ return jtnode;
+
+ /* Fail if RTE isn't a single, simple Const expr */
+ if (list_length(rte->functions) != 1)
+ return jtnode;
+ rtf = linitial_node(RangeTblFunction, rte->functions);
+ if (!IsA(rtf->funcexpr, Const))
+ return jtnode;
+
+ /*
+ * If the function's result is not a scalar, we punt. In principle we
+ * could break the composite constant value apart into per-column
+ * constants, but for now it seems not worth the work.
+ */
+ if (rtf->funccolcount != 1)
+ return jtnode; /* definitely composite */
+
+ functypclass = get_expr_result_type(rtf->funcexpr,
+ &funcrettype,
+ &tupdesc);
+ if (functypclass != TYPEFUNC_SCALAR)
+ return jtnode; /* must be a one-column composite type */
+
+ /* Create context for applying pullup_replace_vars */
+ rvcontext.root = root;
+ rvcontext.targetlist = list_make1(makeTargetEntry((Expr *) rtf->funcexpr,
+ 1, /* resno */
+ NULL, /* resname */
+ false)); /* resjunk */
+ rvcontext.target_rte = rte;
+
+ /*
+ * Since this function was reduced to a Const, it doesn't contain any
+ * lateral references, even if it's marked as LATERAL. This means we
+ * don't need to fill relids.
+ */
+ rvcontext.relids = NULL;
+
+ rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
+ rvcontext.varno = ((RangeTblRef *) jtnode)->rtindex;
+ /* these flags will be set below, if needed */
+ rvcontext.need_phvs = false;
+ rvcontext.wrap_non_vars = false;
+ /* initialize cache array with indexes 0 .. length(tlist) */
+ rvcontext.rv_cache = palloc0((list_length(rvcontext.targetlist) + 1) *
+ sizeof(Node *));
+
+ /*
+ * If we are under an outer join then non-nullable items and lateral
+ * references may have to be turned into PlaceHolderVars.
+ */
+ if (lowest_nulling_outer_join != NULL)
+ rvcontext.need_phvs = true;
+
+ /*
+ * If we are dealing with an appendrel member then anything that's not a
+ * simple Var has to be turned into a PlaceHolderVar. (See comments in
+ * pull_up_simple_subquery().)
+ */
+ if (containing_appendrel != NULL)
+ {
+ rvcontext.need_phvs = true;
+ rvcontext.wrap_non_vars = true;
+ }
+
+ /*
+ * If the parent query uses grouping sets, we need a PlaceHolderVar for
+ * anything that's not a simple Var.
+ */
+ if (parse->groupingSets)
+ {
+ rvcontext.need_phvs = true;
+ rvcontext.wrap_non_vars = true;
+ }
+
+ /*
+ * Replace all of the top query's references to the RTE's output with
+ * copies of the funcexpr, being careful not to replace any of the
+ * jointree structure.
+ */
+ perform_pullup_replace_vars(root, &rvcontext,
+ lowest_nulling_outer_join,
+ containing_appendrel);
+
+ /*
+ * We don't need to bother with changing PlaceHolderVars in the parent
+ * query. Their references to the RT index are still good for now, and
+ * will get removed later if we're able to drop the RTE_RESULT.
+ */
+
+ /*
+ * Convert the RTE to be RTE_RESULT type, signifying that we don't need to
+ * scan it anymore, and zero out RTE_FUNCTION-specific fields. Also make
+ * sure the RTE is not marked LATERAL, since elsewhere we don't expect
+ * RTE_RESULTs to be LATERAL.
+ */
+ rte->rtekind = RTE_RESULT;
+ rte->functions = NIL;
+ rte->lateral = false;
+
+ /*
+ * We can reuse the RangeTblRef node.
+ */
+ return jtnode;
+}
+
+/*
+ * is_simple_union_all
+ * Check a subquery to see if it's a simple UNION ALL.
+ *
+ * We require all the setops to be UNION ALL (no mixing) and there can't be
+ * any datatype coercions involved, ie, all the leaf queries must emit the
+ * same datatypes.
+ */
+static bool
+is_simple_union_all(Query *subquery)
+{
+ SetOperationStmt *topop;
+
+ /* Let's just make sure it's a valid subselect ... */
+ if (!IsA(subquery, Query) ||
+ subquery->commandType != CMD_SELECT)
+ elog(ERROR, "subquery is bogus");
+
+ /* Is it a set-operation query at all? */
+ topop = castNode(SetOperationStmt, subquery->setOperations);
+ if (!topop)
+ return false;
+
+ /* Can't handle ORDER BY, LIMIT/OFFSET, locking, or WITH */
+ if (subquery->sortClause ||
+ subquery->limitOffset ||
+ subquery->limitCount ||
+ subquery->rowMarks ||
+ subquery->cteList)
+ return false;
+
+ /* Recursively check the tree of set operations */
+ return is_simple_union_all_recurse((Node *) topop, subquery,
+ topop->colTypes);
+}
+
+static bool
+is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes)
+{
+ if (IsA(setOp, RangeTblRef))
+ {
+ RangeTblRef *rtr = (RangeTblRef *) setOp;
+ RangeTblEntry *rte = rt_fetch(rtr->rtindex, setOpQuery->rtable);
+ Query *subquery = rte->subquery;
+
+ Assert(subquery != NULL);
+
+ /* Leaf nodes are OK if they match the toplevel column types */
+ /* We don't have to compare typmods or collations here */
+ return tlist_same_datatypes(subquery->targetList, colTypes, true);
+ }
+ else if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+ /* Must be UNION ALL */
+ if (op->op != SETOP_UNION || !op->all)
+ return false;
+
+ /* Recurse to check inputs */
+ return is_simple_union_all_recurse(op->larg, setOpQuery, colTypes) &&
+ is_simple_union_all_recurse(op->rarg, setOpQuery, colTypes);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(setOp));
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * is_safe_append_member
+ * Check a subquery that is a leaf of a UNION ALL appendrel to see if it's
+ * safe to pull up.
+ */
+static bool
+is_safe_append_member(Query *subquery)
+{
+ FromExpr *jtnode;
+
+ /*
+ * It's only safe to pull up the child if its jointree contains exactly
+ * one RTE, else the AppendRelInfo data structure breaks. The one base RTE
+ * could be buried in several levels of FromExpr, however. Also, if the
+ * child's jointree is completely empty, we can pull up because
+ * pull_up_simple_subquery will insert a single RTE_RESULT RTE instead.
+ *
+ * Also, the child can't have any WHERE quals because there's no place to
+ * put them in an appendrel. (This is a bit annoying...) If we didn't
+ * need to check this, we'd just test whether get_relids_in_jointree()
+ * yields a singleton set, to be more consistent with the coding of
+ * fix_append_rel_relids().
+ */
+ jtnode = subquery->jointree;
+ Assert(IsA(jtnode, FromExpr));
+ /* Check the completely-empty case */
+ if (jtnode->fromlist == NIL && jtnode->quals == NULL)
+ return true;
+ /* Check the more general case */
+ while (IsA(jtnode, FromExpr))
+ {
+ if (jtnode->quals != NULL)
+ return false;
+ if (list_length(jtnode->fromlist) != 1)
+ return false;
+ jtnode = linitial(jtnode->fromlist);
+ }
+ if (!IsA(jtnode, RangeTblRef))
+ return false;
+
+ return true;
+}
+
+/*
+ * jointree_contains_lateral_outer_refs
+ * Check for disallowed lateral references in a jointree's quals
+ *
+ * If restricted is false, all level-1 Vars are allowed (but we still must
+ * search the jointree, since it might contain outer joins below which there
+ * will be restrictions). If restricted is true, return true when any qual
+ * in the jointree contains level-1 Vars coming from outside the rels listed
+ * in safe_upper_varnos.
+ */
+static bool
+jointree_contains_lateral_outer_refs(PlannerInfo *root, Node *jtnode,
+ bool restricted,
+ Relids safe_upper_varnos)
+{
+ if (jtnode == NULL)
+ return false;
+ if (IsA(jtnode, RangeTblRef))
+ return false;
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ /* First, recurse to check child joins */
+ foreach(l, f->fromlist)
+ {
+ if (jointree_contains_lateral_outer_refs(root,
+ lfirst(l),
+ restricted,
+ safe_upper_varnos))
+ return true;
+ }
+
+ /* Then check the top-level quals */
+ if (restricted &&
+ !bms_is_subset(pull_varnos_of_level(root, f->quals, 1),
+ safe_upper_varnos))
+ return true;
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+
+ /*
+ * If this is an outer join, we mustn't allow any upper lateral
+ * references in or below it.
+ */
+ if (j->jointype != JOIN_INNER)
+ {
+ restricted = true;
+ safe_upper_varnos = NULL;
+ }
+
+ /* Check the child joins */
+ if (jointree_contains_lateral_outer_refs(root,
+ j->larg,
+ restricted,
+ safe_upper_varnos))
+ return true;
+ if (jointree_contains_lateral_outer_refs(root,
+ j->rarg,
+ restricted,
+ safe_upper_varnos))
+ return true;
+
+ /* Check the JOIN's qual clauses */
+ if (restricted &&
+ !bms_is_subset(pull_varnos_of_level(root, j->quals, 1),
+ safe_upper_varnos))
+ return true;
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return false;
+}
+
+/*
+ * Perform pullup_replace_vars everyplace it's needed in the query tree.
+ *
+ * Caller has already filled *rvcontext with data describing what to
+ * substitute for Vars referencing the target subquery. In addition
+ * we need the identity of the lowest outer join that can null the
+ * target subquery, and its containing appendrel if any.
+ */
+static void
+perform_pullup_replace_vars(PlannerInfo *root,
+ pullup_replace_vars_context *rvcontext,
+ JoinExpr *lowest_nulling_outer_join,
+ AppendRelInfo *containing_appendrel)
+{
+ Query *parse = root->parse;
+ ListCell *lc;
+
+ /*
+ * Replace all of the top query's references to the subquery's outputs
+ * with copies of the adjusted subtlist items, being careful not to
+ * replace any of the jointree structure. (This'd be a lot cleaner if we
+ * could use query_tree_mutator.) We have to use PHVs in the targetList,
+ * returningList, and havingQual, since those are certainly above any
+ * outer join. replace_vars_in_jointree tracks its location in the
+ * jointree and uses PHVs or not appropriately.
+ */
+ parse->targetList = (List *)
+ pullup_replace_vars((Node *) parse->targetList, rvcontext);
+ parse->returningList = (List *)
+ pullup_replace_vars((Node *) parse->returningList, rvcontext);
+ if (parse->onConflict)
+ {
+ parse->onConflict->onConflictSet = (List *)
+ pullup_replace_vars((Node *) parse->onConflict->onConflictSet,
+ rvcontext);
+ parse->onConflict->onConflictWhere =
+ pullup_replace_vars(parse->onConflict->onConflictWhere,
+ rvcontext);
+
+ /*
+ * We assume ON CONFLICT's arbiterElems, arbiterWhere, exclRelTlist
+ * can't contain any references to a subquery.
+ */
+ }
+ replace_vars_in_jointree((Node *) parse->jointree, rvcontext,
+ lowest_nulling_outer_join);
+ Assert(parse->setOperations == NULL);
+ parse->havingQual = pullup_replace_vars(parse->havingQual, rvcontext);
+
+ /*
+ * Replace references in the translated_vars lists of appendrels. When
+ * pulling up an appendrel member, we do not need PHVs in the list of the
+ * parent appendrel --- there isn't any outer join between. Elsewhere,
+ * use PHVs for safety. (This analysis could be made tighter but it seems
+ * unlikely to be worth much trouble.)
+ */
+ foreach(lc, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
+ bool save_need_phvs = rvcontext->need_phvs;
+
+ if (appinfo == containing_appendrel)
+ rvcontext->need_phvs = false;
+ appinfo->translated_vars = (List *)
+ pullup_replace_vars((Node *) appinfo->translated_vars, rvcontext);
+ rvcontext->need_phvs = save_need_phvs;
+ }
+
+ /*
+ * Replace references in the joinaliasvars lists of join RTEs.
+ *
+ * You might think that we could avoid using PHVs for alias vars of joins
+ * below lowest_nulling_outer_join, but that doesn't work because the
+ * alias vars could be referenced above that join; we need the PHVs to be
+ * present in such references after the alias vars get flattened. (It
+ * might be worth trying to be smarter here, someday.)
+ */
+ foreach(lc, parse->rtable)
+ {
+ RangeTblEntry *otherrte = (RangeTblEntry *) lfirst(lc);
+
+ if (otherrte->rtekind == RTE_JOIN)
+ otherrte->joinaliasvars = (List *)
+ pullup_replace_vars((Node *) otherrte->joinaliasvars,
+ rvcontext);
+ }
+}
+
+/*
+ * Helper routine for perform_pullup_replace_vars: do pullup_replace_vars on
+ * every expression in the jointree, without changing the jointree structure
+ * itself. Ugly, but there's no other way...
+ *
+ * If we are at or below lowest_nulling_outer_join, we can suppress use of
+ * PlaceHolderVars wrapped around the replacement expressions.
+ */
+static void
+replace_vars_in_jointree(Node *jtnode,
+ pullup_replace_vars_context *context,
+ JoinExpr *lowest_nulling_outer_join)
+{
+ if (jtnode == NULL)
+ return;
+ if (IsA(jtnode, RangeTblRef))
+ {
+ /*
+ * If the RangeTblRef refers to a LATERAL subquery (that isn't the
+ * same subquery we're pulling up), it might contain references to the
+ * target subquery, which we must replace. We drive this from the
+ * jointree scan, rather than a scan of the rtable, for a couple of
+ * reasons: we can avoid processing no-longer-referenced RTEs, and we
+ * can use the appropriate setting of need_phvs depending on whether
+ * the RTE is above possibly-nulling outer joins or not.
+ */
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ if (varno != context->varno) /* ignore target subquery itself */
+ {
+ RangeTblEntry *rte = rt_fetch(varno, context->root->parse->rtable);
+
+ Assert(rte != context->target_rte);
+ if (rte->lateral)
+ {
+ switch (rte->rtekind)
+ {
+ case RTE_RELATION:
+ /* shouldn't be marked LATERAL unless tablesample */
+ Assert(rte->tablesample);
+ rte->tablesample = (TableSampleClause *)
+ pullup_replace_vars((Node *) rte->tablesample,
+ context);
+ break;
+ case RTE_SUBQUERY:
+ rte->subquery =
+ pullup_replace_vars_subquery(rte->subquery,
+ context);
+ break;
+ case RTE_FUNCTION:
+ rte->functions = (List *)
+ pullup_replace_vars((Node *) rte->functions,
+ context);
+ break;
+ case RTE_TABLEFUNC:
+ rte->tablefunc = (TableFunc *)
+ pullup_replace_vars((Node *) rte->tablefunc,
+ context);
+ break;
+ case RTE_VALUES:
+ rte->values_lists = (List *)
+ pullup_replace_vars((Node *) rte->values_lists,
+ context);
+ break;
+ case RTE_JOIN:
+ case RTE_CTE:
+ case RTE_NAMEDTUPLESTORE:
+ case RTE_RESULT:
+ /* these shouldn't be marked LATERAL */
+ Assert(false);
+ break;
+ }
+ }
+ }
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ foreach(l, f->fromlist)
+ replace_vars_in_jointree(lfirst(l), context,
+ lowest_nulling_outer_join);
+ f->quals = pullup_replace_vars(f->quals, context);
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ bool save_need_phvs = context->need_phvs;
+
+ if (j == lowest_nulling_outer_join)
+ {
+ /* no more PHVs in or below this join */
+ context->need_phvs = false;
+ lowest_nulling_outer_join = NULL;
+ }
+ replace_vars_in_jointree(j->larg, context, lowest_nulling_outer_join);
+ replace_vars_in_jointree(j->rarg, context, lowest_nulling_outer_join);
+
+ /*
+ * Use PHVs within the join quals of a full join, even when it's the
+ * lowest nulling outer join. Otherwise, we cannot identify which
+ * side of the join a pulled-up var-free expression came from, which
+ * can lead to failure to make a plan at all because none of the quals
+ * appear to be mergeable or hashable conditions. For this purpose we
+ * don't care about the state of wrap_non_vars, so leave it alone.
+ */
+ if (j->jointype == JOIN_FULL)
+ context->need_phvs = true;
+
+ j->quals = pullup_replace_vars(j->quals, context);
+
+ /*
+ * We don't bother to update the colvars list, since it won't be used
+ * again ...
+ */
+ context->need_phvs = save_need_phvs;
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+}
+
+/*
+ * Apply pullup variable replacement throughout an expression tree
+ *
+ * Returns a modified copy of the tree, so this can't be used where we
+ * need to do in-place replacement.
+ */
+static Node *
+pullup_replace_vars(Node *expr, pullup_replace_vars_context *context)
+{
+ return replace_rte_variables(expr,
+ context->varno, 0,
+ pullup_replace_vars_callback,
+ (void *) context,
+ context->outer_hasSubLinks);
+}
+
+static Node *
+pullup_replace_vars_callback(Var *var,
+ replace_rte_variables_context *context)
+{
+ pullup_replace_vars_context *rcon = (pullup_replace_vars_context *) context->callback_arg;
+ int varattno = var->varattno;
+ Node *newnode;
+
+ /*
+ * If PlaceHolderVars are needed, we cache the modified expressions in
+ * rcon->rv_cache[]. This is not in hopes of any material speed gain
+ * within this function, but to avoid generating identical PHVs with
+ * different IDs. That would result in duplicate evaluations at runtime,
+ * and possibly prevent optimizations that rely on recognizing different
+ * references to the same subquery output as being equal(). So it's worth
+ * a bit of extra effort to avoid it.
+ */
+ if (rcon->need_phvs &&
+ varattno >= InvalidAttrNumber &&
+ varattno <= list_length(rcon->targetlist) &&
+ rcon->rv_cache[varattno] != NULL)
+ {
+ /* Just copy the entry and fall through to adjust its varlevelsup */
+ newnode = copyObject(rcon->rv_cache[varattno]);
+ }
+ else if (varattno == InvalidAttrNumber)
+ {
+ /* Must expand whole-tuple reference into RowExpr */
+ RowExpr *rowexpr;
+ List *colnames;
+ List *fields;
+ bool save_need_phvs = rcon->need_phvs;
+ int save_sublevelsup = context->sublevels_up;
+
+ /*
+ * If generating an expansion for a var of a named rowtype (ie, this
+ * is a plain relation RTE), then we must include dummy items for
+ * dropped columns. If the var is RECORD (ie, this is a JOIN), then
+ * omit dropped columns. Either way, attach column names to the
+ * RowExpr for use of ruleutils.c.
+ *
+ * In order to be able to cache the results, we always generate the
+ * expansion with varlevelsup = 0, and then adjust if needed.
+ */
+ expandRTE(rcon->target_rte,
+ var->varno, 0 /* not varlevelsup */ , var->location,
+ (var->vartype != RECORDOID),
+ &colnames, &fields);
+ /* Adjust the generated per-field Vars, but don't insert PHVs */
+ rcon->need_phvs = false;
+ context->sublevels_up = 0; /* to match the expandRTE output */
+ fields = (List *) replace_rte_variables_mutator((Node *) fields,
+ context);
+ rcon->need_phvs = save_need_phvs;
+ context->sublevels_up = save_sublevelsup;
+
+ rowexpr = makeNode(RowExpr);
+ rowexpr->args = fields;
+ rowexpr->row_typeid = var->vartype;
+ rowexpr->row_format = COERCE_IMPLICIT_CAST;
+ rowexpr->colnames = colnames;
+ rowexpr->location = var->location;
+ newnode = (Node *) rowexpr;
+
+ /*
+ * Insert PlaceHolderVar if needed. Notice that we are wrapping one
+ * PlaceHolderVar around the whole RowExpr, rather than putting one
+ * around each element of the row. This is because we need the
+ * expression to yield NULL, not ROW(NULL,NULL,...) when it is forced
+ * to null by an outer join.
+ */
+ if (rcon->need_phvs)
+ {
+ /* RowExpr is certainly not strict, so always need PHV */
+ newnode = (Node *)
+ make_placeholder_expr(rcon->root,
+ (Expr *) newnode,
+ bms_make_singleton(rcon->varno));
+ /* cache it with the PHV, and with varlevelsup still zero */
+ rcon->rv_cache[InvalidAttrNumber] = copyObject(newnode);
+ }
+ }
+ else
+ {
+ /* Normal case referencing one targetlist element */
+ TargetEntry *tle = get_tle_by_resno(rcon->targetlist, varattno);
+
+ if (tle == NULL) /* shouldn't happen */
+ elog(ERROR, "could not find attribute %d in subquery targetlist",
+ varattno);
+
+ /* Make a copy of the tlist item to return */
+ newnode = (Node *) copyObject(tle->expr);
+
+ /* Insert PlaceHolderVar if needed */
+ if (rcon->need_phvs)
+ {
+ bool wrap;
+
+ if (newnode && IsA(newnode, Var) &&
+ ((Var *) newnode)->varlevelsup == 0)
+ {
+ /*
+ * Simple Vars always escape being wrapped, unless they are
+ * lateral references to something outside the subquery being
+ * pulled up. (Even then, we could omit the PlaceHolderVar if
+ * the referenced rel is under the same lowest outer join, but
+ * it doesn't seem worth the trouble to check that.)
+ */
+ if (rcon->target_rte->lateral &&
+ !bms_is_member(((Var *) newnode)->varno, rcon->relids))
+ wrap = true;
+ else
+ wrap = false;
+ }
+ else if (newnode && IsA(newnode, PlaceHolderVar) &&
+ ((PlaceHolderVar *) newnode)->phlevelsup == 0)
+ {
+ /* No need to wrap a PlaceHolderVar with another one, either */
+ wrap = false;
+ }
+ else if (rcon->wrap_non_vars)
+ {
+ /* Wrap all non-Vars in a PlaceHolderVar */
+ wrap = true;
+ }
+ else
+ {
+ /*
+ * If it contains a Var of the subquery being pulled up, and
+ * does not contain any non-strict constructs, then it's
+ * certainly nullable so we don't need to insert a
+ * PlaceHolderVar.
+ *
+ * This analysis could be tighter: in particular, a non-strict
+ * construct hidden within a lower-level PlaceHolderVar is not
+ * reason to add another PHV. But for now it doesn't seem
+ * worth the code to be more exact.
+ *
+ * Note: in future maybe we should insert a PlaceHolderVar
+ * anyway, if the tlist item is expensive to evaluate?
+ *
+ * For a LATERAL subquery, we have to check the actual var
+ * membership of the node, but if it's non-lateral then any
+ * level-zero var must belong to the subquery.
+ */
+ if ((rcon->target_rte->lateral ?
+ bms_overlap(pull_varnos(rcon->root, (Node *) newnode),
+ rcon->relids) :
+ contain_vars_of_level((Node *) newnode, 0)) &&
+ !contain_nonstrict_functions((Node *) newnode))
+ {
+ /* No wrap needed */
+ wrap = false;
+ }
+ else
+ {
+ /* Else wrap it in a PlaceHolderVar */
+ wrap = true;
+ }
+ }
+
+ if (wrap)
+ newnode = (Node *)
+ make_placeholder_expr(rcon->root,
+ (Expr *) newnode,
+ bms_make_singleton(rcon->varno));
+
+ /*
+ * Cache it if possible (ie, if the attno is in range, which it
+ * probably always should be). We can cache the value even if we
+ * decided we didn't need a PHV, since this result will be
+ * suitable for any request that has need_phvs.
+ */
+ if (varattno > InvalidAttrNumber &&
+ varattno <= list_length(rcon->targetlist))
+ rcon->rv_cache[varattno] = copyObject(newnode);
+ }
+ }
+
+ /* Must adjust varlevelsup if tlist item is from higher query */
+ if (var->varlevelsup > 0)
+ IncrementVarSublevelsUp(newnode, var->varlevelsup, 0);
+
+ return newnode;
+}
+
+/*
+ * Apply pullup variable replacement to a subquery
+ *
+ * This needs to be different from pullup_replace_vars() because
+ * replace_rte_variables will think that it shouldn't increment sublevels_up
+ * before entering the Query; so we need to call it with sublevels_up == 1.
+ */
+static Query *
+pullup_replace_vars_subquery(Query *query,
+ pullup_replace_vars_context *context)
+{
+ Assert(IsA(query, Query));
+ return (Query *) replace_rte_variables((Node *) query,
+ context->varno, 1,
+ pullup_replace_vars_callback,
+ (void *) context,
+ NULL);
+}
+
+
+/*
+ * flatten_simple_union_all
+ * Try to optimize top-level UNION ALL structure into an appendrel
+ *
+ * If a query's setOperations tree consists entirely of simple UNION ALL
+ * operations, flatten it into an append relation, which we can process more
+ * intelligently than the general setops case. Otherwise, do nothing.
+ *
+ * In most cases, this can succeed only for a top-level query, because for a
+ * subquery in FROM, the parent query's invocation of pull_up_subqueries would
+ * already have flattened the UNION via pull_up_simple_union_all. But there
+ * are a few cases we can support here but not in that code path, for example
+ * when the subquery also contains ORDER BY.
+ */
+void
+flatten_simple_union_all(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ SetOperationStmt *topop;
+ Node *leftmostjtnode;
+ int leftmostRTI;
+ RangeTblEntry *leftmostRTE;
+ int childRTI;
+ RangeTblEntry *childRTE;
+ RangeTblRef *rtr;
+
+ /* Shouldn't be called unless query has setops */
+ topop = castNode(SetOperationStmt, parse->setOperations);
+ Assert(topop);
+
+ /* Can't optimize away a recursive UNION */
+ if (root->hasRecursion)
+ return;
+
+ /*
+ * Recursively check the tree of set operations. If not all UNION ALL
+ * with identical column types, punt.
+ */
+ if (!is_simple_union_all_recurse((Node *) topop, parse, topop->colTypes))
+ return;
+
+ /*
+ * Locate the leftmost leaf query in the setops tree. The upper query's
+ * Vars all refer to this RTE (see transformSetOperationStmt).
+ */
+ leftmostjtnode = topop->larg;
+ while (leftmostjtnode && IsA(leftmostjtnode, SetOperationStmt))
+ leftmostjtnode = ((SetOperationStmt *) leftmostjtnode)->larg;
+ Assert(leftmostjtnode && IsA(leftmostjtnode, RangeTblRef));
+ leftmostRTI = ((RangeTblRef *) leftmostjtnode)->rtindex;
+ leftmostRTE = rt_fetch(leftmostRTI, parse->rtable);
+ Assert(leftmostRTE->rtekind == RTE_SUBQUERY);
+
+ /*
+ * Make a copy of the leftmost RTE and add it to the rtable. This copy
+ * will represent the leftmost leaf query in its capacity as a member of
+ * the appendrel. The original will represent the appendrel as a whole.
+ * (We must do things this way because the upper query's Vars have to be
+ * seen as referring to the whole appendrel.)
+ */
+ childRTE = copyObject(leftmostRTE);
+ parse->rtable = lappend(parse->rtable, childRTE);
+ childRTI = list_length(parse->rtable);
+
+ /* Modify the setops tree to reference the child copy */
+ ((RangeTblRef *) leftmostjtnode)->rtindex = childRTI;
+
+ /* Modify the formerly-leftmost RTE to mark it as an appendrel parent */
+ leftmostRTE->inh = true;
+
+ /*
+ * Form a RangeTblRef for the appendrel, and insert it into FROM. The top
+ * Query of a setops tree should have had an empty FromClause initially.
+ */
+ rtr = makeNode(RangeTblRef);
+ rtr->rtindex = leftmostRTI;
+ Assert(parse->jointree->fromlist == NIL);
+ parse->jointree->fromlist = list_make1(rtr);
+
+ /*
+ * Now pretend the query has no setops. We must do this before trying to
+ * do subquery pullup, because of Assert in pull_up_simple_subquery.
+ */
+ parse->setOperations = NULL;
+
+ /*
+ * Build AppendRelInfo information, and apply pull_up_subqueries to the
+ * leaf queries of the UNION ALL. (We must do that now because they
+ * weren't previously referenced by the jointree, and so were missed by
+ * the main invocation of pull_up_subqueries.)
+ */
+ pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
+}
+
+
+/*
+ * reduce_outer_joins
+ * Attempt to reduce outer joins to plain inner joins.
+ *
+ * The idea here is that given a query like
+ * SELECT ... FROM a LEFT JOIN b ON (...) WHERE b.y = 42;
+ * we can reduce the LEFT JOIN to a plain JOIN if the "=" operator in WHERE
+ * is strict. The strict operator will always return NULL, causing the outer
+ * WHERE to fail, on any row where the LEFT JOIN filled in NULLs for b's
+ * columns. Therefore, there's no need for the join to produce null-extended
+ * rows in the first place --- which makes it a plain join not an outer join.
+ * (This scenario may not be very likely in a query written out by hand, but
+ * it's reasonably likely when pushing quals down into complex views.)
+ *
+ * More generally, an outer join can be reduced in strength if there is a
+ * strict qual above it in the qual tree that constrains a Var from the
+ * nullable side of the join to be non-null. (For FULL joins this applies
+ * to each side separately.)
+ *
+ * Another transformation we apply here is to recognize cases like
+ * SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.y IS NULL;
+ * If the join clause is strict for b.y, then only null-extended rows could
+ * pass the upper WHERE, and we can conclude that what the query is really
+ * specifying is an anti-semijoin. We change the join type from JOIN_LEFT
+ * to JOIN_ANTI. The IS NULL clause then becomes redundant, and must be
+ * removed to prevent bogus selectivity calculations, but we leave it to
+ * distribute_qual_to_rels to get rid of such clauses.
+ *
+ * Also, we get rid of JOIN_RIGHT cases by flipping them around to become
+ * JOIN_LEFT. This saves some code here and in some later planner routines,
+ * but the main reason to do it is to not need to invent a JOIN_REVERSE_ANTI
+ * join type.
+ *
+ * To ease recognition of strict qual clauses, we require this routine to be
+ * run after expression preprocessing (i.e., qual canonicalization and JOIN
+ * alias-var expansion).
+ */
+void
+reduce_outer_joins(PlannerInfo *root)
+{
+ reduce_outer_joins_state *state;
+
+ /*
+ * To avoid doing strictness checks on more quals than necessary, we want
+ * to stop descending the jointree as soon as there are no outer joins
+ * below our current point. This consideration forces a two-pass process.
+ * The first pass gathers information about which base rels appear below
+ * each side of each join clause, and about whether there are outer
+ * join(s) below each side of each join clause. The second pass examines
+ * qual clauses and changes join types as it descends the tree.
+ */
+ state = reduce_outer_joins_pass1((Node *) root->parse->jointree);
+
+ /* planner.c shouldn't have called me if no outer joins */
+ if (state == NULL || !state->contains_outer)
+ elog(ERROR, "so where are the outer joins?");
+
+ reduce_outer_joins_pass2((Node *) root->parse->jointree,
+ state, root, NULL, NIL, NIL);
+}
+
+/*
+ * reduce_outer_joins_pass1 - phase 1 data collection
+ *
+ * Returns a state node describing the given jointree node.
+ */
+static reduce_outer_joins_state *
+reduce_outer_joins_pass1(Node *jtnode)
+{
+ reduce_outer_joins_state *result;
+
+ result = (reduce_outer_joins_state *)
+ palloc(sizeof(reduce_outer_joins_state));
+ result->relids = NULL;
+ result->contains_outer = false;
+ result->sub_states = NIL;
+
+ if (jtnode == NULL)
+ return result;
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ result->relids = bms_make_singleton(varno);
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ foreach(l, f->fromlist)
+ {
+ reduce_outer_joins_state *sub_state;
+
+ sub_state = reduce_outer_joins_pass1(lfirst(l));
+ result->relids = bms_add_members(result->relids,
+ sub_state->relids);
+ result->contains_outer |= sub_state->contains_outer;
+ result->sub_states = lappend(result->sub_states, sub_state);
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ reduce_outer_joins_state *sub_state;
+
+ /* join's own RT index is not wanted in result->relids */
+ if (IS_OUTER_JOIN(j->jointype))
+ result->contains_outer = true;
+
+ sub_state = reduce_outer_joins_pass1(j->larg);
+ result->relids = bms_add_members(result->relids,
+ sub_state->relids);
+ result->contains_outer |= sub_state->contains_outer;
+ result->sub_states = lappend(result->sub_states, sub_state);
+
+ sub_state = reduce_outer_joins_pass1(j->rarg);
+ result->relids = bms_add_members(result->relids,
+ sub_state->relids);
+ result->contains_outer |= sub_state->contains_outer;
+ result->sub_states = lappend(result->sub_states, sub_state);
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return result;
+}
+
+/*
+ * reduce_outer_joins_pass2 - phase 2 processing
+ *
+ * jtnode: current jointree node
+ * state: state data collected by phase 1 for this node
+ * root: toplevel planner state
+ * nonnullable_rels: set of base relids forced non-null by upper quals
+ * nonnullable_vars: list of Vars forced non-null by upper quals
+ * forced_null_vars: list of Vars forced null by upper quals
+ */
+static void
+reduce_outer_joins_pass2(Node *jtnode,
+ reduce_outer_joins_state *state,
+ PlannerInfo *root,
+ Relids nonnullable_rels,
+ List *nonnullable_vars,
+ List *forced_null_vars)
+{
+ /*
+ * pass 2 should never descend as far as an empty subnode or base rel,
+ * because it's only called on subtrees marked as contains_outer.
+ */
+ if (jtnode == NULL)
+ elog(ERROR, "reached empty jointree");
+ if (IsA(jtnode, RangeTblRef))
+ elog(ERROR, "reached base rel");
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+ ListCell *s;
+ Relids pass_nonnullable_rels;
+ List *pass_nonnullable_vars;
+ List *pass_forced_null_vars;
+
+ /* Scan quals to see if we can add any constraints */
+ pass_nonnullable_rels = find_nonnullable_rels(f->quals);
+ pass_nonnullable_rels = bms_add_members(pass_nonnullable_rels,
+ nonnullable_rels);
+ pass_nonnullable_vars = find_nonnullable_vars(f->quals);
+ pass_nonnullable_vars = list_concat(pass_nonnullable_vars,
+ nonnullable_vars);
+ pass_forced_null_vars = find_forced_null_vars(f->quals);
+ pass_forced_null_vars = list_concat(pass_forced_null_vars,
+ forced_null_vars);
+ /* And recurse --- but only into interesting subtrees */
+ Assert(list_length(f->fromlist) == list_length(state->sub_states));
+ forboth(l, f->fromlist, s, state->sub_states)
+ {
+ reduce_outer_joins_state *sub_state = lfirst(s);
+
+ if (sub_state->contains_outer)
+ reduce_outer_joins_pass2(lfirst(l), sub_state, root,
+ pass_nonnullable_rels,
+ pass_nonnullable_vars,
+ pass_forced_null_vars);
+ }
+ bms_free(pass_nonnullable_rels);
+ /* can't so easily clean up var lists, unfortunately */
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ int rtindex = j->rtindex;
+ JoinType jointype = j->jointype;
+ reduce_outer_joins_state *left_state = linitial(state->sub_states);
+ reduce_outer_joins_state *right_state = lsecond(state->sub_states);
+ List *local_nonnullable_vars = NIL;
+ bool computed_local_nonnullable_vars = false;
+
+ /* Can we simplify this join? */
+ switch (jointype)
+ {
+ case JOIN_INNER:
+ break;
+ case JOIN_LEFT:
+ if (bms_overlap(nonnullable_rels, right_state->relids))
+ jointype = JOIN_INNER;
+ break;
+ case JOIN_RIGHT:
+ if (bms_overlap(nonnullable_rels, left_state->relids))
+ jointype = JOIN_INNER;
+ break;
+ case JOIN_FULL:
+ if (bms_overlap(nonnullable_rels, left_state->relids))
+ {
+ if (bms_overlap(nonnullable_rels, right_state->relids))
+ jointype = JOIN_INNER;
+ else
+ jointype = JOIN_LEFT;
+ }
+ else
+ {
+ if (bms_overlap(nonnullable_rels, right_state->relids))
+ jointype = JOIN_RIGHT;
+ }
+ break;
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+
+ /*
+ * These could only have been introduced by pull_up_sublinks,
+ * so there's no way that upper quals could refer to their
+ * righthand sides, and no point in checking.
+ */
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) jointype);
+ break;
+ }
+
+ /*
+ * Convert JOIN_RIGHT to JOIN_LEFT. Note that in the case where we
+ * reduced JOIN_FULL to JOIN_RIGHT, this will mean the JoinExpr no
+ * longer matches the internal ordering of any CoalesceExpr's built to
+ * represent merged join variables. We don't care about that at
+ * present, but be wary of it ...
+ */
+ if (jointype == JOIN_RIGHT)
+ {
+ Node *tmparg;
+
+ tmparg = j->larg;
+ j->larg = j->rarg;
+ j->rarg = tmparg;
+ jointype = JOIN_LEFT;
+ right_state = linitial(state->sub_states);
+ left_state = lsecond(state->sub_states);
+ }
+
+ /*
+ * See if we can reduce JOIN_LEFT to JOIN_ANTI. This is the case if
+ * the join's own quals are strict for any var that was forced null by
+ * higher qual levels. NOTE: there are other ways that we could
+ * detect an anti-join, in particular if we were to check whether Vars
+ * coming from the RHS must be non-null because of table constraints.
+ * That seems complicated and expensive though (in particular, one
+ * would have to be wary of lower outer joins). For the moment this
+ * seems sufficient.
+ */
+ if (jointype == JOIN_LEFT)
+ {
+ List *overlap;
+
+ local_nonnullable_vars = find_nonnullable_vars(j->quals);
+ computed_local_nonnullable_vars = true;
+
+ /*
+ * It's not sufficient to check whether local_nonnullable_vars and
+ * forced_null_vars overlap: we need to know if the overlap
+ * includes any RHS variables.
+ */
+ overlap = list_intersection(local_nonnullable_vars,
+ forced_null_vars);
+ if (overlap != NIL &&
+ bms_overlap(pull_varnos(root, (Node *) overlap),
+ right_state->relids))
+ jointype = JOIN_ANTI;
+ }
+
+ /* Apply the jointype change, if any, to both jointree node and RTE */
+ if (rtindex && jointype != j->jointype)
+ {
+ RangeTblEntry *rte = rt_fetch(rtindex, root->parse->rtable);
+
+ Assert(rte->rtekind == RTE_JOIN);
+ Assert(rte->jointype == j->jointype);
+ rte->jointype = jointype;
+ }
+ j->jointype = jointype;
+
+ /* Only recurse if there's more to do below here */
+ if (left_state->contains_outer || right_state->contains_outer)
+ {
+ Relids local_nonnullable_rels;
+ List *local_forced_null_vars;
+ Relids pass_nonnullable_rels;
+ List *pass_nonnullable_vars;
+ List *pass_forced_null_vars;
+
+ /*
+ * If this join is (now) inner, we can add any constraints its
+ * quals provide to those we got from above. But if it is outer,
+ * we can pass down the local constraints only into the nullable
+ * side, because an outer join never eliminates any rows from its
+ * non-nullable side. Also, there is no point in passing upper
+ * constraints into the nullable side, since if there were any
+ * we'd have been able to reduce the join. (In the case of upper
+ * forced-null constraints, we *must not* pass them into the
+ * nullable side --- they either applied here, or not.) The upshot
+ * is that we pass either the local or the upper constraints,
+ * never both, to the children of an outer join.
+ *
+ * Note that a SEMI join works like an inner join here: it's okay
+ * to pass down both local and upper constraints. (There can't be
+ * any upper constraints affecting its inner side, but it's not
+ * worth having a separate code path to avoid passing them.)
+ *
+ * At a FULL join we just punt and pass nothing down --- is it
+ * possible to be smarter?
+ */
+ if (jointype != JOIN_FULL)
+ {
+ local_nonnullable_rels = find_nonnullable_rels(j->quals);
+ if (!computed_local_nonnullable_vars)
+ local_nonnullable_vars = find_nonnullable_vars(j->quals);
+ local_forced_null_vars = find_forced_null_vars(j->quals);
+ if (jointype == JOIN_INNER || jointype == JOIN_SEMI)
+ {
+ /* OK to merge upper and local constraints */
+ local_nonnullable_rels = bms_add_members(local_nonnullable_rels,
+ nonnullable_rels);
+ local_nonnullable_vars = list_concat(local_nonnullable_vars,
+ nonnullable_vars);
+ local_forced_null_vars = list_concat(local_forced_null_vars,
+ forced_null_vars);
+ }
+ }
+ else
+ {
+ /* no use in calculating these */
+ local_nonnullable_rels = NULL;
+ local_forced_null_vars = NIL;
+ }
+
+ if (left_state->contains_outer)
+ {
+ if (jointype == JOIN_INNER || jointype == JOIN_SEMI)
+ {
+ /* pass union of local and upper constraints */
+ pass_nonnullable_rels = local_nonnullable_rels;
+ pass_nonnullable_vars = local_nonnullable_vars;
+ pass_forced_null_vars = local_forced_null_vars;
+ }
+ else if (jointype != JOIN_FULL) /* ie, LEFT or ANTI */
+ {
+ /* can't pass local constraints to non-nullable side */
+ pass_nonnullable_rels = nonnullable_rels;
+ pass_nonnullable_vars = nonnullable_vars;
+ pass_forced_null_vars = forced_null_vars;
+ }
+ else
+ {
+ /* no constraints pass through JOIN_FULL */
+ pass_nonnullable_rels = NULL;
+ pass_nonnullable_vars = NIL;
+ pass_forced_null_vars = NIL;
+ }
+ reduce_outer_joins_pass2(j->larg, left_state, root,
+ pass_nonnullable_rels,
+ pass_nonnullable_vars,
+ pass_forced_null_vars);
+ }
+
+ if (right_state->contains_outer)
+ {
+ if (jointype != JOIN_FULL) /* ie, INNER/LEFT/SEMI/ANTI */
+ {
+ /* pass appropriate constraints, per comment above */
+ pass_nonnullable_rels = local_nonnullable_rels;
+ pass_nonnullable_vars = local_nonnullable_vars;
+ pass_forced_null_vars = local_forced_null_vars;
+ }
+ else
+ {
+ /* no constraints pass through JOIN_FULL */
+ pass_nonnullable_rels = NULL;
+ pass_nonnullable_vars = NIL;
+ pass_forced_null_vars = NIL;
+ }
+ reduce_outer_joins_pass2(j->rarg, right_state, root,
+ pass_nonnullable_rels,
+ pass_nonnullable_vars,
+ pass_forced_null_vars);
+ }
+ bms_free(local_nonnullable_rels);
+ }
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+}
+
+
+/*
+ * remove_useless_result_rtes
+ * Attempt to remove RTE_RESULT RTEs from the join tree.
+ *
+ * We can remove RTE_RESULT entries from the join tree using the knowledge
+ * that RTE_RESULT returns exactly one row and has no output columns. Hence,
+ * if one is inner-joined to anything else, we can delete it. Optimizations
+ * are also possible for some outer-join cases, as detailed below.
+ *
+ * Some of these optimizations depend on recognizing empty (constant-true)
+ * quals for FromExprs and JoinExprs. That makes it useful to apply this
+ * optimization pass after expression preprocessing, since that will have
+ * eliminated constant-true quals, allowing more cases to be recognized as
+ * optimizable. What's more, the usual reason for an RTE_RESULT to be present
+ * is that we pulled up a subquery or VALUES clause, thus very possibly
+ * replacing Vars with constants, making it more likely that a qual can be
+ * reduced to constant true. Also, because some optimizations depend on
+ * the outer-join type, it's best to have done reduce_outer_joins() first.
+ *
+ * A PlaceHolderVar referencing an RTE_RESULT RTE poses an obstacle to this
+ * process: we must remove the RTE_RESULT's relid from the PHV's phrels, but
+ * we must not reduce the phrels set to empty. If that would happen, and
+ * the RTE_RESULT is an immediate child of an outer join, we have to give up
+ * and not remove the RTE_RESULT: there is noplace else to evaluate the
+ * PlaceHolderVar. (That is, in such cases the RTE_RESULT *does* have output
+ * columns.) But if the RTE_RESULT is an immediate child of an inner join,
+ * we can usually change the PlaceHolderVar's phrels so as to evaluate it at
+ * the inner join instead. This is OK because we really only care that PHVs
+ * are evaluated above or below the correct outer joins. We can't, however,
+ * postpone the evaluation of a PHV to above where it is used; so there are
+ * some checks below on whether output PHVs are laterally referenced in the
+ * other join input rel(s).
+ *
+ * We used to try to do this work as part of pull_up_subqueries() where the
+ * potentially-optimizable cases get introduced; but it's way simpler, and
+ * more effective, to do it separately.
+ */
+void
+remove_useless_result_rtes(PlannerInfo *root)
+{
+ ListCell *cell;
+
+ /* Top level of jointree must always be a FromExpr */
+ Assert(IsA(root->parse->jointree, FromExpr));
+ /* Recurse ... */
+ root->parse->jointree = (FromExpr *)
+ remove_useless_results_recurse(root, (Node *) root->parse->jointree);
+ /* We should still have a FromExpr */
+ Assert(IsA(root->parse->jointree, FromExpr));
+
+ /*
+ * Remove any PlanRowMark referencing an RTE_RESULT RTE. We obviously
+ * must do that for any RTE_RESULT that we just removed. But one for a
+ * RTE that we did not remove can be dropped anyway: since the RTE has
+ * only one possible output row, there is no need for EPQ to mark and
+ * restore that row.
+ *
+ * It's necessary, not optional, to remove the PlanRowMark for a surviving
+ * RTE_RESULT RTE; otherwise we'll generate a whole-row Var for the
+ * RTE_RESULT, which the executor has no support for.
+ */
+ foreach(cell, root->rowMarks)
+ {
+ PlanRowMark *rc = (PlanRowMark *) lfirst(cell);
+
+ if (rt_fetch(rc->rti, root->parse->rtable)->rtekind == RTE_RESULT)
+ root->rowMarks = foreach_delete_current(root->rowMarks, cell);
+ }
+}
+
+/*
+ * remove_useless_results_recurse
+ * Recursive guts of remove_useless_result_rtes.
+ *
+ * This recursively processes the jointree and returns a modified jointree.
+ */
+static Node *
+remove_useless_results_recurse(PlannerInfo *root, Node *jtnode)
+{
+ Assert(jtnode != NULL);
+ if (IsA(jtnode, RangeTblRef))
+ {
+ /* Can't immediately do anything with a RangeTblRef */
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ Relids result_relids = NULL;
+ ListCell *cell;
+
+ /*
+ * We can drop RTE_RESULT rels from the fromlist so long as at least
+ * one child remains, since joining to a one-row table changes
+ * nothing. (But we can't drop a RTE_RESULT that computes PHV(s) that
+ * are needed by some sibling. The cleanup transformation below would
+ * reassign the PHVs to be computed at the join, which is too late for
+ * the sibling's use.) The easiest way to mechanize this rule is to
+ * modify the list in-place.
+ */
+ foreach(cell, f->fromlist)
+ {
+ Node *child = (Node *) lfirst(cell);
+ int varno;
+
+ /* Recursively transform child ... */
+ child = remove_useless_results_recurse(root, child);
+ /* ... and stick it back into the tree */
+ lfirst(cell) = child;
+
+ /*
+ * If it's an RTE_RESULT with at least one sibling, and no sibling
+ * references dependent PHVs, we can drop it. We don't yet know
+ * what the inner join's final relid set will be, so postpone
+ * cleanup of PHVs etc till after this loop.
+ */
+ if (list_length(f->fromlist) > 1 &&
+ (varno = get_result_relid(root, child)) != 0 &&
+ !find_dependent_phvs_in_jointree(root, (Node *) f, varno))
+ {
+ f->fromlist = foreach_delete_current(f->fromlist, cell);
+ result_relids = bms_add_member(result_relids, varno);
+ }
+ }
+
+ /*
+ * Clean up if we dropped any RTE_RESULT RTEs. This is a bit
+ * inefficient if there's more than one, but it seems better to
+ * optimize the support code for the single-relid case.
+ */
+ if (result_relids)
+ {
+ int varno = -1;
+
+ while ((varno = bms_next_member(result_relids, varno)) >= 0)
+ remove_result_refs(root, varno, (Node *) f);
+ }
+
+ /*
+ * If we're not at the top of the jointree, it's valid to simplify a
+ * degenerate FromExpr into its single child. (At the top, we must
+ * keep the FromExpr since Query.jointree is required to point to a
+ * FromExpr.)
+ */
+ if (f != root->parse->jointree &&
+ f->quals == NULL &&
+ list_length(f->fromlist) == 1)
+ return (Node *) linitial(f->fromlist);
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ int varno;
+
+ /* First, recurse */
+ j->larg = remove_useless_results_recurse(root, j->larg);
+ j->rarg = remove_useless_results_recurse(root, j->rarg);
+
+ /* Apply join-type-specific optimization rules */
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+
+ /*
+ * An inner join is equivalent to a FromExpr, so if either
+ * side was simplified to an RTE_RESULT rel, we can replace
+ * the join with a FromExpr with just the other side; and if
+ * the qual is empty (JOIN ON TRUE) then we can omit the
+ * FromExpr as well.
+ *
+ * Just as in the FromExpr case, we can't simplify if the
+ * other input rel references any PHVs that are marked as to
+ * be evaluated at the RTE_RESULT rel, because we can't
+ * postpone their evaluation in that case. But we only have
+ * to check this in cases where it's syntactically legal for
+ * the other input to have a LATERAL reference to the
+ * RTE_RESULT rel. Only RHSes of inner and left joins are
+ * allowed to have such refs.
+ */
+ if ((varno = get_result_relid(root, j->larg)) != 0 &&
+ !find_dependent_phvs_in_jointree(root, j->rarg, varno))
+ {
+ remove_result_refs(root, varno, j->rarg);
+ if (j->quals)
+ jtnode = (Node *)
+ makeFromExpr(list_make1(j->rarg), j->quals);
+ else
+ jtnode = j->rarg;
+ }
+ else if ((varno = get_result_relid(root, j->rarg)) != 0)
+ {
+ remove_result_refs(root, varno, j->larg);
+ if (j->quals)
+ jtnode = (Node *)
+ makeFromExpr(list_make1(j->larg), j->quals);
+ else
+ jtnode = j->larg;
+ }
+ break;
+ case JOIN_LEFT:
+
+ /*
+ * We can simplify this case if the RHS is an RTE_RESULT, with
+ * two different possibilities:
+ *
+ * If the qual is empty (JOIN ON TRUE), then the join can be
+ * strength-reduced to a plain inner join, since each LHS row
+ * necessarily has exactly one join partner. So we can always
+ * discard the RHS, much as in the JOIN_INNER case above.
+ * (Again, the LHS could not contain a lateral reference to
+ * the RHS.)
+ *
+ * Otherwise, it's still true that each LHS row should be
+ * returned exactly once, and since the RHS returns no columns
+ * (unless there are PHVs that have to be evaluated there), we
+ * don't much care if it's null-extended or not. So in this
+ * case also, we can just ignore the qual and discard the left
+ * join.
+ */
+ if ((varno = get_result_relid(root, j->rarg)) != 0 &&
+ (j->quals == NULL ||
+ !find_dependent_phvs(root, varno)))
+ {
+ remove_result_refs(root, varno, j->larg);
+ jtnode = j->larg;
+ }
+ break;
+ case JOIN_RIGHT:
+ /* Mirror-image of the JOIN_LEFT case */
+ if ((varno = get_result_relid(root, j->larg)) != 0 &&
+ (j->quals == NULL ||
+ !find_dependent_phvs(root, varno)))
+ {
+ remove_result_refs(root, varno, j->rarg);
+ jtnode = j->rarg;
+ }
+ break;
+ case JOIN_SEMI:
+
+ /*
+ * We may simplify this case if the RHS is an RTE_RESULT; the
+ * join qual becomes effectively just a filter qual for the
+ * LHS, since we should either return the LHS row or not. For
+ * simplicity we inject the filter qual into a new FromExpr.
+ *
+ * Unlike the LEFT/RIGHT cases, we just Assert that there are
+ * no PHVs that need to be evaluated at the semijoin's RHS,
+ * since the rest of the query couldn't reference any outputs
+ * of the semijoin's RHS.
+ */
+ if ((varno = get_result_relid(root, j->rarg)) != 0)
+ {
+ Assert(!find_dependent_phvs(root, varno));
+ remove_result_refs(root, varno, j->larg);
+ if (j->quals)
+ jtnode = (Node *)
+ makeFromExpr(list_make1(j->larg), j->quals);
+ else
+ jtnode = j->larg;
+ }
+ break;
+ case JOIN_FULL:
+ case JOIN_ANTI:
+ /* We have no special smarts for these cases */
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ break;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return jtnode;
+}
+
+/*
+ * get_result_relid
+ * If jtnode is a RangeTblRef for an RTE_RESULT RTE, return its relid;
+ * otherwise return 0.
+ */
+static int
+get_result_relid(PlannerInfo *root, Node *jtnode)
+{
+ int varno;
+
+ if (!IsA(jtnode, RangeTblRef))
+ return 0;
+ varno = ((RangeTblRef *) jtnode)->rtindex;
+ if (rt_fetch(varno, root->parse->rtable)->rtekind != RTE_RESULT)
+ return 0;
+ return varno;
+}
+
+/*
+ * remove_result_refs
+ * Helper routine for dropping an unneeded RTE_RESULT RTE.
+ *
+ * This doesn't physically remove the RTE from the jointree, because that's
+ * more easily handled in remove_useless_results_recurse. What it does do
+ * is the necessary cleanup in the rest of the tree: we must adjust any PHVs
+ * that may reference the RTE. Be sure to call this at a point where the
+ * jointree is valid (no disconnected nodes).
+ *
+ * Note that we don't need to process the append_rel_list, since RTEs
+ * referenced directly in the jointree won't be appendrel members.
+ *
+ * varno is the RTE_RESULT's relid.
+ * newjtloc is the jointree location at which any PHVs referencing the
+ * RTE_RESULT should be evaluated instead.
+ */
+static void
+remove_result_refs(PlannerInfo *root, int varno, Node *newjtloc)
+{
+ /* Fix up PlaceHolderVars as needed */
+ /* If there are no PHVs anywhere, we can skip this bit */
+ if (root->glob->lastPHId != 0)
+ {
+ Relids subrelids;
+
+ subrelids = get_relids_in_jointree(newjtloc, false);
+ Assert(!bms_is_empty(subrelids));
+ substitute_phv_relids((Node *) root->parse, varno, subrelids);
+ fix_append_rel_relids(root->append_rel_list, varno, subrelids);
+ }
+
+ /*
+ * We also need to remove any PlanRowMark referencing the RTE, but we
+ * postpone that work until we return to remove_useless_result_rtes.
+ */
+}
+
+
+/*
+ * find_dependent_phvs - are there any PlaceHolderVars whose relids are
+ * exactly the given varno?
+ *
+ * find_dependent_phvs should be used when we want to see if there are
+ * any such PHVs anywhere in the Query. Another use-case is to see if
+ * a subtree of the join tree contains such PHVs; but for that, we have
+ * to look not only at the join tree nodes themselves but at the
+ * referenced RTEs. For that, use find_dependent_phvs_in_jointree.
+ */
+
+typedef struct
+{
+ Relids relids;
+ int sublevels_up;
+} find_dependent_phvs_context;
+
+static bool
+find_dependent_phvs_walker(Node *node,
+ find_dependent_phvs_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ if (phv->phlevelsup == context->sublevels_up &&
+ bms_equal(context->relids, phv->phrels))
+ return true;
+ /* fall through to examine children */
+ }
+ if (IsA(node, Query))
+ {
+ /* Recurse into subselects */
+ bool result;
+
+ context->sublevels_up++;
+ result = query_tree_walker((Query *) node,
+ find_dependent_phvs_walker,
+ (void *) context, 0);
+ context->sublevels_up--;
+ return result;
+ }
+ /* Shouldn't need to handle planner auxiliary nodes here */
+ Assert(!IsA(node, SpecialJoinInfo));
+ Assert(!IsA(node, AppendRelInfo));
+ Assert(!IsA(node, PlaceHolderInfo));
+ Assert(!IsA(node, MinMaxAggInfo));
+
+ return expression_tree_walker(node, find_dependent_phvs_walker,
+ (void *) context);
+}
+
+static bool
+find_dependent_phvs(PlannerInfo *root, int varno)
+{
+ find_dependent_phvs_context context;
+
+ /* If there are no PHVs anywhere, we needn't work hard */
+ if (root->glob->lastPHId == 0)
+ return false;
+
+ context.relids = bms_make_singleton(varno);
+ context.sublevels_up = 0;
+
+ return query_tree_walker(root->parse,
+ find_dependent_phvs_walker,
+ (void *) &context,
+ 0);
+}
+
+static bool
+find_dependent_phvs_in_jointree(PlannerInfo *root, Node *node, int varno)
+{
+ find_dependent_phvs_context context;
+ Relids subrelids;
+ int relid;
+
+ /* If there are no PHVs anywhere, we needn't work hard */
+ if (root->glob->lastPHId == 0)
+ return false;
+
+ context.relids = bms_make_singleton(varno);
+ context.sublevels_up = 0;
+
+ /*
+ * See if the jointree fragment itself contains references (in join quals)
+ */
+ if (find_dependent_phvs_walker(node, &context))
+ return true;
+
+ /*
+ * Otherwise, identify the set of referenced RTEs (we can ignore joins,
+ * since they should be flattened already, so their join alias lists no
+ * longer matter), and tediously check each RTE. We can ignore RTEs that
+ * are not marked LATERAL, though, since they couldn't possibly contain
+ * any cross-references to other RTEs.
+ */
+ subrelids = get_relids_in_jointree(node, false);
+ relid = -1;
+ while ((relid = bms_next_member(subrelids, relid)) >= 0)
+ {
+ RangeTblEntry *rte = rt_fetch(relid, root->parse->rtable);
+
+ if (rte->lateral &&
+ range_table_entry_walker(rte,
+ find_dependent_phvs_walker,
+ (void *) &context,
+ 0))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * substitute_phv_relids - adjust PlaceHolderVar relid sets after pulling up
+ * a subquery or removing an RTE_RESULT jointree item
+ *
+ * Find any PlaceHolderVar nodes in the given tree that reference the
+ * pulled-up relid, and change them to reference the replacement relid(s).
+ *
+ * NOTE: although this has the form of a walker, we cheat and modify the
+ * nodes in-place. This should be OK since the tree was copied by
+ * pullup_replace_vars earlier. Avoid scribbling on the original values of
+ * the bitmapsets, though, because expression_tree_mutator doesn't copy those.
+ */
+
+typedef struct
+{
+ int varno;
+ int sublevels_up;
+ Relids subrelids;
+} substitute_phv_relids_context;
+
+static bool
+substitute_phv_relids_walker(Node *node,
+ substitute_phv_relids_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ if (phv->phlevelsup == context->sublevels_up &&
+ bms_is_member(context->varno, phv->phrels))
+ {
+ phv->phrels = bms_union(phv->phrels,
+ context->subrelids);
+ phv->phrels = bms_del_member(phv->phrels,
+ context->varno);
+ /* Assert we haven't broken the PHV */
+ Assert(!bms_is_empty(phv->phrels));
+ }
+ /* fall through to examine children */
+ }
+ if (IsA(node, Query))
+ {
+ /* Recurse into subselects */
+ bool result;
+
+ context->sublevels_up++;
+ result = query_tree_walker((Query *) node,
+ substitute_phv_relids_walker,
+ (void *) context, 0);
+ context->sublevels_up--;
+ return result;
+ }
+ /* Shouldn't need to handle planner auxiliary nodes here */
+ Assert(!IsA(node, SpecialJoinInfo));
+ Assert(!IsA(node, AppendRelInfo));
+ Assert(!IsA(node, PlaceHolderInfo));
+ Assert(!IsA(node, MinMaxAggInfo));
+
+ return expression_tree_walker(node, substitute_phv_relids_walker,
+ (void *) context);
+}
+
+static void
+substitute_phv_relids(Node *node, int varno, Relids subrelids)
+{
+ substitute_phv_relids_context context;
+
+ context.varno = varno;
+ context.sublevels_up = 0;
+ context.subrelids = subrelids;
+
+ /*
+ * Must be prepared to start with a Query or a bare expression tree.
+ */
+ query_or_expression_tree_walker(node,
+ substitute_phv_relids_walker,
+ (void *) &context,
+ 0);
+}
+
+/*
+ * fix_append_rel_relids: update RT-index fields of AppendRelInfo nodes
+ *
+ * When we pull up a subquery, any AppendRelInfo references to the subquery's
+ * RT index have to be replaced by the substituted relid (and there had better
+ * be only one). We also need to apply substitute_phv_relids to their
+ * translated_vars lists, since those might contain PlaceHolderVars.
+ *
+ * We assume we may modify the AppendRelInfo nodes in-place.
+ */
+static void
+fix_append_rel_relids(List *append_rel_list, int varno, Relids subrelids)
+{
+ ListCell *l;
+ int subvarno = -1;
+
+ /*
+ * We only want to extract the member relid once, but we mustn't fail
+ * immediately if there are multiple members; it could be that none of the
+ * AppendRelInfo nodes refer to it. So compute it on first use. Note that
+ * bms_singleton_member will complain if set is not singleton.
+ */
+ foreach(l, append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+
+ /* The parent_relid shouldn't ever be a pullup target */
+ Assert(appinfo->parent_relid != varno);
+
+ if (appinfo->child_relid == varno)
+ {
+ if (subvarno < 0)
+ subvarno = bms_singleton_member(subrelids);
+ appinfo->child_relid = subvarno;
+ }
+
+ /* Also fix up any PHVs in its translated vars */
+ substitute_phv_relids((Node *) appinfo->translated_vars,
+ varno, subrelids);
+ }
+}
+
+/*
+ * get_relids_in_jointree: get set of RT indexes present in a jointree
+ *
+ * If include_joins is true, join RT indexes are included; if false,
+ * only base rels are included.
+ */
+Relids
+get_relids_in_jointree(Node *jtnode, bool include_joins)
+{
+ Relids result = NULL;
+
+ if (jtnode == NULL)
+ return result;
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ result = bms_make_singleton(varno);
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ foreach(l, f->fromlist)
+ {
+ result = bms_join(result,
+ get_relids_in_jointree(lfirst(l),
+ include_joins));
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+
+ result = get_relids_in_jointree(j->larg, include_joins);
+ result = bms_join(result,
+ get_relids_in_jointree(j->rarg, include_joins));
+ if (include_joins && j->rtindex)
+ result = bms_add_member(result, j->rtindex);
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return result;
+}
+
+/*
+ * get_relids_for_join: get set of base RT indexes making up a join
+ */
+Relids
+get_relids_for_join(Query *query, int joinrelid)
+{
+ Node *jtnode;
+
+ jtnode = find_jointree_node_for_rel((Node *) query->jointree,
+ joinrelid);
+ if (!jtnode)
+ elog(ERROR, "could not find join node %d", joinrelid);
+ return get_relids_in_jointree(jtnode, false);
+}
+
+/*
+ * find_jointree_node_for_rel: locate jointree node for a base or join RT index
+ *
+ * Returns NULL if not found
+ */
+static Node *
+find_jointree_node_for_rel(Node *jtnode, int relid)
+{
+ if (jtnode == NULL)
+ return NULL;
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ if (relid == varno)
+ return jtnode;
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ foreach(l, f->fromlist)
+ {
+ jtnode = find_jointree_node_for_rel(lfirst(l), relid);
+ if (jtnode)
+ return jtnode;
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+
+ if (relid == j->rtindex)
+ return jtnode;
+ jtnode = find_jointree_node_for_rel(j->larg, relid);
+ if (jtnode)
+ return jtnode;
+ jtnode = find_jointree_node_for_rel(j->rarg, relid);
+ if (jtnode)
+ return jtnode;
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return NULL;
+}
diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c
new file mode 100644
index 0000000..42c3e4d
--- /dev/null
+++ b/src/backend/optimizer/prep/prepqual.c
@@ -0,0 +1,676 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepqual.c
+ * Routines for preprocessing qualification expressions
+ *
+ *
+ * While the parser will produce flattened (N-argument) AND/OR trees from
+ * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
+ * directly underneath another AND, or OR underneath OR, if the input was
+ * oddly parenthesized. Also, rule expansion and subquery flattening could
+ * produce such parsetrees. The planner wants to flatten all such cases
+ * to ensure consistent optimization behavior.
+ *
+ * Formerly, this module was responsible for doing the initial flattening,
+ * but now we leave it to eval_const_expressions to do that since it has to
+ * make a complete pass over the expression tree anyway. Instead, we just
+ * have to ensure that our manipulations preserve AND/OR flatness.
+ * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
+ * tree after local transformations that might introduce nested AND/ORs.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/prep/prepqual.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/prep.h"
+#include "utils/lsyscache.h"
+
+
+static List *pull_ands(List *andlist);
+static List *pull_ors(List *orlist);
+static Expr *find_duplicate_ors(Expr *qual, bool is_check);
+static Expr *process_duplicate_ors(List *orlist);
+
+
+/*
+ * negate_clause
+ * Negate a Boolean expression.
+ *
+ * Input is a clause to be negated (e.g., the argument of a NOT clause).
+ * Returns a new clause equivalent to the negation of the given clause.
+ *
+ * Although this can be invoked on its own, it's mainly intended as a helper
+ * for eval_const_expressions(), and that context drives several design
+ * decisions. In particular, if the input is already AND/OR flat, we must
+ * preserve that property. We also don't bother to recurse in situations
+ * where we can assume that lower-level executions of eval_const_expressions
+ * would already have simplified sub-clauses of the input.
+ *
+ * The difference between this and a simple make_notclause() is that this
+ * tries to get rid of the NOT node by logical simplification. It's clearly
+ * always a win if the NOT node can be eliminated altogether. However, our
+ * use of DeMorgan's laws could result in having more NOT nodes rather than
+ * fewer. We do that unconditionally anyway, because in WHERE clauses it's
+ * important to expose as much top-level AND/OR structure as possible.
+ * Also, eliminating an intermediate NOT may allow us to flatten two levels
+ * of AND or OR together that we couldn't have otherwise. Finally, one of
+ * the motivations for doing this is to ensure that logically equivalent
+ * expressions will be seen as physically equal(), so we should always apply
+ * the same transformations.
+ */
+Node *
+negate_clause(Node *node)
+{
+ if (node == NULL) /* should not happen */
+ elog(ERROR, "can't negate an empty subexpression");
+ switch (nodeTag(node))
+ {
+ case T_Const:
+ {
+ Const *c = (Const *) node;
+
+ /* NOT NULL is still NULL */
+ if (c->constisnull)
+ return makeBoolConst(false, true);
+ /* otherwise pretty easy */
+ return makeBoolConst(!DatumGetBool(c->constvalue), false);
+ }
+ break;
+ case T_OpExpr:
+ {
+ /*
+ * Negate operator if possible: (NOT (< A B)) => (>= A B)
+ */
+ OpExpr *opexpr = (OpExpr *) node;
+ Oid negator = get_negator(opexpr->opno);
+
+ if (negator)
+ {
+ OpExpr *newopexpr = makeNode(OpExpr);
+
+ newopexpr->opno = negator;
+ newopexpr->opfuncid = InvalidOid;
+ newopexpr->opresulttype = opexpr->opresulttype;
+ newopexpr->opretset = opexpr->opretset;
+ newopexpr->opcollid = opexpr->opcollid;
+ newopexpr->inputcollid = opexpr->inputcollid;
+ newopexpr->args = opexpr->args;
+ newopexpr->location = opexpr->location;
+ return (Node *) newopexpr;
+ }
+ }
+ break;
+ case T_ScalarArrayOpExpr:
+ {
+ /*
+ * Negate a ScalarArrayOpExpr if its operator has a negator;
+ * for example x = ANY (list) becomes x <> ALL (list)
+ */
+ ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
+ Oid negator = get_negator(saopexpr->opno);
+
+ if (negator)
+ {
+ ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
+
+ newopexpr->opno = negator;
+ newopexpr->opfuncid = InvalidOid;
+ newopexpr->hashfuncid = InvalidOid;
+ newopexpr->useOr = !saopexpr->useOr;
+ newopexpr->inputcollid = saopexpr->inputcollid;
+ newopexpr->args = saopexpr->args;
+ newopexpr->location = saopexpr->location;
+ return (Node *) newopexpr;
+ }
+ }
+ break;
+ case T_BoolExpr:
+ {
+ BoolExpr *expr = (BoolExpr *) node;
+
+ switch (expr->boolop)
+ {
+ /*--------------------
+ * Apply DeMorgan's Laws:
+ * (NOT (AND A B)) => (OR (NOT A) (NOT B))
+ * (NOT (OR A B)) => (AND (NOT A) (NOT B))
+ * i.e., swap AND for OR and negate each subclause.
+ *
+ * If the input is already AND/OR flat and has no NOT
+ * directly above AND or OR, this transformation preserves
+ * those properties. For example, if no direct child of
+ * the given AND clause is an AND or a NOT-above-OR, then
+ * the recursive calls of negate_clause() can't return any
+ * OR clauses. So we needn't call pull_ors() before
+ * building a new OR clause. Similarly for the OR case.
+ *--------------------
+ */
+ case AND_EXPR:
+ {
+ List *nargs = NIL;
+ ListCell *lc;
+
+ foreach(lc, expr->args)
+ {
+ nargs = lappend(nargs,
+ negate_clause(lfirst(lc)));
+ }
+ return (Node *) make_orclause(nargs);
+ }
+ break;
+ case OR_EXPR:
+ {
+ List *nargs = NIL;
+ ListCell *lc;
+
+ foreach(lc, expr->args)
+ {
+ nargs = lappend(nargs,
+ negate_clause(lfirst(lc)));
+ }
+ return (Node *) make_andclause(nargs);
+ }
+ break;
+ case NOT_EXPR:
+
+ /*
+ * NOT underneath NOT: they cancel. We assume the
+ * input is already simplified, so no need to recurse.
+ */
+ return (Node *) linitial(expr->args);
+ default:
+ elog(ERROR, "unrecognized boolop: %d",
+ (int) expr->boolop);
+ break;
+ }
+ }
+ break;
+ case T_NullTest:
+ {
+ NullTest *expr = (NullTest *) node;
+
+ /*
+ * In the rowtype case, the two flavors of NullTest are *not*
+ * logical inverses, so we can't simplify. But it does work
+ * for scalar datatypes.
+ */
+ if (!expr->argisrow)
+ {
+ NullTest *newexpr = makeNode(NullTest);
+
+ newexpr->arg = expr->arg;
+ newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
+ IS_NOT_NULL : IS_NULL);
+ newexpr->argisrow = expr->argisrow;
+ newexpr->location = expr->location;
+ return (Node *) newexpr;
+ }
+ }
+ break;
+ case T_BooleanTest:
+ {
+ BooleanTest *expr = (BooleanTest *) node;
+ BooleanTest *newexpr = makeNode(BooleanTest);
+
+ newexpr->arg = expr->arg;
+ switch (expr->booltesttype)
+ {
+ case IS_TRUE:
+ newexpr->booltesttype = IS_NOT_TRUE;
+ break;
+ case IS_NOT_TRUE:
+ newexpr->booltesttype = IS_TRUE;
+ break;
+ case IS_FALSE:
+ newexpr->booltesttype = IS_NOT_FALSE;
+ break;
+ case IS_NOT_FALSE:
+ newexpr->booltesttype = IS_FALSE;
+ break;
+ case IS_UNKNOWN:
+ newexpr->booltesttype = IS_NOT_UNKNOWN;
+ break;
+ case IS_NOT_UNKNOWN:
+ newexpr->booltesttype = IS_UNKNOWN;
+ break;
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) expr->booltesttype);
+ break;
+ }
+ newexpr->location = expr->location;
+ return (Node *) newexpr;
+ }
+ break;
+ default:
+ /* else fall through */
+ break;
+ }
+
+ /*
+ * Otherwise we don't know how to simplify this, so just tack on an
+ * explicit NOT node.
+ */
+ return (Node *) make_notclause((Expr *) node);
+}
+
+
+/*
+ * canonicalize_qual
+ * Convert a qualification expression to the most useful form.
+ *
+ * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
+ * clauses. It can also be used on top-level CHECK constraints, for which
+ * pass is_check = true. DO NOT call it on any expression that is not known
+ * to be one or the other, as it might apply inappropriate simplifications.
+ *
+ * The name of this routine is a holdover from a time when it would try to
+ * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
+ * Eventually, we recognized that that had more theoretical purity than
+ * actual usefulness, and so now the transformation doesn't involve any
+ * notion of reaching a canonical form.
+ *
+ * NOTE: we assume the input has already been through eval_const_expressions
+ * and therefore possesses AND/OR flatness. Formerly this function included
+ * its own flattening logic, but that requires a useless extra pass over the
+ * tree.
+ *
+ * Returns the modified qualification.
+ */
+Expr *
+canonicalize_qual(Expr *qual, bool is_check)
+{
+ Expr *newqual;
+
+ /* Quick exit for empty qual */
+ if (qual == NULL)
+ return NULL;
+
+ /* This should not be invoked on quals in implicit-AND format */
+ Assert(!IsA(qual, List));
+
+ /*
+ * Pull up redundant subclauses in OR-of-AND trees. We do this only
+ * within the top-level AND/OR structure; there's no point in looking
+ * deeper. Also remove any NULL constants in the top-level structure.
+ */
+ newqual = find_duplicate_ors(qual, is_check);
+
+ return newqual;
+}
+
+
+/*
+ * pull_ands
+ * Recursively flatten nested AND clauses into a single and-clause list.
+ *
+ * Input is the arglist of an AND clause.
+ * Returns the rebuilt arglist (note original list structure is not touched).
+ */
+static List *
+pull_ands(List *andlist)
+{
+ List *out_list = NIL;
+ ListCell *arg;
+
+ foreach(arg, andlist)
+ {
+ Node *subexpr = (Node *) lfirst(arg);
+
+ if (is_andclause(subexpr))
+ out_list = list_concat(out_list,
+ pull_ands(((BoolExpr *) subexpr)->args));
+ else
+ out_list = lappend(out_list, subexpr);
+ }
+ return out_list;
+}
+
+/*
+ * pull_ors
+ * Recursively flatten nested OR clauses into a single or-clause list.
+ *
+ * Input is the arglist of an OR clause.
+ * Returns the rebuilt arglist (note original list structure is not touched).
+ */
+static List *
+pull_ors(List *orlist)
+{
+ List *out_list = NIL;
+ ListCell *arg;
+
+ foreach(arg, orlist)
+ {
+ Node *subexpr = (Node *) lfirst(arg);
+
+ if (is_orclause(subexpr))
+ out_list = list_concat(out_list,
+ pull_ors(((BoolExpr *) subexpr)->args));
+ else
+ out_list = lappend(out_list, subexpr);
+ }
+ return out_list;
+}
+
+
+/*--------------------
+ * The following code attempts to apply the inverse OR distributive law:
+ * ((A AND B) OR (A AND C)) => (A AND (B OR C))
+ * That is, locate OR clauses in which every subclause contains an
+ * identical term, and pull out the duplicated terms.
+ *
+ * This may seem like a fairly useless activity, but it turns out to be
+ * applicable to many machine-generated queries, and there are also queries
+ * in some of the TPC benchmarks that need it. This was in fact almost the
+ * sole useful side-effect of the old prepqual code that tried to force
+ * the query into canonical AND-of-ORs form: the canonical equivalent of
+ * ((A AND B) OR (A AND C))
+ * is
+ * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
+ * which the code was able to simplify to
+ * (A AND (A OR C) AND (B OR A) AND (B OR C))
+ * thus successfully extracting the common condition A --- but at the cost
+ * of cluttering the qual with many redundant clauses.
+ *--------------------
+ */
+
+/*
+ * find_duplicate_ors
+ * Given a qualification tree with the NOTs pushed down, search for
+ * OR clauses to which the inverse OR distributive law might apply.
+ * Only the top-level AND/OR structure is searched.
+ *
+ * While at it, we remove any NULL constants within the top-level AND/OR
+ * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
+ * In general that would change the result, so eval_const_expressions can't
+ * do it; but at top level of WHERE, we don't need to distinguish between
+ * FALSE and NULL results, so it's valid to treat NULL::boolean the same
+ * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
+ * CHECK constraint, we may treat a NULL the same as TRUE.
+ *
+ * Returns the modified qualification. AND/OR flatness is preserved.
+ */
+static Expr *
+find_duplicate_ors(Expr *qual, bool is_check)
+{
+ if (is_orclause(qual))
+ {
+ List *orlist = NIL;
+ ListCell *temp;
+
+ /* Recurse */
+ foreach(temp, ((BoolExpr *) qual)->args)
+ {
+ Expr *arg = (Expr *) lfirst(temp);
+
+ arg = find_duplicate_ors(arg, is_check);
+
+ /* Get rid of any constant inputs */
+ if (arg && IsA(arg, Const))
+ {
+ Const *carg = (Const *) arg;
+
+ if (is_check)
+ {
+ /* Within OR in CHECK, drop constant FALSE */
+ if (!carg->constisnull && !DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant TRUE or NULL, so OR reduces to TRUE */
+ return (Expr *) makeBoolConst(true, false);
+ }
+ else
+ {
+ /* Within OR in WHERE, drop constant FALSE or NULL */
+ if (carg->constisnull || !DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant TRUE, so OR reduces to TRUE */
+ return arg;
+ }
+ }
+
+ orlist = lappend(orlist, arg);
+ }
+
+ /* Flatten any ORs pulled up to just below here */
+ orlist = pull_ors(orlist);
+
+ /* Now we can look for duplicate ORs */
+ return process_duplicate_ors(orlist);
+ }
+ else if (is_andclause(qual))
+ {
+ List *andlist = NIL;
+ ListCell *temp;
+
+ /* Recurse */
+ foreach(temp, ((BoolExpr *) qual)->args)
+ {
+ Expr *arg = (Expr *) lfirst(temp);
+
+ arg = find_duplicate_ors(arg, is_check);
+
+ /* Get rid of any constant inputs */
+ if (arg && IsA(arg, Const))
+ {
+ Const *carg = (Const *) arg;
+
+ if (is_check)
+ {
+ /* Within AND in CHECK, drop constant TRUE or NULL */
+ if (carg->constisnull || DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant FALSE, so AND reduces to FALSE */
+ return arg;
+ }
+ else
+ {
+ /* Within AND in WHERE, drop constant TRUE */
+ if (!carg->constisnull && DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant FALSE or NULL, so AND reduces to FALSE */
+ return (Expr *) makeBoolConst(false, false);
+ }
+ }
+
+ andlist = lappend(andlist, arg);
+ }
+
+ /* Flatten any ANDs introduced just below here */
+ andlist = pull_ands(andlist);
+
+ /* AND of no inputs reduces to TRUE */
+ if (andlist == NIL)
+ return (Expr *) makeBoolConst(true, false);
+
+ /* Single-expression AND just reduces to that expression */
+ if (list_length(andlist) == 1)
+ return (Expr *) linitial(andlist);
+
+ /* Else we still need an AND node */
+ return make_andclause(andlist);
+ }
+ else
+ return qual;
+}
+
+/*
+ * process_duplicate_ors
+ * Given a list of exprs which are ORed together, try to apply
+ * the inverse OR distributive law.
+ *
+ * Returns the resulting expression (could be an AND clause, an OR
+ * clause, or maybe even a single subexpression).
+ */
+static Expr *
+process_duplicate_ors(List *orlist)
+{
+ List *reference = NIL;
+ int num_subclauses = 0;
+ List *winners;
+ List *neworlist;
+ ListCell *temp;
+
+ /* OR of no inputs reduces to FALSE */
+ if (orlist == NIL)
+ return (Expr *) makeBoolConst(false, false);
+
+ /* Single-expression OR just reduces to that expression */
+ if (list_length(orlist) == 1)
+ return (Expr *) linitial(orlist);
+
+ /*
+ * Choose the shortest AND clause as the reference list --- obviously, any
+ * subclause not in this clause isn't in all the clauses. If we find a
+ * clause that's not an AND, we can treat it as a one-element AND clause,
+ * which necessarily wins as shortest.
+ */
+ foreach(temp, orlist)
+ {
+ Expr *clause = (Expr *) lfirst(temp);
+
+ if (is_andclause(clause))
+ {
+ List *subclauses = ((BoolExpr *) clause)->args;
+ int nclauses = list_length(subclauses);
+
+ if (reference == NIL || nclauses < num_subclauses)
+ {
+ reference = subclauses;
+ num_subclauses = nclauses;
+ }
+ }
+ else
+ {
+ reference = list_make1(clause);
+ break;
+ }
+ }
+
+ /*
+ * Just in case, eliminate any duplicates in the reference list.
+ */
+ reference = list_union(NIL, reference);
+
+ /*
+ * Check each element of the reference list to see if it's in all the OR
+ * clauses. Build a new list of winning clauses.
+ */
+ winners = NIL;
+ foreach(temp, reference)
+ {
+ Expr *refclause = (Expr *) lfirst(temp);
+ bool win = true;
+ ListCell *temp2;
+
+ foreach(temp2, orlist)
+ {
+ Expr *clause = (Expr *) lfirst(temp2);
+
+ if (is_andclause(clause))
+ {
+ if (!list_member(((BoolExpr *) clause)->args, refclause))
+ {
+ win = false;
+ break;
+ }
+ }
+ else
+ {
+ if (!equal(refclause, clause))
+ {
+ win = false;
+ break;
+ }
+ }
+ }
+
+ if (win)
+ winners = lappend(winners, refclause);
+ }
+
+ /*
+ * If no winners, we can't transform the OR
+ */
+ if (winners == NIL)
+ return make_orclause(orlist);
+
+ /*
+ * Generate new OR list consisting of the remaining sub-clauses.
+ *
+ * If any clause degenerates to empty, then we have a situation like (A
+ * AND B) OR (A), which can be reduced to just A --- that is, the
+ * additional conditions in other arms of the OR are irrelevant.
+ *
+ * Note that because we use list_difference, any multiple occurrences of a
+ * winning clause in an AND sub-clause will be removed automatically.
+ */
+ neworlist = NIL;
+ foreach(temp, orlist)
+ {
+ Expr *clause = (Expr *) lfirst(temp);
+
+ if (is_andclause(clause))
+ {
+ List *subclauses = ((BoolExpr *) clause)->args;
+
+ subclauses = list_difference(subclauses, winners);
+ if (subclauses != NIL)
+ {
+ if (list_length(subclauses) == 1)
+ neworlist = lappend(neworlist, linitial(subclauses));
+ else
+ neworlist = lappend(neworlist, make_andclause(subclauses));
+ }
+ else
+ {
+ neworlist = NIL; /* degenerate case, see above */
+ break;
+ }
+ }
+ else
+ {
+ if (!list_member(winners, clause))
+ neworlist = lappend(neworlist, clause);
+ else
+ {
+ neworlist = NIL; /* degenerate case, see above */
+ break;
+ }
+ }
+ }
+
+ /*
+ * Append reduced OR to the winners list, if it's not degenerate, handling
+ * the special case of one element correctly (can that really happen?).
+ * Also be careful to maintain AND/OR flatness in case we pulled up a
+ * sub-sub-OR-clause.
+ */
+ if (neworlist != NIL)
+ {
+ if (list_length(neworlist) == 1)
+ winners = lappend(winners, linitial(neworlist));
+ else
+ winners = lappend(winners, make_orclause(pull_ors(neworlist)));
+ }
+
+ /*
+ * And return the constructed AND clause, again being wary of a single
+ * element and AND/OR flatness.
+ */
+ if (list_length(winners) == 1)
+ return (Expr *) linitial(winners);
+ else
+ return make_andclause(pull_ands(winners));
+}
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
new file mode 100644
index 0000000..e943458
--- /dev/null
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -0,0 +1,437 @@
+/*-------------------------------------------------------------------------
+ *
+ * preptlist.c
+ * Routines to preprocess the parse tree target list
+ *
+ * For an INSERT, the targetlist must contain an entry for each attribute of
+ * the target relation in the correct order.
+ *
+ * For an UPDATE, the targetlist just contains the expressions for the new
+ * column values.
+ *
+ * For UPDATE and DELETE queries, the targetlist must also contain "junk"
+ * tlist entries needed to allow the executor to identify the rows to be
+ * updated or deleted; for example, the ctid of a heap row. (The planner
+ * adds these; they're not in what we receive from the planner/rewriter.)
+ *
+ * For all query types, there can be additional junk tlist entries, such as
+ * sort keys, Vars needed for a RETURNING list, and row ID information needed
+ * for SELECT FOR UPDATE locking and/or EvalPlanQual checking.
+ *
+ * The query rewrite phase also does preprocessing of the targetlist (see
+ * rewriteTargetListIU). The division of labor between here and there is
+ * partially historical, but it's not entirely arbitrary. The stuff done
+ * here is closely connected to physical access to tables, whereas the
+ * rewriter's work is more concerned with SQL semantics.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/prep/preptlist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/table.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/appendinfo.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/prep.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_coerce.h"
+#include "parser/parsetree.h"
+#include "utils/rel.h"
+
+static List *expand_insert_targetlist(List *tlist, Relation rel);
+
+
+/*
+ * preprocess_targetlist
+ * Driver for preprocessing the parse tree targetlist.
+ *
+ * The preprocessed targetlist is returned in root->processed_tlist.
+ * Also, if this is an UPDATE, we return a list of target column numbers
+ * in root->update_colnos. (Resnos in processed_tlist will be consecutive,
+ * so do not look at that to find out which columns are targets!)
+ */
+void
+preprocess_targetlist(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ int result_relation = parse->resultRelation;
+ List *range_table = parse->rtable;
+ CmdType command_type = parse->commandType;
+ RangeTblEntry *target_rte = NULL;
+ Relation target_relation = NULL;
+ List *tlist;
+ ListCell *lc;
+
+ /*
+ * If there is a result relation, open it so we can look for missing
+ * columns and so on. We assume that previous code already acquired at
+ * least AccessShareLock on the relation, so we need no lock here.
+ */
+ if (result_relation)
+ {
+ target_rte = rt_fetch(result_relation, range_table);
+
+ /*
+ * Sanity check: it'd better be a real relation not, say, a subquery.
+ * Else parser or rewriter messed up.
+ */
+ if (target_rte->rtekind != RTE_RELATION)
+ elog(ERROR, "result relation must be a regular relation");
+
+ target_relation = table_open(target_rte->relid, NoLock);
+ }
+ else
+ Assert(command_type == CMD_SELECT);
+
+ /*
+ * In an INSERT, the executor expects the targetlist to match the exact
+ * order of the target table's attributes, including entries for
+ * attributes not mentioned in the source query.
+ *
+ * In an UPDATE, we don't rearrange the tlist order, but we need to make a
+ * separate list of the target attribute numbers, in tlist order, and then
+ * renumber the processed_tlist entries to be consecutive.
+ */
+ tlist = parse->targetList;
+ if (command_type == CMD_INSERT)
+ tlist = expand_insert_targetlist(tlist, target_relation);
+ else if (command_type == CMD_UPDATE)
+ root->update_colnos = extract_update_targetlist_colnos(tlist);
+
+ /*
+ * For non-inherited UPDATE/DELETE, register any junk column(s) needed to
+ * allow the executor to identify the rows to be updated or deleted. In
+ * the inheritance case, we do nothing now, leaving this to be dealt with
+ * when expand_inherited_rtentry() makes the leaf target relations. (But
+ * there might not be any leaf target relations, in which case we must do
+ * this in distribute_row_identity_vars().)
+ */
+ if ((command_type == CMD_UPDATE || command_type == CMD_DELETE) &&
+ !target_rte->inh)
+ {
+ /* row-identity logic expects to add stuff to processed_tlist */
+ root->processed_tlist = tlist;
+ add_row_identity_columns(root, result_relation,
+ target_rte, target_relation);
+ tlist = root->processed_tlist;
+ }
+
+ /*
+ * Add necessary junk columns for rowmarked rels. These values are needed
+ * for locking of rels selected FOR UPDATE/SHARE, and to do EvalPlanQual
+ * rechecking. See comments for PlanRowMark in plannodes.h. If you
+ * change this stanza, see also expand_inherited_rtentry(), which has to
+ * be able to add on junk columns equivalent to these.
+ *
+ * (Someday it might be useful to fold these resjunk columns into the
+ * row-identity-column management used for UPDATE/DELETE. Today is not
+ * that day, however. One notable issue is that it seems important that
+ * the whole-row Vars made here use the real table rowtype, not RECORD, so
+ * that conversion to/from child relations' rowtypes will happen. Also,
+ * since these entries don't potentially bloat with more and more child
+ * relations, there's not really much need for column sharing.)
+ */
+ foreach(lc, root->rowMarks)
+ {
+ PlanRowMark *rc = (PlanRowMark *) lfirst(lc);
+ Var *var;
+ char resname[32];
+ TargetEntry *tle;
+
+ /* child rels use the same junk attrs as their parents */
+ if (rc->rti != rc->prti)
+ continue;
+
+ if (rc->allMarkTypes & ~(1 << ROW_MARK_COPY))
+ {
+ /* Need to fetch TID */
+ var = makeVar(rc->rti,
+ SelfItemPointerAttributeNumber,
+ TIDOID,
+ -1,
+ InvalidOid,
+ 0);
+ snprintf(resname, sizeof(resname), "ctid%u", rc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(tlist) + 1,
+ pstrdup(resname),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+ if (rc->allMarkTypes & (1 << ROW_MARK_COPY))
+ {
+ /* Need the whole row as a junk var */
+ var = makeWholeRowVar(rt_fetch(rc->rti, range_table),
+ rc->rti,
+ 0,
+ false);
+ snprintf(resname, sizeof(resname), "wholerow%u", rc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(tlist) + 1,
+ pstrdup(resname),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+
+ /* If parent of inheritance tree, always fetch the tableoid too. */
+ if (rc->isParent)
+ {
+ var = makeVar(rc->rti,
+ TableOidAttributeNumber,
+ OIDOID,
+ -1,
+ InvalidOid,
+ 0);
+ snprintf(resname, sizeof(resname), "tableoid%u", rc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(tlist) + 1,
+ pstrdup(resname),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+ }
+
+ /*
+ * If the query has a RETURNING list, add resjunk entries for any Vars
+ * used in RETURNING that belong to other relations. We need to do this
+ * to make these Vars available for the RETURNING calculation. Vars that
+ * belong to the result rel don't need to be added, because they will be
+ * made to refer to the actual heap tuple.
+ */
+ if (parse->returningList && list_length(parse->rtable) > 1)
+ {
+ List *vars;
+ ListCell *l;
+
+ vars = pull_var_clause((Node *) parse->returningList,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+ foreach(l, vars)
+ {
+ Var *var = (Var *) lfirst(l);
+ TargetEntry *tle;
+
+ if (IsA(var, Var) &&
+ var->varno == result_relation)
+ continue; /* don't need it */
+
+ if (tlist_member((Expr *) var, tlist))
+ continue; /* already got it */
+
+ tle = makeTargetEntry((Expr *) var,
+ list_length(tlist) + 1,
+ NULL,
+ true);
+
+ tlist = lappend(tlist, tle);
+ }
+ list_free(vars);
+ }
+
+ root->processed_tlist = tlist;
+
+ if (target_relation)
+ table_close(target_relation, NoLock);
+}
+
+/*
+ * extract_update_targetlist_colnos
+ * Extract a list of the target-table column numbers that
+ * an UPDATE's targetlist wants to assign to, then renumber.
+ *
+ * The convention in the parser and rewriter is that the resnos in an
+ * UPDATE's non-resjunk TLE entries are the target column numbers
+ * to assign to. Here, we extract that info into a separate list, and
+ * then convert the tlist to the sequential-numbering convention that's
+ * used by all other query types.
+ *
+ * This is also applied to the tlist associated with INSERT ... ON CONFLICT
+ * ... UPDATE, although not till much later in planning.
+ */
+List *
+extract_update_targetlist_colnos(List *tlist)
+{
+ List *update_colnos = NIL;
+ AttrNumber nextresno = 1;
+ ListCell *lc;
+
+ foreach(lc, tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ if (!tle->resjunk)
+ update_colnos = lappend_int(update_colnos, tle->resno);
+ tle->resno = nextresno++;
+ }
+ return update_colnos;
+}
+
+
+/*****************************************************************************
+ *
+ * TARGETLIST EXPANSION
+ *
+ *****************************************************************************/
+
+/*
+ * expand_insert_targetlist
+ * Given a target list as generated by the parser and a result relation,
+ * add targetlist entries for any missing attributes, and ensure the
+ * non-junk attributes appear in proper field order.
+ *
+ * Once upon a time we also did more or less this with UPDATE targetlists,
+ * but now this code is only applied to INSERT targetlists.
+ */
+static List *
+expand_insert_targetlist(List *tlist, Relation rel)
+{
+ List *new_tlist = NIL;
+ ListCell *tlist_item;
+ int attrno,
+ numattrs;
+
+ tlist_item = list_head(tlist);
+
+ /*
+ * The rewriter should have already ensured that the TLEs are in correct
+ * order; but we have to insert TLEs for any missing attributes.
+ *
+ * Scan the tuple description in the relation's relcache entry to make
+ * sure we have all the user attributes in the right order.
+ */
+ numattrs = RelationGetNumberOfAttributes(rel);
+
+ for (attrno = 1; attrno <= numattrs; attrno++)
+ {
+ Form_pg_attribute att_tup = TupleDescAttr(rel->rd_att, attrno - 1);
+ TargetEntry *new_tle = NULL;
+
+ if (tlist_item != NULL)
+ {
+ TargetEntry *old_tle = (TargetEntry *) lfirst(tlist_item);
+
+ if (!old_tle->resjunk && old_tle->resno == attrno)
+ {
+ new_tle = old_tle;
+ tlist_item = lnext(tlist, tlist_item);
+ }
+ }
+
+ if (new_tle == NULL)
+ {
+ /*
+ * Didn't find a matching tlist entry, so make one.
+ *
+ * INSERTs should insert NULL in this case. (We assume the
+ * rewriter would have inserted any available non-NULL default
+ * value.) Also, if the column isn't dropped, apply any domain
+ * constraints that might exist --- this is to catch domain NOT
+ * NULL.
+ *
+ * When generating a NULL constant for a dropped column, we label
+ * it INT4 (any other guaranteed-to-exist datatype would do as
+ * well). We can't label it with the dropped column's datatype
+ * since that might not exist anymore. It does not really matter
+ * what we claim the type is, since NULL is NULL --- its
+ * representation is datatype-independent. This could perhaps
+ * confuse code comparing the finished plan to the target
+ * relation, however.
+ */
+ Oid atttype = att_tup->atttypid;
+ Oid attcollation = att_tup->attcollation;
+ Node *new_expr;
+
+ if (!att_tup->attisdropped)
+ {
+ new_expr = (Node *) makeConst(atttype,
+ -1,
+ attcollation,
+ att_tup->attlen,
+ (Datum) 0,
+ true, /* isnull */
+ att_tup->attbyval);
+ new_expr = coerce_to_domain(new_expr,
+ InvalidOid, -1,
+ atttype,
+ COERCION_IMPLICIT,
+ COERCE_IMPLICIT_CAST,
+ -1,
+ false);
+ }
+ else
+ {
+ /* Insert NULL for dropped column */
+ new_expr = (Node *) makeConst(INT4OID,
+ -1,
+ InvalidOid,
+ sizeof(int32),
+ (Datum) 0,
+ true, /* isnull */
+ true /* byval */ );
+ }
+
+ new_tle = makeTargetEntry((Expr *) new_expr,
+ attrno,
+ pstrdup(NameStr(att_tup->attname)),
+ false);
+ }
+
+ new_tlist = lappend(new_tlist, new_tle);
+ }
+
+ /*
+ * The remaining tlist entries should be resjunk; append them all to the
+ * end of the new tlist, making sure they have resnos higher than the last
+ * real attribute. (Note: although the rewriter already did such
+ * renumbering, we have to do it again here in case we added NULL entries
+ * above.)
+ */
+ while (tlist_item)
+ {
+ TargetEntry *old_tle = (TargetEntry *) lfirst(tlist_item);
+
+ if (!old_tle->resjunk)
+ elog(ERROR, "targetlist is not sorted correctly");
+ /* Get the resno right, but don't copy unnecessarily */
+ if (old_tle->resno != attrno)
+ {
+ old_tle = flatCopyTargetEntry(old_tle);
+ old_tle->resno = attrno;
+ }
+ new_tlist = lappend(new_tlist, old_tle);
+ attrno++;
+ tlist_item = lnext(tlist, tlist_item);
+ }
+
+ return new_tlist;
+}
+
+
+/*
+ * Locate PlanRowMark for given RT index, or return NULL if none
+ *
+ * This probably ought to be elsewhere, but there's no very good place
+ */
+PlanRowMark *
+get_plan_rowmark(List *rowmarks, Index rtindex)
+{
+ ListCell *l;
+
+ foreach(l, rowmarks)
+ {
+ PlanRowMark *rc = (PlanRowMark *) lfirst(l);
+
+ if (rc->rti == rtindex)
+ return rc;
+ }
+ return NULL;
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
new file mode 100644
index 0000000..e9256a2
--- /dev/null
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -0,0 +1,1420 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepunion.c
+ * Routines to plan set-operation queries. The filename is a leftover
+ * from a time when only UNIONs were implemented.
+ *
+ * There are two code paths in the planner for set-operation queries.
+ * If a subquery consists entirely of simple UNION ALL operations, it
+ * is converted into an "append relation". Otherwise, it is handled
+ * by the general code in this module (plan_set_operations and its
+ * subroutines). There is some support code here for the append-relation
+ * case, but most of the heavy lifting for that is done elsewhere,
+ * notably in prepjointree.c and allpaths.c.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/prep/prepunion.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/sysattr.h"
+#include "catalog/partition.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_type.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/planner.h"
+#include "optimizer/prep.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_coerce.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/selfuncs.h"
+#include "utils/syscache.h"
+
+
+static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
+ List *colTypes, List *colCollations,
+ bool junkOK,
+ int flag, List *refnames_tlist,
+ List **pTargetList,
+ double *pNumGroups);
+static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
+ PlannerInfo *root,
+ List *refnames_tlist,
+ List **pTargetList);
+static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
+ List *refnames_tlist,
+ List **pTargetList);
+static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
+ List *refnames_tlist,
+ List **pTargetList);
+static List *plan_union_children(PlannerInfo *root,
+ SetOperationStmt *top_union,
+ List *refnames_tlist,
+ List **tlist_list);
+static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+ PlannerInfo *root);
+static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
+static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+ Path *input_path,
+ double dNumGroups, double dNumOutputRows,
+ const char *construct);
+static List *generate_setop_tlist(List *colTypes, List *colCollations,
+ int flag,
+ Index varno,
+ bool hack_constants,
+ List *input_tlist,
+ List *refnames_tlist);
+static List *generate_append_tlist(List *colTypes, List *colCollations,
+ bool flag,
+ List *input_tlists,
+ List *refnames_tlist);
+static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
+
+
+/*
+ * plan_set_operations
+ *
+ * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
+ *
+ * This routine only deals with the setOperations tree of the given query.
+ * Any top-level ORDER BY requested in root->parse->sortClause will be handled
+ * when we return to grouping_planner; likewise for LIMIT.
+ *
+ * What we return is an "upperrel" RelOptInfo containing at least one Path
+ * that implements the set-operation tree. In addition, root->processed_tlist
+ * receives a targetlist representing the output of the topmost setop node.
+ */
+RelOptInfo *
+plan_set_operations(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
+ Node *node;
+ RangeTblEntry *leftmostRTE;
+ Query *leftmostQuery;
+ RelOptInfo *setop_rel;
+ List *top_tlist;
+
+ Assert(topop);
+
+ /* check for unsupported stuff */
+ Assert(parse->jointree->fromlist == NIL);
+ Assert(parse->jointree->quals == NULL);
+ Assert(parse->groupClause == NIL);
+ Assert(parse->havingQual == NULL);
+ Assert(parse->windowClause == NIL);
+ Assert(parse->distinctClause == NIL);
+
+ /*
+ * In the outer query level, we won't have any true equivalences to deal
+ * with; but we do want to be able to make pathkeys, which will require
+ * single-member EquivalenceClasses. Indicate that EC merging is complete
+ * so that pathkeys.c won't complain.
+ */
+ Assert(root->eq_classes == NIL);
+ root->ec_merging_done = true;
+
+ /*
+ * We'll need to build RelOptInfos for each of the leaf subqueries, which
+ * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
+ * arrays for those, and for AppendRelInfos in case they're needed.
+ */
+ setup_simple_rel_arrays(root);
+
+ /*
+ * Find the leftmost component Query. We need to use its column names for
+ * all generated tlists (else SELECT INTO won't work right).
+ */
+ node = topop->larg;
+ while (node && IsA(node, SetOperationStmt))
+ node = ((SetOperationStmt *) node)->larg;
+ Assert(node && IsA(node, RangeTblRef));
+ leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
+ leftmostQuery = leftmostRTE->subquery;
+ Assert(leftmostQuery != NULL);
+
+ /*
+ * If the topmost node is a recursive union, it needs special processing.
+ */
+ if (root->hasRecursion)
+ {
+ setop_rel = generate_recursion_path(topop, root,
+ leftmostQuery->targetList,
+ &top_tlist);
+ }
+ else
+ {
+ /*
+ * Recurse on setOperations tree to generate paths for set ops. The
+ * final output paths should have just the column types shown as the
+ * output from the top-level node, plus possibly resjunk working
+ * columns (we can rely on upper-level nodes to deal with that).
+ */
+ setop_rel = recurse_set_operations((Node *) topop, root,
+ topop->colTypes, topop->colCollations,
+ true, -1,
+ leftmostQuery->targetList,
+ &top_tlist,
+ NULL);
+ }
+
+ /* Must return the built tlist into root->processed_tlist. */
+ root->processed_tlist = top_tlist;
+
+ return setop_rel;
+}
+
+/*
+ * recurse_set_operations
+ * Recursively handle one step in a tree of set operations
+ *
+ * colTypes: OID list of set-op's result column datatypes
+ * colCollations: OID list of set-op's result column collations
+ * junkOK: if true, child resjunk columns may be left in the result
+ * flag: if >= 0, add a resjunk output column indicating value of flag
+ * refnames_tlist: targetlist to take column names from
+ *
+ * Returns a RelOptInfo for the subtree, as well as these output parameters:
+ * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ * in the result, and store it there
+ *
+ * The pTargetList output parameter is mostly redundant with the pathtarget
+ * of the returned RelOptInfo, but for the moment we need it because much of
+ * the logic in this file depends on flag columns being marked resjunk.
+ * Pending a redesign of how that works, this is the easy way out.
+ *
+ * We don't have to care about typmods here: the only allowed difference
+ * between set-op input and output typmods is input is a specific typmod
+ * and output is -1, and that does not require a coercion.
+ */
+static RelOptInfo *
+recurse_set_operations(Node *setOp, PlannerInfo *root,
+ List *colTypes, List *colCollations,
+ bool junkOK,
+ int flag, List *refnames_tlist,
+ List **pTargetList,
+ double *pNumGroups)
+{
+ RelOptInfo *rel = NULL; /* keep compiler quiet */
+
+ /* Guard against stack overflow due to overly complex setop nests */
+ check_stack_depth();
+
+ if (IsA(setOp, RangeTblRef))
+ {
+ RangeTblRef *rtr = (RangeTblRef *) setOp;
+ RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
+ Query *subquery = rte->subquery;
+ PlannerInfo *subroot;
+ RelOptInfo *final_rel;
+ Path *subpath;
+ Path *path;
+ List *tlist;
+
+ Assert(subquery != NULL);
+
+ /* Build a RelOptInfo for this leaf subquery. */
+ rel = build_simple_rel(root, rtr->rtindex, NULL);
+
+ /* plan_params should not be in use in current query level */
+ Assert(root->plan_params == NIL);
+
+ /* Generate a subroot and Paths for the subquery */
+ subroot = rel->subroot = subquery_planner(root->glob, subquery,
+ root,
+ false,
+ root->tuple_fraction);
+
+ /*
+ * It should not be possible for the primitive query to contain any
+ * cross-references to other primitive queries in the setop tree.
+ */
+ if (root->plan_params)
+ elog(ERROR, "unexpected outer reference in set operation subquery");
+
+ /* Figure out the appropriate target list for this subquery. */
+ tlist = generate_setop_tlist(colTypes, colCollations,
+ flag,
+ rtr->rtindex,
+ true,
+ subroot->processed_tlist,
+ refnames_tlist);
+ rel->reltarget = create_pathtarget(root, tlist);
+
+ /* Return the fully-fledged tlist to caller, too */
+ *pTargetList = tlist;
+
+ /*
+ * Mark rel with estimated output rows, width, etc. Note that we have
+ * to do this before generating outer-query paths, else
+ * cost_subqueryscan is not happy.
+ */
+ set_subquery_size_estimates(root, rel);
+
+ /*
+ * Since we may want to add a partial path to this relation, we must
+ * set its consider_parallel flag correctly.
+ */
+ final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
+ rel->consider_parallel = final_rel->consider_parallel;
+
+ /*
+ * For the moment, we consider only a single Path for the subquery.
+ * This should change soon (make it look more like
+ * set_subquery_pathlist).
+ */
+ subpath = get_cheapest_fractional_path(final_rel,
+ root->tuple_fraction);
+
+ /*
+ * Stick a SubqueryScanPath atop that.
+ *
+ * We don't bother to determine the subquery's output ordering since
+ * it won't be reflected in the set-op result anyhow; so just label
+ * the SubqueryScanPath with nil pathkeys. (XXX that should change
+ * soon too, likely.)
+ */
+ path = (Path *) create_subqueryscan_path(root, rel, subpath,
+ NIL, NULL);
+
+ add_path(rel, path);
+
+ /*
+ * If we have a partial path for the child relation, we can use that
+ * to build a partial path for this relation. But there's no point in
+ * considering any path but the cheapest.
+ */
+ if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
+ final_rel->partial_pathlist != NIL)
+ {
+ Path *partial_subpath;
+ Path *partial_path;
+
+ partial_subpath = linitial(final_rel->partial_pathlist);
+ partial_path = (Path *)
+ create_subqueryscan_path(root, rel, partial_subpath,
+ NIL, NULL);
+ add_partial_path(rel, partial_path);
+ }
+
+ /*
+ * Estimate number of groups if caller wants it. If the subquery used
+ * grouping or aggregation, its output is probably mostly unique
+ * anyway; otherwise do statistical estimation.
+ *
+ * XXX you don't really want to know about this: we do the estimation
+ * using the subquery's original targetlist expressions, not the
+ * subroot->processed_tlist which might seem more appropriate. The
+ * reason is that if the subquery is itself a setop, it may return a
+ * processed_tlist containing "varno 0" Vars generated by
+ * generate_append_tlist, and those would confuse estimate_num_groups
+ * mightily. We ought to get rid of the "varno 0" hack, but that
+ * requires a redesign of the parsetree representation of setops, so
+ * that there can be an RTE corresponding to each setop's output.
+ */
+ if (pNumGroups)
+ {
+ if (subquery->groupClause || subquery->groupingSets ||
+ subquery->distinctClause ||
+ subroot->hasHavingQual || subquery->hasAggs)
+ *pNumGroups = subpath->rows;
+ else
+ *pNumGroups = estimate_num_groups(subroot,
+ get_tlist_exprs(subquery->targetList, false),
+ subpath->rows,
+ NULL,
+ NULL);
+ }
+ }
+ else if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+ /* UNIONs are much different from INTERSECT/EXCEPT */
+ if (op->op == SETOP_UNION)
+ rel = generate_union_paths(op, root,
+ refnames_tlist,
+ pTargetList);
+ else
+ rel = generate_nonunion_paths(op, root,
+ refnames_tlist,
+ pTargetList);
+ if (pNumGroups)
+ *pNumGroups = rel->rows;
+
+ /*
+ * If necessary, add a Result node to project the caller-requested
+ * output columns.
+ *
+ * XXX you don't really want to know about this: setrefs.c will apply
+ * fix_upper_expr() to the Result node's tlist. This would fail if the
+ * Vars generated by generate_setop_tlist() were not exactly equal()
+ * to the corresponding tlist entries of the subplan. However, since
+ * the subplan was generated by generate_union_paths() or
+ * generate_nonunion_paths(), and hence its tlist was generated by
+ * generate_append_tlist(), this will work. We just tell
+ * generate_setop_tlist() to use varno 0.
+ */
+ if (flag >= 0 ||
+ !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
+ !tlist_same_collations(*pTargetList, colCollations, junkOK))
+ {
+ PathTarget *target;
+ ListCell *lc;
+
+ *pTargetList = generate_setop_tlist(colTypes, colCollations,
+ flag,
+ 0,
+ false,
+ *pTargetList,
+ refnames_tlist);
+ target = create_pathtarget(root, *pTargetList);
+
+ /* Apply projection to each path */
+ foreach(lc, rel->pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ Path *path;
+
+ Assert(subpath->param_info == NULL);
+ path = apply_projection_to_path(root, subpath->parent,
+ subpath, target);
+ /* If we had to add a Result, path is different from subpath */
+ if (path != subpath)
+ lfirst(lc) = path;
+ }
+
+ /* Apply projection to each partial path */
+ foreach(lc, rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ Path *path;
+
+ Assert(subpath->param_info == NULL);
+
+ /* avoid apply_projection_to_path, in case of multiple refs */
+ path = (Path *) create_projection_path(root, subpath->parent,
+ subpath, target);
+ lfirst(lc) = path;
+ }
+ }
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(setOp));
+ *pTargetList = NIL;
+ }
+
+ postprocess_setop_rel(root, rel);
+
+ return rel;
+}
+
+/*
+ * Generate paths for a recursive UNION node
+ */
+static RelOptInfo *
+generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
+ List *refnames_tlist,
+ List **pTargetList)
+{
+ RelOptInfo *result_rel;
+ Path *path;
+ RelOptInfo *lrel,
+ *rrel;
+ Path *lpath;
+ Path *rpath;
+ List *lpath_tlist;
+ List *rpath_tlist;
+ List *tlist;
+ List *groupList;
+ double dNumGroups;
+
+ /* Parser should have rejected other cases */
+ if (setOp->op != SETOP_UNION)
+ elog(ERROR, "only UNION queries can be recursive");
+ /* Worktable ID should be assigned */
+ Assert(root->wt_param_id >= 0);
+
+ /*
+ * Unlike a regular UNION node, process the left and right inputs
+ * separately without any intention of combining them into one Append.
+ */
+ lrel = recurse_set_operations(setOp->larg, root,
+ setOp->colTypes, setOp->colCollations,
+ false, -1,
+ refnames_tlist,
+ &lpath_tlist,
+ NULL);
+ lpath = lrel->cheapest_total_path;
+ /* The right path will want to look at the left one ... */
+ root->non_recursive_path = lpath;
+ rrel = recurse_set_operations(setOp->rarg, root,
+ setOp->colTypes, setOp->colCollations,
+ false, -1,
+ refnames_tlist,
+ &rpath_tlist,
+ NULL);
+ rpath = rrel->cheapest_total_path;
+ root->non_recursive_path = NULL;
+
+ /*
+ * Generate tlist for RecursiveUnion path node --- same as in Append cases
+ */
+ tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
+ list_make2(lpath_tlist, rpath_tlist),
+ refnames_tlist);
+
+ *pTargetList = tlist;
+
+ /* Build result relation. */
+ result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
+ bms_union(lrel->relids, rrel->relids));
+ result_rel->reltarget = create_pathtarget(root, tlist);
+
+ /*
+ * If UNION, identify the grouping operators
+ */
+ if (setOp->all)
+ {
+ groupList = NIL;
+ dNumGroups = 0;
+ }
+ else
+ {
+ /* Identify the grouping semantics */
+ groupList = generate_setop_grouplist(setOp, tlist);
+
+ /* We only support hashing here */
+ if (!grouping_is_hashable(groupList))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement recursive UNION"),
+ errdetail("All column datatypes must be hashable.")));
+
+ /*
+ * For the moment, take the number of distinct groups as equal to the
+ * total input size, ie, the worst case.
+ */
+ dNumGroups = lpath->rows + rpath->rows * 10;
+ }
+
+ /*
+ * And make the path node.
+ */
+ path = (Path *) create_recursiveunion_path(root,
+ result_rel,
+ lpath,
+ rpath,
+ result_rel->reltarget,
+ groupList,
+ root->wt_param_id,
+ dNumGroups);
+
+ add_path(result_rel, path);
+ postprocess_setop_rel(root, result_rel);
+ return result_rel;
+}
+
+/*
+ * Generate paths for a UNION or UNION ALL node
+ */
+static RelOptInfo *
+generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
+ List *refnames_tlist,
+ List **pTargetList)
+{
+ Relids relids = NULL;
+ RelOptInfo *result_rel;
+ double save_fraction = root->tuple_fraction;
+ ListCell *lc;
+ List *pathlist = NIL;
+ List *partial_pathlist = NIL;
+ bool partial_paths_valid = true;
+ bool consider_parallel = true;
+ List *rellist;
+ List *tlist_list;
+ List *tlist;
+ Path *path;
+
+ /*
+ * If plain UNION, tell children to fetch all tuples.
+ *
+ * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
+ * each arm of the UNION ALL. One could make a case for reducing the
+ * tuple fraction for later arms (discounting by the expected size of the
+ * earlier arms' results) but it seems not worth the trouble. The normal
+ * case where tuple_fraction isn't already zero is a LIMIT at top level,
+ * and passing it down as-is is usually enough to get the desired result
+ * of preferring fast-start plans.
+ */
+ if (!op->all)
+ root->tuple_fraction = 0.0;
+
+ /*
+ * If any of my children are identical UNION nodes (same op, all-flag, and
+ * colTypes) then they can be merged into this node so that we generate
+ * only one Append and unique-ification for the lot. Recurse to find such
+ * nodes and compute their children's paths.
+ */
+ rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
+
+ /*
+ * Generate tlist for Append plan node.
+ *
+ * The tlist for an Append plan isn't important as far as the Append is
+ * concerned, but we must make it look real anyway for the benefit of the
+ * next plan level up.
+ */
+ tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
+ tlist_list, refnames_tlist);
+
+ *pTargetList = tlist;
+
+ /* Build path lists and relid set. */
+ foreach(lc, rellist)
+ {
+ RelOptInfo *rel = lfirst(lc);
+
+ pathlist = lappend(pathlist, rel->cheapest_total_path);
+
+ if (consider_parallel)
+ {
+ if (!rel->consider_parallel)
+ {
+ consider_parallel = false;
+ partial_paths_valid = false;
+ }
+ else if (rel->partial_pathlist == NIL)
+ partial_paths_valid = false;
+ else
+ partial_pathlist = lappend(partial_pathlist,
+ linitial(rel->partial_pathlist));
+ }
+
+ relids = bms_union(relids, rel->relids);
+ }
+
+ /* Build result relation. */
+ result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
+ result_rel->reltarget = create_pathtarget(root, tlist);
+ result_rel->consider_parallel = consider_parallel;
+
+ /*
+ * Append the child results together.
+ */
+ path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+ NIL, NULL, 0, false, -1);
+
+ /*
+ * For UNION ALL, we just need the Append path. For UNION, need to add
+ * node(s) to remove duplicates.
+ */
+ if (!op->all)
+ path = make_union_unique(op, path, tlist, root);
+
+ add_path(result_rel, path);
+
+ /*
+ * Estimate number of groups. For now we just assume the output is unique
+ * --- this is certainly true for the UNION case, and we want worst-case
+ * estimates anyway.
+ */
+ result_rel->rows = path->rows;
+
+ /*
+ * Now consider doing the same thing using the partial paths plus Append
+ * plus Gather.
+ */
+ if (partial_paths_valid)
+ {
+ Path *ppath;
+ ListCell *lc;
+ int parallel_workers = 0;
+
+ /* Find the highest number of workers requested for any subpath. */
+ foreach(lc, partial_pathlist)
+ {
+ Path *path = lfirst(lc);
+
+ parallel_workers = Max(parallel_workers, path->parallel_workers);
+ }
+ Assert(parallel_workers > 0);
+
+ /*
+ * If the use of parallel append is permitted, always request at least
+ * log2(# of children) paths. We assume it can be useful to have
+ * extra workers in this case because they will be spread out across
+ * the children. The precise formula is just a guess; see
+ * add_paths_to_append_rel.
+ */
+ if (enable_parallel_append)
+ {
+ parallel_workers = Max(parallel_workers,
+ fls(list_length(partial_pathlist)));
+ parallel_workers = Min(parallel_workers,
+ max_parallel_workers_per_gather);
+ }
+ Assert(parallel_workers > 0);
+
+ ppath = (Path *)
+ create_append_path(root, result_rel, NIL, partial_pathlist,
+ NIL, NULL,
+ parallel_workers, enable_parallel_append,
+ -1);
+ ppath = (Path *)
+ create_gather_path(root, result_rel, ppath,
+ result_rel->reltarget, NULL, NULL);
+ if (!op->all)
+ ppath = make_union_unique(op, ppath, tlist, root);
+ add_path(result_rel, ppath);
+ }
+
+ /* Undo effects of possibly forcing tuple_fraction to 0 */
+ root->tuple_fraction = save_fraction;
+
+ return result_rel;
+}
+
+/*
+ * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
+ */
+static RelOptInfo *
+generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
+ List *refnames_tlist,
+ List **pTargetList)
+{
+ RelOptInfo *result_rel;
+ RelOptInfo *lrel,
+ *rrel;
+ double save_fraction = root->tuple_fraction;
+ Path *lpath,
+ *rpath,
+ *path;
+ List *lpath_tlist,
+ *rpath_tlist,
+ *tlist_list,
+ *tlist,
+ *groupList,
+ *pathlist;
+ double dLeftGroups,
+ dRightGroups,
+ dNumGroups,
+ dNumOutputRows;
+ bool use_hash;
+ SetOpCmd cmd;
+ int firstFlag;
+
+ /*
+ * Tell children to fetch all tuples.
+ */
+ root->tuple_fraction = 0.0;
+
+ /* Recurse on children, ensuring their outputs are marked */
+ lrel = recurse_set_operations(op->larg, root,
+ op->colTypes, op->colCollations,
+ false, 0,
+ refnames_tlist,
+ &lpath_tlist,
+ &dLeftGroups);
+ lpath = lrel->cheapest_total_path;
+ rrel = recurse_set_operations(op->rarg, root,
+ op->colTypes, op->colCollations,
+ false, 1,
+ refnames_tlist,
+ &rpath_tlist,
+ &dRightGroups);
+ rpath = rrel->cheapest_total_path;
+
+ /* Undo effects of forcing tuple_fraction to 0 */
+ root->tuple_fraction = save_fraction;
+
+ /*
+ * For EXCEPT, we must put the left input first. For INTERSECT, either
+ * order should give the same results, and we prefer to put the smaller
+ * input first in order to minimize the size of the hash table in the
+ * hashing case. "Smaller" means the one with the fewer groups.
+ */
+ if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
+ {
+ pathlist = list_make2(lpath, rpath);
+ tlist_list = list_make2(lpath_tlist, rpath_tlist);
+ firstFlag = 0;
+ }
+ else
+ {
+ pathlist = list_make2(rpath, lpath);
+ tlist_list = list_make2(rpath_tlist, lpath_tlist);
+ firstFlag = 1;
+ }
+
+ /*
+ * Generate tlist for Append plan node.
+ *
+ * The tlist for an Append plan isn't important as far as the Append is
+ * concerned, but we must make it look real anyway for the benefit of the
+ * next plan level up. In fact, it has to be real enough that the flag
+ * column is shown as a variable not a constant, else setrefs.c will get
+ * confused.
+ */
+ tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
+ tlist_list, refnames_tlist);
+
+ *pTargetList = tlist;
+
+ /* Build result relation. */
+ result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
+ bms_union(lrel->relids, rrel->relids));
+ result_rel->reltarget = create_pathtarget(root, tlist);
+
+ /*
+ * Append the child results together.
+ */
+ path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+ NIL, NULL, 0, false, -1);
+
+ /* Identify the grouping semantics */
+ groupList = generate_setop_grouplist(op, tlist);
+
+ /*
+ * Estimate number of distinct groups that we'll need hashtable entries
+ * for; this is the size of the left-hand input for EXCEPT, or the smaller
+ * input for INTERSECT. Also estimate the number of eventual output rows.
+ * In non-ALL cases, we estimate each group produces one output row; in
+ * ALL cases use the relevant relation size. These are worst-case
+ * estimates, of course, but we need to be conservative.
+ */
+ if (op->op == SETOP_EXCEPT)
+ {
+ dNumGroups = dLeftGroups;
+ dNumOutputRows = op->all ? lpath->rows : dNumGroups;
+ }
+ else
+ {
+ dNumGroups = Min(dLeftGroups, dRightGroups);
+ dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
+ }
+
+ /*
+ * Decide whether to hash or sort, and add a sort node if needed.
+ */
+ use_hash = choose_hashed_setop(root, groupList, path,
+ dNumGroups, dNumOutputRows,
+ (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
+
+ if (groupList && !use_hash)
+ path = (Path *) create_sort_path(root,
+ result_rel,
+ path,
+ make_pathkeys_for_sortclauses(root,
+ groupList,
+ tlist),
+ -1.0);
+
+ /*
+ * Finally, add a SetOp path node to generate the correct output.
+ */
+ switch (op->op)
+ {
+ case SETOP_INTERSECT:
+ cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
+ break;
+ case SETOP_EXCEPT:
+ cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
+ break;
+ default:
+ elog(ERROR, "unrecognized set op: %d", (int) op->op);
+ cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
+ break;
+ }
+ path = (Path *) create_setop_path(root,
+ result_rel,
+ path,
+ cmd,
+ use_hash ? SETOP_HASHED : SETOP_SORTED,
+ groupList,
+ list_length(op->colTypes) + 1,
+ use_hash ? firstFlag : -1,
+ dNumGroups,
+ dNumOutputRows);
+
+ result_rel->rows = path->rows;
+ add_path(result_rel, path);
+ return result_rel;
+}
+
+/*
+ * Pull up children of a UNION node that are identically-propertied UNIONs.
+ *
+ * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
+ * output rows will be lost anyway.
+ *
+ * NOTE: currently, we ignore collations while determining if a child has
+ * the same properties. This is semantically sound only so long as all
+ * collations have the same notion of equality. It is valid from an
+ * implementation standpoint because we don't care about the ordering of
+ * a UNION child's result: UNION ALL results are always unordered, and
+ * generate_union_paths will force a fresh sort if the top level is a UNION.
+ */
+static List *
+plan_union_children(PlannerInfo *root,
+ SetOperationStmt *top_union,
+ List *refnames_tlist,
+ List **tlist_list)
+{
+ List *pending_rels = list_make1(top_union);
+ List *result = NIL;
+ List *child_tlist;
+
+ *tlist_list = NIL;
+
+ while (pending_rels != NIL)
+ {
+ Node *setOp = linitial(pending_rels);
+
+ pending_rels = list_delete_first(pending_rels);
+
+ if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+
+ if (op->op == top_union->op &&
+ (op->all == top_union->all || op->all) &&
+ equal(op->colTypes, top_union->colTypes))
+ {
+ /* Same UNION, so fold children into parent */
+ pending_rels = lcons(op->rarg, pending_rels);
+ pending_rels = lcons(op->larg, pending_rels);
+ continue;
+ }
+ }
+
+ /*
+ * Not same, so plan this child separately.
+ *
+ * Note we disallow any resjunk columns in child results. This is
+ * necessary since the Append node that implements the union won't do
+ * any projection, and upper levels will get confused if some of our
+ * output tuples have junk and some don't. This case only arises when
+ * we have an EXCEPT or INTERSECT as child, else there won't be
+ * resjunk anyway.
+ */
+ result = lappend(result, recurse_set_operations(setOp, root,
+ top_union->colTypes,
+ top_union->colCollations,
+ false, -1,
+ refnames_tlist,
+ &child_tlist,
+ NULL));
+ *tlist_list = lappend(*tlist_list, child_tlist);
+ }
+
+ return result;
+}
+
+/*
+ * Add nodes to the given path tree to unique-ify the result of a UNION.
+ */
+static Path *
+make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+ PlannerInfo *root)
+{
+ RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
+ List *groupList;
+ double dNumGroups;
+
+ /* Identify the grouping semantics */
+ groupList = generate_setop_grouplist(op, tlist);
+
+ /*
+ * XXX for the moment, take the number of distinct groups as equal to the
+ * total input size, ie, the worst case. This is too conservative, but
+ * it's not clear how to get a decent estimate of the true size. One
+ * should note as well the propensity of novices to write UNION rather
+ * than UNION ALL even when they don't expect any duplicates...
+ */
+ dNumGroups = path->rows;
+
+ /* Decide whether to hash or sort */
+ if (choose_hashed_setop(root, groupList, path,
+ dNumGroups, dNumGroups,
+ "UNION"))
+ {
+ /* Hashed aggregate plan --- no sort needed */
+ path = (Path *) create_agg_path(root,
+ result_rel,
+ path,
+ create_pathtarget(root, tlist),
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ groupList,
+ NIL,
+ NULL,
+ dNumGroups);
+ }
+ else
+ {
+ /* Sort and Unique */
+ if (groupList)
+ path = (Path *)
+ create_sort_path(root,
+ result_rel,
+ path,
+ make_pathkeys_for_sortclauses(root,
+ groupList,
+ tlist),
+ -1.0);
+ path = (Path *) create_upper_unique_path(root,
+ result_rel,
+ path,
+ list_length(path->pathkeys),
+ dNumGroups);
+ }
+
+ return path;
+}
+
+/*
+ * postprocess_setop_rel - perform steps required after adding paths
+ */
+static void
+postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
+{
+ /*
+ * We don't currently worry about allowing FDWs to contribute paths to
+ * this relation, but give extensions a chance.
+ */
+ if (create_upper_paths_hook)
+ (*create_upper_paths_hook) (root, UPPERREL_SETOP,
+ NULL, rel, NULL);
+
+ /* Select cheapest path */
+ set_cheapest(rel);
+}
+
+/*
+ * choose_hashed_setop - should we use hashing for a set operation?
+ */
+static bool
+choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+ Path *input_path,
+ double dNumGroups, double dNumOutputRows,
+ const char *construct)
+{
+ int numGroupCols = list_length(groupClauses);
+ Size hash_mem_limit = get_hash_memory_limit();
+ bool can_sort;
+ bool can_hash;
+ Size hashentrysize;
+ Path hashed_p;
+ Path sorted_p;
+ double tuple_fraction;
+
+ /* Check whether the operators support sorting or hashing */
+ can_sort = grouping_is_sortable(groupClauses);
+ can_hash = grouping_is_hashable(groupClauses);
+ if (can_hash && can_sort)
+ {
+ /* we have a meaningful choice to make, continue ... */
+ }
+ else if (can_hash)
+ return true;
+ else if (can_sort)
+ return false;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ /* translator: %s is UNION, INTERSECT, or EXCEPT */
+ errmsg("could not implement %s", construct),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+
+ /* Prefer sorting when enable_hashagg is off */
+ if (!enable_hashagg)
+ return false;
+
+ /*
+ * Don't do it if it doesn't look like the hashtable will fit into
+ * hash_mem.
+ */
+ hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+
+ if (hashentrysize * dNumGroups > hash_mem_limit)
+ return false;
+
+ /*
+ * See if the estimated cost is no more than doing it the other way.
+ *
+ * We need to consider input_plan + hashagg versus input_plan + sort +
+ * group. Note that the actual result plan might involve a SetOp or
+ * Unique node, not Agg or Group, but the cost estimates for Agg and Group
+ * should be close enough for our purposes here.
+ *
+ * These path variables are dummies that just hold cost fields; we don't
+ * make actual Paths for these steps.
+ */
+ cost_agg(&hashed_p, root, AGG_HASHED, NULL,
+ numGroupCols, dNumGroups,
+ NIL,
+ input_path->startup_cost, input_path->total_cost,
+ input_path->rows, input_path->pathtarget->width);
+
+ /*
+ * Now for the sorted case. Note that the input is *always* unsorted,
+ * since it was made by appending unrelated sub-relations together.
+ */
+ sorted_p.startup_cost = input_path->startup_cost;
+ sorted_p.total_cost = input_path->total_cost;
+ /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
+ cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
+ input_path->rows, input_path->pathtarget->width,
+ 0.0, work_mem, -1.0);
+ cost_group(&sorted_p, root, numGroupCols, dNumGroups,
+ NIL,
+ sorted_p.startup_cost, sorted_p.total_cost,
+ input_path->rows);
+
+ /*
+ * Now make the decision using the top-level tuple fraction. First we
+ * have to convert an absolute count (LIMIT) into fractional form.
+ */
+ tuple_fraction = root->tuple_fraction;
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= dNumOutputRows;
+
+ if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+ tuple_fraction) < 0)
+ {
+ /* Hashed is cheaper, so use it */
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Generate targetlist for a set-operation plan node
+ *
+ * colTypes: OID list of set-op's result column datatypes
+ * colCollations: OID list of set-op's result column collations
+ * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
+ * varno: varno to use in generated Vars
+ * hack_constants: true to copy up constants (see comments in code)
+ * input_tlist: targetlist of this node's input node
+ * refnames_tlist: targetlist to take column names from
+ */
+static List *
+generate_setop_tlist(List *colTypes, List *colCollations,
+ int flag,
+ Index varno,
+ bool hack_constants,
+ List *input_tlist,
+ List *refnames_tlist)
+{
+ List *tlist = NIL;
+ int resno = 1;
+ ListCell *ctlc,
+ *cclc,
+ *itlc,
+ *rtlc;
+ TargetEntry *tle;
+ Node *expr;
+
+ forfour(ctlc, colTypes, cclc, colCollations,
+ itlc, input_tlist, rtlc, refnames_tlist)
+ {
+ Oid colType = lfirst_oid(ctlc);
+ Oid colColl = lfirst_oid(cclc);
+ TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
+ TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
+
+ Assert(inputtle->resno == resno);
+ Assert(reftle->resno == resno);
+ Assert(!inputtle->resjunk);
+ Assert(!reftle->resjunk);
+
+ /*
+ * Generate columns referencing input columns and having appropriate
+ * data types and column names. Insert datatype coercions where
+ * necessary.
+ *
+ * HACK: constants in the input's targetlist are copied up as-is
+ * rather than being referenced as subquery outputs. This is mainly
+ * to ensure that when we try to coerce them to the output column's
+ * datatype, the right things happen for UNKNOWN constants. But do
+ * this only at the first level of subquery-scan plans; we don't want
+ * phony constants appearing in the output tlists of upper-level
+ * nodes!
+ */
+ if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
+ expr = (Node *) inputtle->expr;
+ else
+ expr = (Node *) makeVar(varno,
+ inputtle->resno,
+ exprType((Node *) inputtle->expr),
+ exprTypmod((Node *) inputtle->expr),
+ exprCollation((Node *) inputtle->expr),
+ 0);
+
+ if (exprType(expr) != colType)
+ {
+ /*
+ * Note: it's not really cool to be applying coerce_to_common_type
+ * here; one notable point is that assign_expr_collations never
+ * gets run on any generated nodes. For the moment that's not a
+ * problem because we force the correct exposed collation below.
+ * It would likely be best to make the parser generate the correct
+ * output tlist for every set-op to begin with, though.
+ */
+ expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
+ expr,
+ colType,
+ "UNION/INTERSECT/EXCEPT");
+ }
+
+ /*
+ * Ensure the tlist entry's exposed collation matches the set-op. This
+ * is necessary because plan_set_operations() reports the result
+ * ordering as a list of SortGroupClauses, which don't carry collation
+ * themselves but just refer to tlist entries. If we don't show the
+ * right collation then planner.c might do the wrong thing in
+ * higher-level queries.
+ *
+ * Note we use RelabelType, not CollateExpr, since this expression
+ * will reach the executor without any further processing.
+ */
+ if (exprCollation(expr) != colColl)
+ expr = applyRelabelType(expr,
+ exprType(expr), exprTypmod(expr), colColl,
+ COERCE_IMPLICIT_CAST, -1, false);
+
+ tle = makeTargetEntry((Expr *) expr,
+ (AttrNumber) resno++,
+ pstrdup(reftle->resname),
+ false);
+
+ /*
+ * By convention, all non-resjunk columns in a setop tree have
+ * ressortgroupref equal to their resno. In some cases the ref isn't
+ * needed, but this is a cleaner way than modifying the tlist later.
+ */
+ tle->ressortgroupref = tle->resno;
+
+ tlist = lappend(tlist, tle);
+ }
+
+ if (flag >= 0)
+ {
+ /* Add a resjunk flag column */
+ /* flag value is the given constant */
+ expr = (Node *) makeConst(INT4OID,
+ -1,
+ InvalidOid,
+ sizeof(int32),
+ Int32GetDatum(flag),
+ false,
+ true);
+ tle = makeTargetEntry((Expr *) expr,
+ (AttrNumber) resno++,
+ pstrdup("flag"),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+
+ return tlist;
+}
+
+/*
+ * Generate targetlist for a set-operation Append node
+ *
+ * colTypes: OID list of set-op's result column datatypes
+ * colCollations: OID list of set-op's result column collations
+ * flag: true to create a flag column copied up from subplans
+ * input_tlists: list of tlists for sub-plans of the Append
+ * refnames_tlist: targetlist to take column names from
+ *
+ * The entries in the Append's targetlist should always be simple Vars;
+ * we just have to make sure they have the right datatypes/typmods/collations.
+ * The Vars are always generated with varno 0.
+ *
+ * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
+ * cannot figure out a realistic width for the tlist we make here. But we
+ * ought to refactor this code to produce a PathTarget directly, anyway.
+ */
+static List *
+generate_append_tlist(List *colTypes, List *colCollations,
+ bool flag,
+ List *input_tlists,
+ List *refnames_tlist)
+{
+ List *tlist = NIL;
+ int resno = 1;
+ ListCell *curColType;
+ ListCell *curColCollation;
+ ListCell *ref_tl_item;
+ int colindex;
+ TargetEntry *tle;
+ Node *expr;
+ ListCell *tlistl;
+ int32 *colTypmods;
+
+ /*
+ * First extract typmods to use.
+ *
+ * If the inputs all agree on type and typmod of a particular column, use
+ * that typmod; else use -1.
+ */
+ colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
+
+ foreach(tlistl, input_tlists)
+ {
+ List *subtlist = (List *) lfirst(tlistl);
+ ListCell *subtlistl;
+
+ curColType = list_head(colTypes);
+ colindex = 0;
+ foreach(subtlistl, subtlist)
+ {
+ TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
+
+ if (subtle->resjunk)
+ continue;
+ Assert(curColType != NULL);
+ if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
+ {
+ /* If first subplan, copy the typmod; else compare */
+ int32 subtypmod = exprTypmod((Node *) subtle->expr);
+
+ if (tlistl == list_head(input_tlists))
+ colTypmods[colindex] = subtypmod;
+ else if (subtypmod != colTypmods[colindex])
+ colTypmods[colindex] = -1;
+ }
+ else
+ {
+ /* types disagree, so force typmod to -1 */
+ colTypmods[colindex] = -1;
+ }
+ curColType = lnext(colTypes, curColType);
+ colindex++;
+ }
+ Assert(curColType == NULL);
+ }
+
+ /*
+ * Now we can build the tlist for the Append.
+ */
+ colindex = 0;
+ forthree(curColType, colTypes, curColCollation, colCollations,
+ ref_tl_item, refnames_tlist)
+ {
+ Oid colType = lfirst_oid(curColType);
+ int32 colTypmod = colTypmods[colindex++];
+ Oid colColl = lfirst_oid(curColCollation);
+ TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
+
+ Assert(reftle->resno == resno);
+ Assert(!reftle->resjunk);
+ expr = (Node *) makeVar(0,
+ resno,
+ colType,
+ colTypmod,
+ colColl,
+ 0);
+ tle = makeTargetEntry((Expr *) expr,
+ (AttrNumber) resno++,
+ pstrdup(reftle->resname),
+ false);
+
+ /*
+ * By convention, all non-resjunk columns in a setop tree have
+ * ressortgroupref equal to their resno. In some cases the ref isn't
+ * needed, but this is a cleaner way than modifying the tlist later.
+ */
+ tle->ressortgroupref = tle->resno;
+
+ tlist = lappend(tlist, tle);
+ }
+
+ if (flag)
+ {
+ /* Add a resjunk flag column */
+ /* flag value is shown as copied up from subplan */
+ expr = (Node *) makeVar(0,
+ resno,
+ INT4OID,
+ -1,
+ InvalidOid,
+ 0);
+ tle = makeTargetEntry((Expr *) expr,
+ (AttrNumber) resno++,
+ pstrdup("flag"),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+
+ pfree(colTypmods);
+
+ return tlist;
+}
+
+/*
+ * generate_setop_grouplist
+ * Build a SortGroupClause list defining the sort/grouping properties
+ * of the setop's output columns.
+ *
+ * Parse analysis already determined the properties and built a suitable
+ * list, except that the entries do not have sortgrouprefs set because
+ * the parser output representation doesn't include a tlist for each
+ * setop. So what we need to do here is copy that list and install
+ * proper sortgrouprefs into it (copying those from the targetlist).
+ */
+static List *
+generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
+{
+ List *grouplist = copyObject(op->groupClauses);
+ ListCell *lg;
+ ListCell *lt;
+
+ lg = list_head(grouplist);
+ foreach(lt, targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lt);
+ SortGroupClause *sgc;
+
+ if (tle->resjunk)
+ {
+ /* resjunk columns should not have sortgrouprefs */
+ Assert(tle->ressortgroupref == 0);
+ continue; /* ignore resjunk columns */
+ }
+
+ /* non-resjunk columns should have sortgroupref = resno */
+ Assert(tle->ressortgroupref == tle->resno);
+
+ /* non-resjunk columns should have grouping clauses */
+ Assert(lg != NULL);
+ sgc = (SortGroupClause *) lfirst(lg);
+ lg = lnext(grouplist, lg);
+ Assert(sgc->tleSortGroupRef == 0);
+
+ sgc->tleSortGroupRef = tle->ressortgroupref;
+ }
+ Assert(lg == NULL);
+ return grouplist;
+}