From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sat, 4 May 2024 14:17:33 +0200
Subject: Adding upstream version 15.5.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 src/backend/optimizer/util/orclauses.c | 360 +++++++++++++++++++++++++++++++++
 1 file changed, 360 insertions(+)
 create mode 100644 src/backend/optimizer/util/orclauses.c

(limited to 'src/backend/optimizer/util/orclauses.c')

diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c
new file mode 100644
index 0000000..b1363df
--- /dev/null
+++ b/src/backend/optimizer/util/orclauses.c
@@ -0,0 +1,360 @@
+/*-------------------------------------------------------------------------
+ *
+ * orclauses.c
+ *	  Routines to extract restriction OR clauses from join OR clauses
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/util/orclauses.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/orclauses.h"
+#include "optimizer/restrictinfo.h"
+
+
+static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
+static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
+static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
+								   Expr *orclause, RestrictInfo *join_or_rinfo);
+
+
+/*
+ * extract_restriction_or_clauses
+ *	  Examine join OR-of-AND clauses to see if any useful restriction OR
+ *	  clauses can be extracted.  If so, add them to the query.
+ *
+ * Although a join clause must reference multiple relations overall,
+ * an OR of ANDs clause might contain sub-clauses that reference just one
+ * relation and can be used to build a restriction clause for that rel.
+ * For example consider
+ *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
+ * We can transform this into
+ *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
+ *			AND (a.x = 42 OR a.x = 44)
+ *			AND (b.y = 43 OR b.z = 45);
+ * which allows the latter clauses to be applied during the scans of a and b,
+ * perhaps as index qualifications, and in any case reducing the number of
+ * rows arriving at the join.  In essence this is a partial transformation to
+ * CNF (AND of ORs format).  It is not complete, however, because we do not
+ * unravel the original OR --- doing so would usually bloat the qualification
+ * expression to little gain.
+ *
+ * The added quals are partially redundant with the original OR, and therefore
+ * would cause the size of the joinrel to be underestimated when it is finally
+ * formed.  (This would be true of a full transformation to CNF as well; the
+ * fault is not really in the transformation, but in clauselist_selectivity's
+ * inability to recognize redundant conditions.)  We can compensate for this
+ * redundancy by changing the cached selectivity of the original OR clause,
+ * canceling out the (valid) reduction in the estimated sizes of the base
+ * relations so that the estimated joinrel size remains the same.  This is
+ * a MAJOR HACK: it depends on the fact that clause selectivities are cached
+ * and on the fact that the same RestrictInfo node will appear in every
+ * joininfo list that might be used when the joinrel is formed.
+ * And it doesn't work in cases where the size estimation is nonlinear
+ * (i.e., outer and IN joins).  But it beats not doing anything.
+ *
+ * We examine each base relation to see if join clauses associated with it
+ * contain extractable restriction conditions.  If so, add those conditions
+ * to the rel's baserestrictinfo and update the cached selectivities of the
+ * join clauses.  Note that the same join clause will be examined afresh
+ * from the point of view of each baserel that participates in it, so its
+ * cached selectivity may get updated multiple times.
+ */
+void
+extract_restriction_or_clauses(PlannerInfo *root)
+{
+	Index		rti;
+
+	/* Examine each baserel for potential join OR clauses */
+	for (rti = 1; rti < root->simple_rel_array_size; rti++)
+	{
+		RelOptInfo *rel = root->simple_rel_array[rti];
+		ListCell   *lc;
+
+		/* there may be empty slots corresponding to non-baserel RTEs */
+		if (rel == NULL)
+			continue;
+
+		Assert(rel->relid == rti);	/* sanity check on array */
+
+		/* ignore RTEs that are "other rels" */
+		if (rel->reloptkind != RELOPT_BASEREL)
+			continue;
+
+		/*
+		 * Find potentially interesting OR joinclauses.  We can use any
+		 * joinclause that is considered safe to move to this rel by the
+		 * parameterized-path machinery, even though what we are going to do
+		 * with it is not exactly a parameterized path.
+		 *
+		 * However, it seems best to ignore clauses that have been marked
+		 * redundant (by setting norm_selec > 1).  That likely can't happen
+		 * for OR clauses, but let's be safe.
+		 */
+		foreach(lc, rel->joininfo)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+			if (restriction_is_or_clause(rinfo) &&
+				join_clause_is_movable_to(rinfo, rel) &&
+				rinfo->norm_selec <= 1)
+			{
+				/* Try to extract a qual for this rel only */
+				Expr	   *orclause = extract_or_clause(rinfo, rel);
+
+				/*
+				 * If successful, decide whether we want to use the clause,
+				 * and insert it into the rel's restrictinfo list if so.
+				 */
+				if (orclause)
+					consider_new_or_clause(root, rel, orclause, rinfo);
+			}
+		}
+	}
+}
+
+/*
+ * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
+ */
+static bool
+is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+	/*
+	 * We want clauses that mention the rel, and only the rel.  So in
+	 * particular pseudoconstant clauses can be rejected quickly.  Then check
+	 * the clause's Var membership.
+	 */
+	if (rinfo->pseudoconstant)
+		return false;
+	if (!bms_equal(rinfo->clause_relids, rel->relids))
+		return false;
+
+	/* We don't want extra evaluations of any volatile functions */
+	if (contain_volatile_functions((Node *) rinfo->clause))
+		return false;
+
+	return true;
+}
+
+/*
+ * Try to extract a restriction clause mentioning only "rel" from the given
+ * join OR-clause.
+ *
+ * We must be able to extract at least one qual for this rel from each of
+ * the arms of the OR, else we can't use it.
+ *
+ * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
+ * if no OR clause could be extracted.
+ */
+static Expr *
+extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
+{
+	List	   *clauselist = NIL;
+	ListCell   *lc;
+
+	/*
+	 * Scan each arm of the input OR clause.  Notice we descend into
+	 * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
+	 * toplevel OR/AND structure.  This is useful because we can use the info
+	 * in those nodes to make is_safe_restriction_clause_for()'s checks
+	 * cheaper.  We'll strip those nodes from the returned tree, though,
+	 * meaning that fresh ones will be built if the clause is accepted as a
+	 * restriction clause.  This might seem wasteful --- couldn't we re-use
+	 * the existing RestrictInfos?	But that'd require assuming that
+	 * selectivity and other cached data is computed exactly the same way for
+	 * a restriction clause as for a join clause, which seems undesirable.
+	 */
+	Assert(is_orclause(or_rinfo->orclause));
+	foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
+	{
+		Node	   *orarg = (Node *) lfirst(lc);
+		List	   *subclauses = NIL;
+		Node	   *subclause;
+
+		/* OR arguments should be ANDs or sub-RestrictInfos */
+		if (is_andclause(orarg))
+		{
+			List	   *andargs = ((BoolExpr *) orarg)->args;
+			ListCell   *lc2;
+
+			foreach(lc2, andargs)
+			{
+				RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+
+				if (restriction_is_or_clause(rinfo))
+				{
+					/*
+					 * Recurse to deal with nested OR.  Note we *must* recurse
+					 * here, this isn't just overly-tense optimization: we
+					 * have to descend far enough to find and strip all
+					 * RestrictInfos in the expression.
+					 */
+					Expr	   *suborclause;
+
+					suborclause = extract_or_clause(rinfo, rel);
+					if (suborclause)
+						subclauses = lappend(subclauses, suborclause);
+				}
+				else if (is_safe_restriction_clause_for(rinfo, rel))
+					subclauses = lappend(subclauses, rinfo->clause);
+			}
+		}
+		else
+		{
+			RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
+
+			Assert(!restriction_is_or_clause(rinfo));
+			if (is_safe_restriction_clause_for(rinfo, rel))
+				subclauses = lappend(subclauses, rinfo->clause);
+		}
+
+		/*
+		 * If nothing could be extracted from this arm, we can't do anything
+		 * with this OR clause.
+		 */
+		if (subclauses == NIL)
+			return NULL;
+
+		/*
+		 * OK, add subclause(s) to the result OR.  If we found more than one,
+		 * we need an AND node.  But if we found only one, and it is itself an
+		 * OR node, add its subclauses to the result instead; this is needed
+		 * to preserve AND/OR flatness (ie, no OR directly underneath OR).
+		 */
+		subclause = (Node *) make_ands_explicit(subclauses);
+		if (is_orclause(subclause))
+			clauselist = list_concat(clauselist,
+									 ((BoolExpr *) subclause)->args);
+		else
+			clauselist = lappend(clauselist, subclause);
+	}
+
+	/*
+	 * If we got a restriction clause from every arm, wrap them up in an OR
+	 * node.  (In theory the OR node might be unnecessary, if there was only
+	 * one arm --- but then the input OR node was also redundant.)
+	 */
+	if (clauselist != NIL)
+		return make_orclause(clauselist);
+	return NULL;
+}
+
+/*
+ * Consider whether a successfully-extracted restriction OR clause is
+ * actually worth using.  If so, add it to the planner's data structures,
+ * and adjust the original join clause (join_or_rinfo) to compensate.
+ */
+static void
+consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
+					   Expr *orclause, RestrictInfo *join_or_rinfo)
+{
+	RestrictInfo *or_rinfo;
+	Selectivity or_selec,
+				orig_selec;
+
+	/*
+	 * Build a RestrictInfo from the new OR clause.  We can assume it's valid
+	 * as a base restriction clause.
+	 */
+	or_rinfo = make_restrictinfo(root,
+								 orclause,
+								 true,
+								 false,
+								 false,
+								 join_or_rinfo->security_level,
+								 NULL,
+								 NULL,
+								 NULL);
+
+	/*
+	 * Estimate its selectivity.  (We could have done this earlier, but doing
+	 * it on the RestrictInfo representation allows the result to get cached,
+	 * saving work later.)
+	 */
+	or_selec = clause_selectivity(root, (Node *) or_rinfo,
+								  0, JOIN_INNER, NULL);
+
+	/*
+	 * The clause is only worth adding to the query if it rejects a useful
+	 * fraction of the base relation's rows; otherwise, it's just going to
+	 * cause duplicate computation (since we will still have to check the
+	 * original OR clause when the join is formed).  Somewhat arbitrarily, we
+	 * set the selectivity threshold at 0.9.
+	 */
+	if (or_selec > 0.9)
+		return;					/* forget it */
+
+	/*
+	 * OK, add it to the rel's restriction-clause list.
+	 */
+	rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
+	rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
+										 or_rinfo->security_level);
+
+	/*
+	 * Adjust the original join OR clause's cached selectivity to compensate
+	 * for the selectivity of the added (but redundant) lower-level qual. This
+	 * should result in the join rel getting approximately the same rows
+	 * estimate as it would have gotten without all these shenanigans.
+	 *
+	 * XXX major hack alert: this depends on the assumption that the
+	 * selectivity will stay cached.
+	 *
+	 * XXX another major hack: we adjust only norm_selec, the cached
+	 * selectivity for JOIN_INNER semantics, even though the join clause
+	 * might've been an outer-join clause.  This is partly because we can't
+	 * easily identify the relevant SpecialJoinInfo here, and partly because
+	 * the linearity assumption we're making would fail anyway.  (If it is an
+	 * outer-join clause, "rel" must be on the nullable side, else we'd not
+	 * have gotten here.  So the computation of the join size is going to be
+	 * quite nonlinear with respect to the size of "rel", so it's not clear
+	 * how we ought to adjust outer_selec even if we could compute its
+	 * original value correctly.)
+	 */
+	if (or_selec > 0)
+	{
+		SpecialJoinInfo sjinfo;
+
+		/*
+		 * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
+		 * approx_tuple_count() in costsize.c.)
+		 */
+		sjinfo.type = T_SpecialJoinInfo;
+		sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
+											 rel->relids);
+		sjinfo.min_righthand = rel->relids;
+		sjinfo.syn_lefthand = sjinfo.min_lefthand;
+		sjinfo.syn_righthand = sjinfo.min_righthand;
+		sjinfo.jointype = JOIN_INNER;
+		/* we don't bother trying to make the remaining fields valid */
+		sjinfo.lhs_strict = false;
+		sjinfo.delay_upper_joins = false;
+		sjinfo.semi_can_btree = false;
+		sjinfo.semi_can_hash = false;
+		sjinfo.semi_operators = NIL;
+		sjinfo.semi_rhs_exprs = NIL;
+
+		/* Compute inner-join size */
+		orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
+										0, JOIN_INNER, &sjinfo);
+
+		/* And hack cached selectivity so join size remains the same */
+		join_or_rinfo->norm_selec = orig_selec / or_selec;
+		/* ensure result stays in sane range, in particular not "redundant" */
+		if (join_or_rinfo->norm_selec > 1)
+			join_or_rinfo->norm_selec = 1;
+		/* as explained above, we don't touch outer_selec */
+	}
+}
-- 
cgit v1.2.3