diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/optimizer/util/orclauses.c | |
parent | Initial commit. (diff) | |
download | postgresql-14-upstream.tar.xz postgresql-14-upstream.zip |
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/optimizer/util/orclauses.c')
-rw-r--r-- | src/backend/optimizer/util/orclauses.c | 360 |
1 files changed, 360 insertions, 0 deletions
diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c new file mode 100644 index 0000000..d559f33 --- /dev/null +++ b/src/backend/optimizer/util/orclauses.c @@ -0,0 +1,360 @@ +/*------------------------------------------------------------------------- + * + * orclauses.c + * Routines to extract restriction OR clauses from join OR clauses + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/orclauses.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/orclauses.h" +#include "optimizer/restrictinfo.h" + + +static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel); +static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel); +static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, + Expr *orclause, RestrictInfo *join_or_rinfo); + + +/* + * extract_restriction_or_clauses + * Examine join OR-of-AND clauses to see if any useful restriction OR + * clauses can be extracted. If so, add them to the query. + * + * Although a join clause must reference multiple relations overall, + * an OR of ANDs clause might contain sub-clauses that reference just one + * relation and can be used to build a restriction clause for that rel. + * For example consider + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); + * We can transform this into + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) + * AND (a.x = 42 OR a.x = 44) + * AND (b.y = 43 OR b.z = 45); + * which allows the latter clauses to be applied during the scans of a and b, + * perhaps as index qualifications, and in any case reducing the number of + * rows arriving at the join. In essence this is a partial transformation to + * CNF (AND of ORs format). It is not complete, however, because we do not + * unravel the original OR --- doing so would usually bloat the qualification + * expression to little gain. + * + * The added quals are partially redundant with the original OR, and therefore + * would cause the size of the joinrel to be underestimated when it is finally + * formed. (This would be true of a full transformation to CNF as well; the + * fault is not really in the transformation, but in clauselist_selectivity's + * inability to recognize redundant conditions.) We can compensate for this + * redundancy by changing the cached selectivity of the original OR clause, + * canceling out the (valid) reduction in the estimated sizes of the base + * relations so that the estimated joinrel size remains the same. This is + * a MAJOR HACK: it depends on the fact that clause selectivities are cached + * and on the fact that the same RestrictInfo node will appear in every + * joininfo list that might be used when the joinrel is formed. + * And it doesn't work in cases where the size estimation is nonlinear + * (i.e., outer and IN joins). But it beats not doing anything. + * + * We examine each base relation to see if join clauses associated with it + * contain extractable restriction conditions. If so, add those conditions + * to the rel's baserestrictinfo and update the cached selectivities of the + * join clauses. Note that the same join clause will be examined afresh + * from the point of view of each baserel that participates in it, so its + * cached selectivity may get updated multiple times. + */ +void +extract_restriction_or_clauses(PlannerInfo *root) +{ + Index rti; + + /* Examine each baserel for potential join OR clauses */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + ListCell *lc; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + /* + * Find potentially interesting OR joinclauses. We can use any + * joinclause that is considered safe to move to this rel by the + * parameterized-path machinery, even though what we are going to do + * with it is not exactly a parameterized path. + * + * However, it seems best to ignore clauses that have been marked + * redundant (by setting norm_selec > 1). That likely can't happen + * for OR clauses, but let's be safe. + */ + foreach(lc, rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (restriction_is_or_clause(rinfo) && + join_clause_is_movable_to(rinfo, rel) && + rinfo->norm_selec <= 1) + { + /* Try to extract a qual for this rel only */ + Expr *orclause = extract_or_clause(rinfo, rel); + + /* + * If successful, decide whether we want to use the clause, + * and insert it into the rel's restrictinfo list if so. + */ + if (orclause) + consider_new_or_clause(root, rel, orclause, rinfo); + } + } + } +} + +/* + * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? + */ +static bool +is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel) +{ + /* + * We want clauses that mention the rel, and only the rel. So in + * particular pseudoconstant clauses can be rejected quickly. Then check + * the clause's Var membership. + */ + if (rinfo->pseudoconstant) + return false; + if (!bms_equal(rinfo->clause_relids, rel->relids)) + return false; + + /* We don't want extra evaluations of any volatile functions */ + if (contain_volatile_functions((Node *) rinfo->clause)) + return false; + + return true; +} + +/* + * Try to extract a restriction clause mentioning only "rel" from the given + * join OR-clause. + * + * We must be able to extract at least one qual for this rel from each of + * the arms of the OR, else we can't use it. + * + * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL + * if no OR clause could be extracted. + */ +static Expr * +extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel) +{ + List *clauselist = NIL; + ListCell *lc; + + /* + * Scan each arm of the input OR clause. Notice we descend into + * or_rinfo->orclause, which has RestrictInfo nodes embedded below the + * toplevel OR/AND structure. This is useful because we can use the info + * in those nodes to make is_safe_restriction_clause_for()'s checks + * cheaper. We'll strip those nodes from the returned tree, though, + * meaning that fresh ones will be built if the clause is accepted as a + * restriction clause. This might seem wasteful --- couldn't we re-use + * the existing RestrictInfos? But that'd require assuming that + * selectivity and other cached data is computed exactly the same way for + * a restriction clause as for a join clause, which seems undesirable. + */ + Assert(is_orclause(or_rinfo->orclause)); + foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args) + { + Node *orarg = (Node *) lfirst(lc); + List *subclauses = NIL; + Node *subclause; + + /* OR arguments should be ANDs or sub-RestrictInfos */ + if (is_andclause(orarg)) + { + List *andargs = ((BoolExpr *) orarg)->args; + ListCell *lc2; + + foreach(lc2, andargs) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + + if (restriction_is_or_clause(rinfo)) + { + /* + * Recurse to deal with nested OR. Note we *must* recurse + * here, this isn't just overly-tense optimization: we + * have to descend far enough to find and strip all + * RestrictInfos in the expression. + */ + Expr *suborclause; + + suborclause = extract_or_clause(rinfo, rel); + if (suborclause) + subclauses = lappend(subclauses, suborclause); + } + else if (is_safe_restriction_clause_for(rinfo, rel)) + subclauses = lappend(subclauses, rinfo->clause); + } + } + else + { + RestrictInfo *rinfo = castNode(RestrictInfo, orarg); + + Assert(!restriction_is_or_clause(rinfo)); + if (is_safe_restriction_clause_for(rinfo, rel)) + subclauses = lappend(subclauses, rinfo->clause); + } + + /* + * If nothing could be extracted from this arm, we can't do anything + * with this OR clause. + */ + if (subclauses == NIL) + return NULL; + + /* + * OK, add subclause(s) to the result OR. If we found more than one, + * we need an AND node. But if we found only one, and it is itself an + * OR node, add its subclauses to the result instead; this is needed + * to preserve AND/OR flatness (ie, no OR directly underneath OR). + */ + subclause = (Node *) make_ands_explicit(subclauses); + if (is_orclause(subclause)) + clauselist = list_concat(clauselist, + ((BoolExpr *) subclause)->args); + else + clauselist = lappend(clauselist, subclause); + } + + /* + * If we got a restriction clause from every arm, wrap them up in an OR + * node. (In theory the OR node might be unnecessary, if there was only + * one arm --- but then the input OR node was also redundant.) + */ + if (clauselist != NIL) + return make_orclause(clauselist); + return NULL; +} + +/* + * Consider whether a successfully-extracted restriction OR clause is + * actually worth using. If so, add it to the planner's data structures, + * and adjust the original join clause (join_or_rinfo) to compensate. + */ +static void +consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, + Expr *orclause, RestrictInfo *join_or_rinfo) +{ + RestrictInfo *or_rinfo; + Selectivity or_selec, + orig_selec; + + /* + * Build a RestrictInfo from the new OR clause. We can assume it's valid + * as a base restriction clause. + */ + or_rinfo = make_restrictinfo(root, + orclause, + true, + false, + false, + join_or_rinfo->security_level, + NULL, + NULL, + NULL); + + /* + * Estimate its selectivity. (We could have done this earlier, but doing + * it on the RestrictInfo representation allows the result to get cached, + * saving work later.) + */ + or_selec = clause_selectivity(root, (Node *) or_rinfo, + 0, JOIN_INNER, NULL); + + /* + * The clause is only worth adding to the query if it rejects a useful + * fraction of the base relation's rows; otherwise, it's just going to + * cause duplicate computation (since we will still have to check the + * original OR clause when the join is formed). Somewhat arbitrarily, we + * set the selectivity threshold at 0.9. + */ + if (or_selec > 0.9) + return; /* forget it */ + + /* + * OK, add it to the rel's restriction-clause list. + */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); + rel->baserestrict_min_security = Min(rel->baserestrict_min_security, + or_rinfo->security_level); + + /* + * Adjust the original join OR clause's cached selectivity to compensate + * for the selectivity of the added (but redundant) lower-level qual. This + * should result in the join rel getting approximately the same rows + * estimate as it would have gotten without all these shenanigans. + * + * XXX major hack alert: this depends on the assumption that the + * selectivity will stay cached. + * + * XXX another major hack: we adjust only norm_selec, the cached + * selectivity for JOIN_INNER semantics, even though the join clause + * might've been an outer-join clause. This is partly because we can't + * easily identify the relevant SpecialJoinInfo here, and partly because + * the linearity assumption we're making would fail anyway. (If it is an + * outer-join clause, "rel" must be on the nullable side, else we'd not + * have gotten here. So the computation of the join size is going to be + * quite nonlinear with respect to the size of "rel", so it's not clear + * how we ought to adjust outer_selec even if we could compute its + * original value correctly.) + */ + if (or_selec > 0) + { + SpecialJoinInfo sjinfo; + + /* + * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare + * approx_tuple_count() in costsize.c.) + */ + sjinfo.type = T_SpecialJoinInfo; + sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, + rel->relids); + sjinfo.min_righthand = rel->relids; + sjinfo.syn_lefthand = sjinfo.min_lefthand; + sjinfo.syn_righthand = sjinfo.min_righthand; + sjinfo.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo.lhs_strict = false; + sjinfo.delay_upper_joins = false; + sjinfo.semi_can_btree = false; + sjinfo.semi_can_hash = false; + sjinfo.semi_operators = NIL; + sjinfo.semi_rhs_exprs = NIL; + + /* Compute inner-join size */ + orig_selec = clause_selectivity(root, (Node *) join_or_rinfo, + 0, JOIN_INNER, &sjinfo); + + /* And hack cached selectivity so join size remains the same */ + join_or_rinfo->norm_selec = orig_selec / or_selec; + /* ensure result stays in sane range, in particular not "redundant" */ + if (join_or_rinfo->norm_selec > 1) + join_or_rinfo->norm_selec = 1; + /* as explained above, we don't touch outer_selec */ + } +} |