summaryrefslogtreecommitdiffstats
path: root/src/backend/optimizer/prep/prepqual.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/prep/prepqual.c')
-rw-r--r--src/backend/optimizer/prep/prepqual.c677
1 files changed, 677 insertions, 0 deletions
diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c
new file mode 100644
index 0000000..da01234
--- /dev/null
+++ b/src/backend/optimizer/prep/prepqual.c
@@ -0,0 +1,677 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepqual.c
+ * Routines for preprocessing qualification expressions
+ *
+ *
+ * While the parser will produce flattened (N-argument) AND/OR trees from
+ * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
+ * directly underneath another AND, or OR underneath OR, if the input was
+ * oddly parenthesized. Also, rule expansion and subquery flattening could
+ * produce such parsetrees. The planner wants to flatten all such cases
+ * to ensure consistent optimization behavior.
+ *
+ * Formerly, this module was responsible for doing the initial flattening,
+ * but now we leave it to eval_const_expressions to do that since it has to
+ * make a complete pass over the expression tree anyway. Instead, we just
+ * have to ensure that our manipulations preserve AND/OR flatness.
+ * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
+ * tree after local transformations that might introduce nested AND/ORs.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/prep/prepqual.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/prep.h"
+#include "utils/lsyscache.h"
+
+
+static List *pull_ands(List *andlist);
+static List *pull_ors(List *orlist);
+static Expr *find_duplicate_ors(Expr *qual, bool is_check);
+static Expr *process_duplicate_ors(List *orlist);
+
+
+/*
+ * negate_clause
+ * Negate a Boolean expression.
+ *
+ * Input is a clause to be negated (e.g., the argument of a NOT clause).
+ * Returns a new clause equivalent to the negation of the given clause.
+ *
+ * Although this can be invoked on its own, it's mainly intended as a helper
+ * for eval_const_expressions(), and that context drives several design
+ * decisions. In particular, if the input is already AND/OR flat, we must
+ * preserve that property. We also don't bother to recurse in situations
+ * where we can assume that lower-level executions of eval_const_expressions
+ * would already have simplified sub-clauses of the input.
+ *
+ * The difference between this and a simple make_notclause() is that this
+ * tries to get rid of the NOT node by logical simplification. It's clearly
+ * always a win if the NOT node can be eliminated altogether. However, our
+ * use of DeMorgan's laws could result in having more NOT nodes rather than
+ * fewer. We do that unconditionally anyway, because in WHERE clauses it's
+ * important to expose as much top-level AND/OR structure as possible.
+ * Also, eliminating an intermediate NOT may allow us to flatten two levels
+ * of AND or OR together that we couldn't have otherwise. Finally, one of
+ * the motivations for doing this is to ensure that logically equivalent
+ * expressions will be seen as physically equal(), so we should always apply
+ * the same transformations.
+ */
+Node *
+negate_clause(Node *node)
+{
+ if (node == NULL) /* should not happen */
+ elog(ERROR, "can't negate an empty subexpression");
+ switch (nodeTag(node))
+ {
+ case T_Const:
+ {
+ Const *c = (Const *) node;
+
+ /* NOT NULL is still NULL */
+ if (c->constisnull)
+ return makeBoolConst(false, true);
+ /* otherwise pretty easy */
+ return makeBoolConst(!DatumGetBool(c->constvalue), false);
+ }
+ break;
+ case T_OpExpr:
+ {
+ /*
+ * Negate operator if possible: (NOT (< A B)) => (>= A B)
+ */
+ OpExpr *opexpr = (OpExpr *) node;
+ Oid negator = get_negator(opexpr->opno);
+
+ if (negator)
+ {
+ OpExpr *newopexpr = makeNode(OpExpr);
+
+ newopexpr->opno = negator;
+ newopexpr->opfuncid = InvalidOid;
+ newopexpr->opresulttype = opexpr->opresulttype;
+ newopexpr->opretset = opexpr->opretset;
+ newopexpr->opcollid = opexpr->opcollid;
+ newopexpr->inputcollid = opexpr->inputcollid;
+ newopexpr->args = opexpr->args;
+ newopexpr->location = opexpr->location;
+ return (Node *) newopexpr;
+ }
+ }
+ break;
+ case T_ScalarArrayOpExpr:
+ {
+ /*
+ * Negate a ScalarArrayOpExpr if its operator has a negator;
+ * for example x = ANY (list) becomes x <> ALL (list)
+ */
+ ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
+ Oid negator = get_negator(saopexpr->opno);
+
+ if (negator)
+ {
+ ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
+
+ newopexpr->opno = negator;
+ newopexpr->opfuncid = InvalidOid;
+ newopexpr->hashfuncid = InvalidOid;
+ newopexpr->negfuncid = InvalidOid;
+ newopexpr->useOr = !saopexpr->useOr;
+ newopexpr->inputcollid = saopexpr->inputcollid;
+ newopexpr->args = saopexpr->args;
+ newopexpr->location = saopexpr->location;
+ return (Node *) newopexpr;
+ }
+ }
+ break;
+ case T_BoolExpr:
+ {
+ BoolExpr *expr = (BoolExpr *) node;
+
+ switch (expr->boolop)
+ {
+ /*--------------------
+ * Apply DeMorgan's Laws:
+ * (NOT (AND A B)) => (OR (NOT A) (NOT B))
+ * (NOT (OR A B)) => (AND (NOT A) (NOT B))
+ * i.e., swap AND for OR and negate each subclause.
+ *
+ * If the input is already AND/OR flat and has no NOT
+ * directly above AND or OR, this transformation preserves
+ * those properties. For example, if no direct child of
+ * the given AND clause is an AND or a NOT-above-OR, then
+ * the recursive calls of negate_clause() can't return any
+ * OR clauses. So we needn't call pull_ors() before
+ * building a new OR clause. Similarly for the OR case.
+ *--------------------
+ */
+ case AND_EXPR:
+ {
+ List *nargs = NIL;
+ ListCell *lc;
+
+ foreach(lc, expr->args)
+ {
+ nargs = lappend(nargs,
+ negate_clause(lfirst(lc)));
+ }
+ return (Node *) make_orclause(nargs);
+ }
+ break;
+ case OR_EXPR:
+ {
+ List *nargs = NIL;
+ ListCell *lc;
+
+ foreach(lc, expr->args)
+ {
+ nargs = lappend(nargs,
+ negate_clause(lfirst(lc)));
+ }
+ return (Node *) make_andclause(nargs);
+ }
+ break;
+ case NOT_EXPR:
+
+ /*
+ * NOT underneath NOT: they cancel. We assume the
+ * input is already simplified, so no need to recurse.
+ */
+ return (Node *) linitial(expr->args);
+ default:
+ elog(ERROR, "unrecognized boolop: %d",
+ (int) expr->boolop);
+ break;
+ }
+ }
+ break;
+ case T_NullTest:
+ {
+ NullTest *expr = (NullTest *) node;
+
+ /*
+ * In the rowtype case, the two flavors of NullTest are *not*
+ * logical inverses, so we can't simplify. But it does work
+ * for scalar datatypes.
+ */
+ if (!expr->argisrow)
+ {
+ NullTest *newexpr = makeNode(NullTest);
+
+ newexpr->arg = expr->arg;
+ newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
+ IS_NOT_NULL : IS_NULL);
+ newexpr->argisrow = expr->argisrow;
+ newexpr->location = expr->location;
+ return (Node *) newexpr;
+ }
+ }
+ break;
+ case T_BooleanTest:
+ {
+ BooleanTest *expr = (BooleanTest *) node;
+ BooleanTest *newexpr = makeNode(BooleanTest);
+
+ newexpr->arg = expr->arg;
+ switch (expr->booltesttype)
+ {
+ case IS_TRUE:
+ newexpr->booltesttype = IS_NOT_TRUE;
+ break;
+ case IS_NOT_TRUE:
+ newexpr->booltesttype = IS_TRUE;
+ break;
+ case IS_FALSE:
+ newexpr->booltesttype = IS_NOT_FALSE;
+ break;
+ case IS_NOT_FALSE:
+ newexpr->booltesttype = IS_FALSE;
+ break;
+ case IS_UNKNOWN:
+ newexpr->booltesttype = IS_NOT_UNKNOWN;
+ break;
+ case IS_NOT_UNKNOWN:
+ newexpr->booltesttype = IS_UNKNOWN;
+ break;
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) expr->booltesttype);
+ break;
+ }
+ newexpr->location = expr->location;
+ return (Node *) newexpr;
+ }
+ break;
+ default:
+ /* else fall through */
+ break;
+ }
+
+ /*
+ * Otherwise we don't know how to simplify this, so just tack on an
+ * explicit NOT node.
+ */
+ return (Node *) make_notclause((Expr *) node);
+}
+
+
+/*
+ * canonicalize_qual
+ * Convert a qualification expression to the most useful form.
+ *
+ * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
+ * clauses. It can also be used on top-level CHECK constraints, for which
+ * pass is_check = true. DO NOT call it on any expression that is not known
+ * to be one or the other, as it might apply inappropriate simplifications.
+ *
+ * The name of this routine is a holdover from a time when it would try to
+ * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
+ * Eventually, we recognized that that had more theoretical purity than
+ * actual usefulness, and so now the transformation doesn't involve any
+ * notion of reaching a canonical form.
+ *
+ * NOTE: we assume the input has already been through eval_const_expressions
+ * and therefore possesses AND/OR flatness. Formerly this function included
+ * its own flattening logic, but that requires a useless extra pass over the
+ * tree.
+ *
+ * Returns the modified qualification.
+ */
+Expr *
+canonicalize_qual(Expr *qual, bool is_check)
+{
+ Expr *newqual;
+
+ /* Quick exit for empty qual */
+ if (qual == NULL)
+ return NULL;
+
+ /* This should not be invoked on quals in implicit-AND format */
+ Assert(!IsA(qual, List));
+
+ /*
+ * Pull up redundant subclauses in OR-of-AND trees. We do this only
+ * within the top-level AND/OR structure; there's no point in looking
+ * deeper. Also remove any NULL constants in the top-level structure.
+ */
+ newqual = find_duplicate_ors(qual, is_check);
+
+ return newqual;
+}
+
+
+/*
+ * pull_ands
+ * Recursively flatten nested AND clauses into a single and-clause list.
+ *
+ * Input is the arglist of an AND clause.
+ * Returns the rebuilt arglist (note original list structure is not touched).
+ */
+static List *
+pull_ands(List *andlist)
+{
+ List *out_list = NIL;
+ ListCell *arg;
+
+ foreach(arg, andlist)
+ {
+ Node *subexpr = (Node *) lfirst(arg);
+
+ if (is_andclause(subexpr))
+ out_list = list_concat(out_list,
+ pull_ands(((BoolExpr *) subexpr)->args));
+ else
+ out_list = lappend(out_list, subexpr);
+ }
+ return out_list;
+}
+
+/*
+ * pull_ors
+ * Recursively flatten nested OR clauses into a single or-clause list.
+ *
+ * Input is the arglist of an OR clause.
+ * Returns the rebuilt arglist (note original list structure is not touched).
+ */
+static List *
+pull_ors(List *orlist)
+{
+ List *out_list = NIL;
+ ListCell *arg;
+
+ foreach(arg, orlist)
+ {
+ Node *subexpr = (Node *) lfirst(arg);
+
+ if (is_orclause(subexpr))
+ out_list = list_concat(out_list,
+ pull_ors(((BoolExpr *) subexpr)->args));
+ else
+ out_list = lappend(out_list, subexpr);
+ }
+ return out_list;
+}
+
+
+/*--------------------
+ * The following code attempts to apply the inverse OR distributive law:
+ * ((A AND B) OR (A AND C)) => (A AND (B OR C))
+ * That is, locate OR clauses in which every subclause contains an
+ * identical term, and pull out the duplicated terms.
+ *
+ * This may seem like a fairly useless activity, but it turns out to be
+ * applicable to many machine-generated queries, and there are also queries
+ * in some of the TPC benchmarks that need it. This was in fact almost the
+ * sole useful side-effect of the old prepqual code that tried to force
+ * the query into canonical AND-of-ORs form: the canonical equivalent of
+ * ((A AND B) OR (A AND C))
+ * is
+ * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
+ * which the code was able to simplify to
+ * (A AND (A OR C) AND (B OR A) AND (B OR C))
+ * thus successfully extracting the common condition A --- but at the cost
+ * of cluttering the qual with many redundant clauses.
+ *--------------------
+ */
+
+/*
+ * find_duplicate_ors
+ * Given a qualification tree with the NOTs pushed down, search for
+ * OR clauses to which the inverse OR distributive law might apply.
+ * Only the top-level AND/OR structure is searched.
+ *
+ * While at it, we remove any NULL constants within the top-level AND/OR
+ * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
+ * In general that would change the result, so eval_const_expressions can't
+ * do it; but at top level of WHERE, we don't need to distinguish between
+ * FALSE and NULL results, so it's valid to treat NULL::boolean the same
+ * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
+ * CHECK constraint, we may treat a NULL the same as TRUE.
+ *
+ * Returns the modified qualification. AND/OR flatness is preserved.
+ */
+static Expr *
+find_duplicate_ors(Expr *qual, bool is_check)
+{
+ if (is_orclause(qual))
+ {
+ List *orlist = NIL;
+ ListCell *temp;
+
+ /* Recurse */
+ foreach(temp, ((BoolExpr *) qual)->args)
+ {
+ Expr *arg = (Expr *) lfirst(temp);
+
+ arg = find_duplicate_ors(arg, is_check);
+
+ /* Get rid of any constant inputs */
+ if (arg && IsA(arg, Const))
+ {
+ Const *carg = (Const *) arg;
+
+ if (is_check)
+ {
+ /* Within OR in CHECK, drop constant FALSE */
+ if (!carg->constisnull && !DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant TRUE or NULL, so OR reduces to TRUE */
+ return (Expr *) makeBoolConst(true, false);
+ }
+ else
+ {
+ /* Within OR in WHERE, drop constant FALSE or NULL */
+ if (carg->constisnull || !DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant TRUE, so OR reduces to TRUE */
+ return arg;
+ }
+ }
+
+ orlist = lappend(orlist, arg);
+ }
+
+ /* Flatten any ORs pulled up to just below here */
+ orlist = pull_ors(orlist);
+
+ /* Now we can look for duplicate ORs */
+ return process_duplicate_ors(orlist);
+ }
+ else if (is_andclause(qual))
+ {
+ List *andlist = NIL;
+ ListCell *temp;
+
+ /* Recurse */
+ foreach(temp, ((BoolExpr *) qual)->args)
+ {
+ Expr *arg = (Expr *) lfirst(temp);
+
+ arg = find_duplicate_ors(arg, is_check);
+
+ /* Get rid of any constant inputs */
+ if (arg && IsA(arg, Const))
+ {
+ Const *carg = (Const *) arg;
+
+ if (is_check)
+ {
+ /* Within AND in CHECK, drop constant TRUE or NULL */
+ if (carg->constisnull || DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant FALSE, so AND reduces to FALSE */
+ return arg;
+ }
+ else
+ {
+ /* Within AND in WHERE, drop constant TRUE */
+ if (!carg->constisnull && DatumGetBool(carg->constvalue))
+ continue;
+ /* Constant FALSE or NULL, so AND reduces to FALSE */
+ return (Expr *) makeBoolConst(false, false);
+ }
+ }
+
+ andlist = lappend(andlist, arg);
+ }
+
+ /* Flatten any ANDs introduced just below here */
+ andlist = pull_ands(andlist);
+
+ /* AND of no inputs reduces to TRUE */
+ if (andlist == NIL)
+ return (Expr *) makeBoolConst(true, false);
+
+ /* Single-expression AND just reduces to that expression */
+ if (list_length(andlist) == 1)
+ return (Expr *) linitial(andlist);
+
+ /* Else we still need an AND node */
+ return make_andclause(andlist);
+ }
+ else
+ return qual;
+}
+
+/*
+ * process_duplicate_ors
+ * Given a list of exprs which are ORed together, try to apply
+ * the inverse OR distributive law.
+ *
+ * Returns the resulting expression (could be an AND clause, an OR
+ * clause, or maybe even a single subexpression).
+ */
+static Expr *
+process_duplicate_ors(List *orlist)
+{
+ List *reference = NIL;
+ int num_subclauses = 0;
+ List *winners;
+ List *neworlist;
+ ListCell *temp;
+
+ /* OR of no inputs reduces to FALSE */
+ if (orlist == NIL)
+ return (Expr *) makeBoolConst(false, false);
+
+ /* Single-expression OR just reduces to that expression */
+ if (list_length(orlist) == 1)
+ return (Expr *) linitial(orlist);
+
+ /*
+ * Choose the shortest AND clause as the reference list --- obviously, any
+ * subclause not in this clause isn't in all the clauses. If we find a
+ * clause that's not an AND, we can treat it as a one-element AND clause,
+ * which necessarily wins as shortest.
+ */
+ foreach(temp, orlist)
+ {
+ Expr *clause = (Expr *) lfirst(temp);
+
+ if (is_andclause(clause))
+ {
+ List *subclauses = ((BoolExpr *) clause)->args;
+ int nclauses = list_length(subclauses);
+
+ if (reference == NIL || nclauses < num_subclauses)
+ {
+ reference = subclauses;
+ num_subclauses = nclauses;
+ }
+ }
+ else
+ {
+ reference = list_make1(clause);
+ break;
+ }
+ }
+
+ /*
+ * Just in case, eliminate any duplicates in the reference list.
+ */
+ reference = list_union(NIL, reference);
+
+ /*
+ * Check each element of the reference list to see if it's in all the OR
+ * clauses. Build a new list of winning clauses.
+ */
+ winners = NIL;
+ foreach(temp, reference)
+ {
+ Expr *refclause = (Expr *) lfirst(temp);
+ bool win = true;
+ ListCell *temp2;
+
+ foreach(temp2, orlist)
+ {
+ Expr *clause = (Expr *) lfirst(temp2);
+
+ if (is_andclause(clause))
+ {
+ if (!list_member(((BoolExpr *) clause)->args, refclause))
+ {
+ win = false;
+ break;
+ }
+ }
+ else
+ {
+ if (!equal(refclause, clause))
+ {
+ win = false;
+ break;
+ }
+ }
+ }
+
+ if (win)
+ winners = lappend(winners, refclause);
+ }
+
+ /*
+ * If no winners, we can't transform the OR
+ */
+ if (winners == NIL)
+ return make_orclause(orlist);
+
+ /*
+ * Generate new OR list consisting of the remaining sub-clauses.
+ *
+ * If any clause degenerates to empty, then we have a situation like (A
+ * AND B) OR (A), which can be reduced to just A --- that is, the
+ * additional conditions in other arms of the OR are irrelevant.
+ *
+ * Note that because we use list_difference, any multiple occurrences of a
+ * winning clause in an AND sub-clause will be removed automatically.
+ */
+ neworlist = NIL;
+ foreach(temp, orlist)
+ {
+ Expr *clause = (Expr *) lfirst(temp);
+
+ if (is_andclause(clause))
+ {
+ List *subclauses = ((BoolExpr *) clause)->args;
+
+ subclauses = list_difference(subclauses, winners);
+ if (subclauses != NIL)
+ {
+ if (list_length(subclauses) == 1)
+ neworlist = lappend(neworlist, linitial(subclauses));
+ else
+ neworlist = lappend(neworlist, make_andclause(subclauses));
+ }
+ else
+ {
+ neworlist = NIL; /* degenerate case, see above */
+ break;
+ }
+ }
+ else
+ {
+ if (!list_member(winners, clause))
+ neworlist = lappend(neworlist, clause);
+ else
+ {
+ neworlist = NIL; /* degenerate case, see above */
+ break;
+ }
+ }
+ }
+
+ /*
+ * Append reduced OR to the winners list, if it's not degenerate, handling
+ * the special case of one element correctly (can that really happen?).
+ * Also be careful to maintain AND/OR flatness in case we pulled up a
+ * sub-sub-OR-clause.
+ */
+ if (neworlist != NIL)
+ {
+ if (list_length(neworlist) == 1)
+ winners = lappend(winners, linitial(neworlist));
+ else
+ winners = lappend(winners, make_orclause(pull_ors(neworlist)));
+ }
+
+ /*
+ * And return the constructed AND clause, again being wary of a single
+ * element and AND/OR flatness.
+ */
+ if (list_length(winners) == 1)
+ return (Expr *) linitial(winners);
+ else
+ return make_andclause(pull_ands(winners));
+}