diff options
Diffstat (limited to '')
-rw-r--r-- | src/backend/optimizer/prep/prepqual.c | 676 |
1 files changed, 676 insertions, 0 deletions
diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c new file mode 100644 index 0000000..42c3e4d --- /dev/null +++ b/src/backend/optimizer/prep/prepqual.c @@ -0,0 +1,676 @@ +/*------------------------------------------------------------------------- + * + * prepqual.c + * Routines for preprocessing qualification expressions + * + * + * While the parser will produce flattened (N-argument) AND/OR trees from + * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause + * directly underneath another AND, or OR underneath OR, if the input was + * oddly parenthesized. Also, rule expansion and subquery flattening could + * produce such parsetrees. The planner wants to flatten all such cases + * to ensure consistent optimization behavior. + * + * Formerly, this module was responsible for doing the initial flattening, + * but now we leave it to eval_const_expressions to do that since it has to + * make a complete pass over the expression tree anyway. Instead, we just + * have to ensure that our manipulations preserve AND/OR flatness. + * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR + * tree after local transformations that might introduce nested AND/ORs. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/prep/prepqual.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "optimizer/prep.h" +#include "utils/lsyscache.h" + + +static List *pull_ands(List *andlist); +static List *pull_ors(List *orlist); +static Expr *find_duplicate_ors(Expr *qual, bool is_check); +static Expr *process_duplicate_ors(List *orlist); + + +/* + * negate_clause + * Negate a Boolean expression. + * + * Input is a clause to be negated (e.g., the argument of a NOT clause). + * Returns a new clause equivalent to the negation of the given clause. + * + * Although this can be invoked on its own, it's mainly intended as a helper + * for eval_const_expressions(), and that context drives several design + * decisions. In particular, if the input is already AND/OR flat, we must + * preserve that property. We also don't bother to recurse in situations + * where we can assume that lower-level executions of eval_const_expressions + * would already have simplified sub-clauses of the input. + * + * The difference between this and a simple make_notclause() is that this + * tries to get rid of the NOT node by logical simplification. It's clearly + * always a win if the NOT node can be eliminated altogether. However, our + * use of DeMorgan's laws could result in having more NOT nodes rather than + * fewer. We do that unconditionally anyway, because in WHERE clauses it's + * important to expose as much top-level AND/OR structure as possible. + * Also, eliminating an intermediate NOT may allow us to flatten two levels + * of AND or OR together that we couldn't have otherwise. Finally, one of + * the motivations for doing this is to ensure that logically equivalent + * expressions will be seen as physically equal(), so we should always apply + * the same transformations. + */ +Node * +negate_clause(Node *node) +{ + if (node == NULL) /* should not happen */ + elog(ERROR, "can't negate an empty subexpression"); + switch (nodeTag(node)) + { + case T_Const: + { + Const *c = (Const *) node; + + /* NOT NULL is still NULL */ + if (c->constisnull) + return makeBoolConst(false, true); + /* otherwise pretty easy */ + return makeBoolConst(!DatumGetBool(c->constvalue), false); + } + break; + case T_OpExpr: + { + /* + * Negate operator if possible: (NOT (< A B)) => (>= A B) + */ + OpExpr *opexpr = (OpExpr *) node; + Oid negator = get_negator(opexpr->opno); + + if (negator) + { + OpExpr *newopexpr = makeNode(OpExpr); + + newopexpr->opno = negator; + newopexpr->opfuncid = InvalidOid; + newopexpr->opresulttype = opexpr->opresulttype; + newopexpr->opretset = opexpr->opretset; + newopexpr->opcollid = opexpr->opcollid; + newopexpr->inputcollid = opexpr->inputcollid; + newopexpr->args = opexpr->args; + newopexpr->location = opexpr->location; + return (Node *) newopexpr; + } + } + break; + case T_ScalarArrayOpExpr: + { + /* + * Negate a ScalarArrayOpExpr if its operator has a negator; + * for example x = ANY (list) becomes x <> ALL (list) + */ + ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node; + Oid negator = get_negator(saopexpr->opno); + + if (negator) + { + ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr); + + newopexpr->opno = negator; + newopexpr->opfuncid = InvalidOid; + newopexpr->hashfuncid = InvalidOid; + newopexpr->useOr = !saopexpr->useOr; + newopexpr->inputcollid = saopexpr->inputcollid; + newopexpr->args = saopexpr->args; + newopexpr->location = saopexpr->location; + return (Node *) newopexpr; + } + } + break; + case T_BoolExpr: + { + BoolExpr *expr = (BoolExpr *) node; + + switch (expr->boolop) + { + /*-------------------- + * Apply DeMorgan's Laws: + * (NOT (AND A B)) => (OR (NOT A) (NOT B)) + * (NOT (OR A B)) => (AND (NOT A) (NOT B)) + * i.e., swap AND for OR and negate each subclause. + * + * If the input is already AND/OR flat and has no NOT + * directly above AND or OR, this transformation preserves + * those properties. For example, if no direct child of + * the given AND clause is an AND or a NOT-above-OR, then + * the recursive calls of negate_clause() can't return any + * OR clauses. So we needn't call pull_ors() before + * building a new OR clause. Similarly for the OR case. + *-------------------- + */ + case AND_EXPR: + { + List *nargs = NIL; + ListCell *lc; + + foreach(lc, expr->args) + { + nargs = lappend(nargs, + negate_clause(lfirst(lc))); + } + return (Node *) make_orclause(nargs); + } + break; + case OR_EXPR: + { + List *nargs = NIL; + ListCell *lc; + + foreach(lc, expr->args) + { + nargs = lappend(nargs, + negate_clause(lfirst(lc))); + } + return (Node *) make_andclause(nargs); + } + break; + case NOT_EXPR: + + /* + * NOT underneath NOT: they cancel. We assume the + * input is already simplified, so no need to recurse. + */ + return (Node *) linitial(expr->args); + default: + elog(ERROR, "unrecognized boolop: %d", + (int) expr->boolop); + break; + } + } + break; + case T_NullTest: + { + NullTest *expr = (NullTest *) node; + + /* + * In the rowtype case, the two flavors of NullTest are *not* + * logical inverses, so we can't simplify. But it does work + * for scalar datatypes. + */ + if (!expr->argisrow) + { + NullTest *newexpr = makeNode(NullTest); + + newexpr->arg = expr->arg; + newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ? + IS_NOT_NULL : IS_NULL); + newexpr->argisrow = expr->argisrow; + newexpr->location = expr->location; + return (Node *) newexpr; + } + } + break; + case T_BooleanTest: + { + BooleanTest *expr = (BooleanTest *) node; + BooleanTest *newexpr = makeNode(BooleanTest); + + newexpr->arg = expr->arg; + switch (expr->booltesttype) + { + case IS_TRUE: + newexpr->booltesttype = IS_NOT_TRUE; + break; + case IS_NOT_TRUE: + newexpr->booltesttype = IS_TRUE; + break; + case IS_FALSE: + newexpr->booltesttype = IS_NOT_FALSE; + break; + case IS_NOT_FALSE: + newexpr->booltesttype = IS_FALSE; + break; + case IS_UNKNOWN: + newexpr->booltesttype = IS_NOT_UNKNOWN; + break; + case IS_NOT_UNKNOWN: + newexpr->booltesttype = IS_UNKNOWN; + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) expr->booltesttype); + break; + } + newexpr->location = expr->location; + return (Node *) newexpr; + } + break; + default: + /* else fall through */ + break; + } + + /* + * Otherwise we don't know how to simplify this, so just tack on an + * explicit NOT node. + */ + return (Node *) make_notclause((Expr *) node); +} + + +/* + * canonicalize_qual + * Convert a qualification expression to the most useful form. + * + * This is primarily intended to be used on top-level WHERE (or JOIN/ON) + * clauses. It can also be used on top-level CHECK constraints, for which + * pass is_check = true. DO NOT call it on any expression that is not known + * to be one or the other, as it might apply inappropriate simplifications. + * + * The name of this routine is a holdover from a time when it would try to + * force the expression into canonical AND-of-ORs or OR-of-ANDs form. + * Eventually, we recognized that that had more theoretical purity than + * actual usefulness, and so now the transformation doesn't involve any + * notion of reaching a canonical form. + * + * NOTE: we assume the input has already been through eval_const_expressions + * and therefore possesses AND/OR flatness. Formerly this function included + * its own flattening logic, but that requires a useless extra pass over the + * tree. + * + * Returns the modified qualification. + */ +Expr * +canonicalize_qual(Expr *qual, bool is_check) +{ + Expr *newqual; + + /* Quick exit for empty qual */ + if (qual == NULL) + return NULL; + + /* This should not be invoked on quals in implicit-AND format */ + Assert(!IsA(qual, List)); + + /* + * Pull up redundant subclauses in OR-of-AND trees. We do this only + * within the top-level AND/OR structure; there's no point in looking + * deeper. Also remove any NULL constants in the top-level structure. + */ + newqual = find_duplicate_ors(qual, is_check); + + return newqual; +} + + +/* + * pull_ands + * Recursively flatten nested AND clauses into a single and-clause list. + * + * Input is the arglist of an AND clause. + * Returns the rebuilt arglist (note original list structure is not touched). + */ +static List * +pull_ands(List *andlist) +{ + List *out_list = NIL; + ListCell *arg; + + foreach(arg, andlist) + { + Node *subexpr = (Node *) lfirst(arg); + + if (is_andclause(subexpr)) + out_list = list_concat(out_list, + pull_ands(((BoolExpr *) subexpr)->args)); + else + out_list = lappend(out_list, subexpr); + } + return out_list; +} + +/* + * pull_ors + * Recursively flatten nested OR clauses into a single or-clause list. + * + * Input is the arglist of an OR clause. + * Returns the rebuilt arglist (note original list structure is not touched). + */ +static List * +pull_ors(List *orlist) +{ + List *out_list = NIL; + ListCell *arg; + + foreach(arg, orlist) + { + Node *subexpr = (Node *) lfirst(arg); + + if (is_orclause(subexpr)) + out_list = list_concat(out_list, + pull_ors(((BoolExpr *) subexpr)->args)); + else + out_list = lappend(out_list, subexpr); + } + return out_list; +} + + +/*-------------------- + * The following code attempts to apply the inverse OR distributive law: + * ((A AND B) OR (A AND C)) => (A AND (B OR C)) + * That is, locate OR clauses in which every subclause contains an + * identical term, and pull out the duplicated terms. + * + * This may seem like a fairly useless activity, but it turns out to be + * applicable to many machine-generated queries, and there are also queries + * in some of the TPC benchmarks that need it. This was in fact almost the + * sole useful side-effect of the old prepqual code that tried to force + * the query into canonical AND-of-ORs form: the canonical equivalent of + * ((A AND B) OR (A AND C)) + * is + * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C)) + * which the code was able to simplify to + * (A AND (A OR C) AND (B OR A) AND (B OR C)) + * thus successfully extracting the common condition A --- but at the cost + * of cluttering the qual with many redundant clauses. + *-------------------- + */ + +/* + * find_duplicate_ors + * Given a qualification tree with the NOTs pushed down, search for + * OR clauses to which the inverse OR distributive law might apply. + * Only the top-level AND/OR structure is searched. + * + * While at it, we remove any NULL constants within the top-level AND/OR + * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x". + * In general that would change the result, so eval_const_expressions can't + * do it; but at top level of WHERE, we don't need to distinguish between + * FALSE and NULL results, so it's valid to treat NULL::boolean the same + * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level + * CHECK constraint, we may treat a NULL the same as TRUE. + * + * Returns the modified qualification. AND/OR flatness is preserved. + */ +static Expr * +find_duplicate_ors(Expr *qual, bool is_check) +{ + if (is_orclause(qual)) + { + List *orlist = NIL; + ListCell *temp; + + /* Recurse */ + foreach(temp, ((BoolExpr *) qual)->args) + { + Expr *arg = (Expr *) lfirst(temp); + + arg = find_duplicate_ors(arg, is_check); + + /* Get rid of any constant inputs */ + if (arg && IsA(arg, Const)) + { + Const *carg = (Const *) arg; + + if (is_check) + { + /* Within OR in CHECK, drop constant FALSE */ + if (!carg->constisnull && !DatumGetBool(carg->constvalue)) + continue; + /* Constant TRUE or NULL, so OR reduces to TRUE */ + return (Expr *) makeBoolConst(true, false); + } + else + { + /* Within OR in WHERE, drop constant FALSE or NULL */ + if (carg->constisnull || !DatumGetBool(carg->constvalue)) + continue; + /* Constant TRUE, so OR reduces to TRUE */ + return arg; + } + } + + orlist = lappend(orlist, arg); + } + + /* Flatten any ORs pulled up to just below here */ + orlist = pull_ors(orlist); + + /* Now we can look for duplicate ORs */ + return process_duplicate_ors(orlist); + } + else if (is_andclause(qual)) + { + List *andlist = NIL; + ListCell *temp; + + /* Recurse */ + foreach(temp, ((BoolExpr *) qual)->args) + { + Expr *arg = (Expr *) lfirst(temp); + + arg = find_duplicate_ors(arg, is_check); + + /* Get rid of any constant inputs */ + if (arg && IsA(arg, Const)) + { + Const *carg = (Const *) arg; + + if (is_check) + { + /* Within AND in CHECK, drop constant TRUE or NULL */ + if (carg->constisnull || DatumGetBool(carg->constvalue)) + continue; + /* Constant FALSE, so AND reduces to FALSE */ + return arg; + } + else + { + /* Within AND in WHERE, drop constant TRUE */ + if (!carg->constisnull && DatumGetBool(carg->constvalue)) + continue; + /* Constant FALSE or NULL, so AND reduces to FALSE */ + return (Expr *) makeBoolConst(false, false); + } + } + + andlist = lappend(andlist, arg); + } + + /* Flatten any ANDs introduced just below here */ + andlist = pull_ands(andlist); + + /* AND of no inputs reduces to TRUE */ + if (andlist == NIL) + return (Expr *) makeBoolConst(true, false); + + /* Single-expression AND just reduces to that expression */ + if (list_length(andlist) == 1) + return (Expr *) linitial(andlist); + + /* Else we still need an AND node */ + return make_andclause(andlist); + } + else + return qual; +} + +/* + * process_duplicate_ors + * Given a list of exprs which are ORed together, try to apply + * the inverse OR distributive law. + * + * Returns the resulting expression (could be an AND clause, an OR + * clause, or maybe even a single subexpression). + */ +static Expr * +process_duplicate_ors(List *orlist) +{ + List *reference = NIL; + int num_subclauses = 0; + List *winners; + List *neworlist; + ListCell *temp; + + /* OR of no inputs reduces to FALSE */ + if (orlist == NIL) + return (Expr *) makeBoolConst(false, false); + + /* Single-expression OR just reduces to that expression */ + if (list_length(orlist) == 1) + return (Expr *) linitial(orlist); + + /* + * Choose the shortest AND clause as the reference list --- obviously, any + * subclause not in this clause isn't in all the clauses. If we find a + * clause that's not an AND, we can treat it as a one-element AND clause, + * which necessarily wins as shortest. + */ + foreach(temp, orlist) + { + Expr *clause = (Expr *) lfirst(temp); + + if (is_andclause(clause)) + { + List *subclauses = ((BoolExpr *) clause)->args; + int nclauses = list_length(subclauses); + + if (reference == NIL || nclauses < num_subclauses) + { + reference = subclauses; + num_subclauses = nclauses; + } + } + else + { + reference = list_make1(clause); + break; + } + } + + /* + * Just in case, eliminate any duplicates in the reference list. + */ + reference = list_union(NIL, reference); + + /* + * Check each element of the reference list to see if it's in all the OR + * clauses. Build a new list of winning clauses. + */ + winners = NIL; + foreach(temp, reference) + { + Expr *refclause = (Expr *) lfirst(temp); + bool win = true; + ListCell *temp2; + + foreach(temp2, orlist) + { + Expr *clause = (Expr *) lfirst(temp2); + + if (is_andclause(clause)) + { + if (!list_member(((BoolExpr *) clause)->args, refclause)) + { + win = false; + break; + } + } + else + { + if (!equal(refclause, clause)) + { + win = false; + break; + } + } + } + + if (win) + winners = lappend(winners, refclause); + } + + /* + * If no winners, we can't transform the OR + */ + if (winners == NIL) + return make_orclause(orlist); + + /* + * Generate new OR list consisting of the remaining sub-clauses. + * + * If any clause degenerates to empty, then we have a situation like (A + * AND B) OR (A), which can be reduced to just A --- that is, the + * additional conditions in other arms of the OR are irrelevant. + * + * Note that because we use list_difference, any multiple occurrences of a + * winning clause in an AND sub-clause will be removed automatically. + */ + neworlist = NIL; + foreach(temp, orlist) + { + Expr *clause = (Expr *) lfirst(temp); + + if (is_andclause(clause)) + { + List *subclauses = ((BoolExpr *) clause)->args; + + subclauses = list_difference(subclauses, winners); + if (subclauses != NIL) + { + if (list_length(subclauses) == 1) + neworlist = lappend(neworlist, linitial(subclauses)); + else + neworlist = lappend(neworlist, make_andclause(subclauses)); + } + else + { + neworlist = NIL; /* degenerate case, see above */ + break; + } + } + else + { + if (!list_member(winners, clause)) + neworlist = lappend(neworlist, clause); + else + { + neworlist = NIL; /* degenerate case, see above */ + break; + } + } + } + + /* + * Append reduced OR to the winners list, if it's not degenerate, handling + * the special case of one element correctly (can that really happen?). + * Also be careful to maintain AND/OR flatness in case we pulled up a + * sub-sub-OR-clause. + */ + if (neworlist != NIL) + { + if (list_length(neworlist) == 1) + winners = lappend(winners, linitial(neworlist)); + else + winners = lappend(winners, make_orclause(pull_ors(neworlist))); + } + + /* + * And return the constructed AND clause, again being wary of a single + * element and AND/OR flatness. + */ + if (list_length(winners) == 1) + return (Expr *) linitial(winners); + else + return make_andclause(pull_ands(winners)); +} |