From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 4 May 2024 14:17:33 +0200
Subject: Adding upstream version 15.5.

Signed-off-by: Daniel Baumann
---
 src/backend/optimizer/util/Makefile       |   31 +
 src/backend/optimizer/util/appendinfo.c   | 1011 ++++
 src/backend/optimizer/util/clauses.c      | 5255 +++++++++++++++++++++++++++++
 src/backend/optimizer/util/inherit.c      |  949 ++++++
 src/backend/optimizer/util/joininfo.c     |  140 +
 src/backend/optimizer/util/orclauses.c    |  360 ++
 src/backend/optimizer/util/paramassign.c  |  591 ++++
 src/backend/optimizer/util/pathnode.c     | 4298 +++++++++++++++++
 src/backend/optimizer/util/placeholder.c  |  477 +++
 src/backend/optimizer/util/plancat.c      | 2509 ++++++++++++++
 src/backend/optimizer/util/predtest.c     | 2224 ++++++++++++
 src/backend/optimizer/util/relnode.c      | 2047 +++++++++++
 src/backend/optimizer/util/restrictinfo.c |  655 ++++
 src/backend/optimizer/util/tlist.c        | 1258 +++++++
 src/backend/optimizer/util/var.c          |  903 +++++
 15 files changed, 22708 insertions(+)
 create mode 100644 src/backend/optimizer/util/Makefile
 create mode 100644 src/backend/optimizer/util/appendinfo.c
 create mode 100644 src/backend/optimizer/util/clauses.c
 create mode 100644 src/backend/optimizer/util/inherit.c
 create mode 100644 src/backend/optimizer/util/joininfo.c
 create mode 100644 src/backend/optimizer/util/orclauses.c
 create mode 100644 src/backend/optimizer/util/paramassign.c
 create mode 100644 src/backend/optimizer/util/pathnode.c
 create mode 100644 src/backend/optimizer/util/placeholder.c
 create mode 100644 src/backend/optimizer/util/plancat.c
 create mode 100644 src/backend/optimizer/util/predtest.c
 create mode 100644 src/backend/optimizer/util/relnode.c
 create mode 100644 src/backend/optimizer/util/restrictinfo.c
 create mode 100644 src/backend/optimizer/util/tlist.c
 create mode 100644 src/backend/optimizer/util/var.c

diff --git a/src/backend/optimizer/util/Makefile b/src/backend/optimizer/util/Makefile
new file mode 100644
index 0000000..4fb115c
--- /dev/null
+++ b/src/backend/optimizer/util/Makefile
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for optimizer/util
+#
+# IDENTIFICATION
+#    src/backend/optimizer/util/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/optimizer/util
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = \
+    appendinfo.o \
+    clauses.o \
+    inherit.o \
+    joininfo.o \
+    orclauses.o \
+    paramassign.o \
+    pathnode.o \
+    placeholder.o \
+    plancat.o \
+    predtest.o \
+    relnode.o \
+    restrictinfo.o \
+    tlist.o \
+    var.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
new file mode 100644
index 0000000..5c3d5a7
--- /dev/null
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -0,0 +1,1011 @@
+/*-------------------------------------------------------------------------
+ *
+ * appendinfo.c
+ *    Routines for mapping between append parent(s) and children
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    src/backend/optimizer/util/appendinfo.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "foreign/fdwapi.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/appendinfo.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/planmain.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+typedef struct
+{
+    PlannerInfo *root;
+    int         nappinfos;
+    AppendRelInfo **appinfos;
+} adjust_appendrel_attrs_context;
+
+static void make_inh_translation_list(Relation oldrelation,
+                                      Relation newrelation,
+                                      Index newvarno,
+                                      AppendRelInfo *appinfo);
+static Node *adjust_appendrel_attrs_mutator(Node *node,
+                                            adjust_appendrel_attrs_context *context);
+
+
+/*
+ * make_append_rel_info
+ *    Build an AppendRelInfo for the parent-child pair
+ */
+AppendRelInfo *
+make_append_rel_info(Relation parentrel, Relation childrel,
+                     Index parentRTindex, Index childRTindex)
+{
+    AppendRelInfo *appinfo = makeNode(AppendRelInfo);
+
+    appinfo->parent_relid = parentRTindex;
+    appinfo->child_relid = childRTindex;
+    appinfo->parent_reltype = parentrel->rd_rel->reltype;
+    appinfo->child_reltype = childrel->rd_rel->reltype;
+    make_inh_translation_list(parentrel, childrel, childRTindex, appinfo);
+    appinfo->parent_reloid = RelationGetRelid(parentrel);
+
+    return appinfo;
+}
+
+/*
+ * make_inh_translation_list
+ *    Build the list of translations from parent Vars to child Vars for
+ *    an inheritance child, as well as a reverse-translation array.
+ *
+ * The reverse-translation array has an entry for each child relation
+ * column, which is either the 1-based index of the corresponding parent
+ * column, or 0 if there's no match (that happens for dropped child columns,
+ * as well as child columns beyond those of the parent, which are allowed in
+ * traditional inheritance though not partitioning).
+ *
+ * For paranoia's sake, we match type/collation as well as attribute name.
+ */ +static void +make_inh_translation_list(Relation oldrelation, Relation newrelation, + Index newvarno, + AppendRelInfo *appinfo) +{ + List *vars = NIL; + AttrNumber *pcolnos; + TupleDesc old_tupdesc = RelationGetDescr(oldrelation); + TupleDesc new_tupdesc = RelationGetDescr(newrelation); + Oid new_relid = RelationGetRelid(newrelation); + int oldnatts = old_tupdesc->natts; + int newnatts = new_tupdesc->natts; + int old_attno; + int new_attno = 0; + + /* Initialize reverse-translation array with all entries zero */ + appinfo->num_child_cols = newnatts; + appinfo->parent_colnos = pcolnos = + (AttrNumber *) palloc0(newnatts * sizeof(AttrNumber)); + + for (old_attno = 0; old_attno < oldnatts; old_attno++) + { + Form_pg_attribute att; + char *attname; + Oid atttypid; + int32 atttypmod; + Oid attcollation; + + att = TupleDescAttr(old_tupdesc, old_attno); + if (att->attisdropped) + { + /* Just put NULL into this list entry */ + vars = lappend(vars, NULL); + continue; + } + attname = NameStr(att->attname); + atttypid = att->atttypid; + atttypmod = att->atttypmod; + attcollation = att->attcollation; + + /* + * When we are generating the "translation list" for the parent table + * of an inheritance set, no need to search for matches. + */ + if (oldrelation == newrelation) + { + vars = lappend(vars, makeVar(newvarno, + (AttrNumber) (old_attno + 1), + atttypid, + atttypmod, + attcollation, + 0)); + pcolnos[old_attno] = old_attno + 1; + continue; + } + + /* + * Otherwise we have to search for the matching column by name. + * There's no guarantee it'll have the same column position, because + * of cases like ALTER TABLE ADD COLUMN and multiple inheritance. + * However, in simple cases, the relative order of columns is mostly + * the same in both relations, so try the column of newrelation that + * follows immediately after the one that we just found, and if that + * fails, let syscache handle it. + */ + if (new_attno >= newnatts || + (att = TupleDescAttr(new_tupdesc, new_attno))->attisdropped || + strcmp(attname, NameStr(att->attname)) != 0) + { + HeapTuple newtup; + + newtup = SearchSysCacheAttName(new_relid, attname); + if (!HeapTupleIsValid(newtup)) + elog(ERROR, "could not find inherited attribute \"%s\" of relation \"%s\"", + attname, RelationGetRelationName(newrelation)); + new_attno = ((Form_pg_attribute) GETSTRUCT(newtup))->attnum - 1; + Assert(new_attno >= 0 && new_attno < newnatts); + ReleaseSysCache(newtup); + + att = TupleDescAttr(new_tupdesc, new_attno); + } + + /* Found it, check type and collation match */ + if (atttypid != att->atttypid || atttypmod != att->atttypmod) + elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's type", + attname, RelationGetRelationName(newrelation)); + if (attcollation != att->attcollation) + elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's collation", + attname, RelationGetRelationName(newrelation)); + + vars = lappend(vars, makeVar(newvarno, + (AttrNumber) (new_attno + 1), + atttypid, + atttypmod, + attcollation, + 0)); + pcolnos[new_attno] = old_attno + 1; + new_attno++; + } + + appinfo->translated_vars = vars; +} + +/* + * adjust_appendrel_attrs + * Copy the specified query or expression and translate Vars referring to a + * parent rel to refer to the corresponding child rel instead. We also + * update rtindexes appearing outside Vars, such as resultRelation and + * jointree relids. 
+ * + * Note: this is only applied after conversion of sublinks to subplans, + * so we don't need to cope with recursion into sub-queries. + * + * Note: this is not hugely different from what pullup_replace_vars() does; + * maybe we should try to fold the two routines together. + */ +Node * +adjust_appendrel_attrs(PlannerInfo *root, Node *node, int nappinfos, + AppendRelInfo **appinfos) +{ + adjust_appendrel_attrs_context context; + + context.root = root; + context.nappinfos = nappinfos; + context.appinfos = appinfos; + + /* If there's nothing to adjust, don't call this function. */ + Assert(nappinfos >= 1 && appinfos != NULL); + + /* Should never be translating a Query tree. */ + Assert(node == NULL || !IsA(node, Query)); + + return adjust_appendrel_attrs_mutator(node, &context); +} + +static Node * +adjust_appendrel_attrs_mutator(Node *node, + adjust_appendrel_attrs_context *context) +{ + AppendRelInfo **appinfos = context->appinfos; + int nappinfos = context->nappinfos; + int cnt; + + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) copyObject(node); + AppendRelInfo *appinfo = NULL; + + if (var->varlevelsup != 0) + return (Node *) var; /* no changes needed */ + + for (cnt = 0; cnt < nappinfos; cnt++) + { + if (var->varno == appinfos[cnt]->parent_relid) + { + appinfo = appinfos[cnt]; + break; + } + } + + if (appinfo) + { + var->varno = appinfo->child_relid; + /* it's now a generated Var, so drop any syntactic labeling */ + var->varnosyn = 0; + var->varattnosyn = 0; + if (var->varattno > 0) + { + Node *newnode; + + if (var->varattno > list_length(appinfo->translated_vars)) + elog(ERROR, "attribute %d of relation \"%s\" does not exist", + var->varattno, get_rel_name(appinfo->parent_reloid)); + newnode = copyObject(list_nth(appinfo->translated_vars, + var->varattno - 1)); + if (newnode == NULL) + elog(ERROR, "attribute %d of relation \"%s\" does not exist", + var->varattno, get_rel_name(appinfo->parent_reloid)); + return newnode; + } + else if (var->varattno == 0) + { + /* + * Whole-row Var: if we are dealing with named rowtypes, we + * can use a whole-row Var for the child table plus a coercion + * step to convert the tuple layout to the parent's rowtype. + * Otherwise we have to generate a RowExpr. + */ + if (OidIsValid(appinfo->child_reltype)) + { + Assert(var->vartype == appinfo->parent_reltype); + if (appinfo->parent_reltype != appinfo->child_reltype) + { + ConvertRowtypeExpr *r = makeNode(ConvertRowtypeExpr); + + r->arg = (Expr *) var; + r->resulttype = appinfo->parent_reltype; + r->convertformat = COERCE_IMPLICIT_CAST; + r->location = -1; + /* Make sure the Var node has the right type ID, too */ + var->vartype = appinfo->child_reltype; + return (Node *) r; + } + } + else + { + /* + * Build a RowExpr containing the translated variables. + * + * In practice var->vartype will always be RECORDOID here, + * so we need to come up with some suitable column names. + * We use the parent RTE's column names. + * + * Note: we can't get here for inheritance cases, so there + * is no need to worry that translated_vars might contain + * some dummy NULLs. 
+ */ + RowExpr *rowexpr; + List *fields; + RangeTblEntry *rte; + + rte = rt_fetch(appinfo->parent_relid, + context->root->parse->rtable); + fields = copyObject(appinfo->translated_vars); + rowexpr = makeNode(RowExpr); + rowexpr->args = fields; + rowexpr->row_typeid = var->vartype; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + rowexpr->colnames = copyObject(rte->eref->colnames); + rowexpr->location = -1; + + return (Node *) rowexpr; + } + } + /* system attributes don't need any other translation */ + } + else if (var->varno == ROWID_VAR) + { + /* + * If it's a ROWID_VAR placeholder, see if we've reached a leaf + * target rel, for which we can translate the Var to a specific + * instantiation. We should never be asked to translate to a set + * of relids containing more than one leaf target rel, so the + * answer will be unique. If we're still considering non-leaf + * inheritance levels, return the ROWID_VAR Var as-is. + */ + Relids leaf_result_relids = context->root->leaf_result_relids; + Index leaf_relid = 0; + + for (cnt = 0; cnt < nappinfos; cnt++) + { + if (bms_is_member(appinfos[cnt]->child_relid, + leaf_result_relids)) + { + if (leaf_relid) + elog(ERROR, "cannot translate to multiple leaf relids"); + leaf_relid = appinfos[cnt]->child_relid; + } + } + + if (leaf_relid) + { + RowIdentityVarInfo *ridinfo = (RowIdentityVarInfo *) + list_nth(context->root->row_identity_vars, var->varattno - 1); + + if (bms_is_member(leaf_relid, ridinfo->rowidrels)) + { + /* Substitute the Var given in the RowIdentityVarInfo */ + var = copyObject(ridinfo->rowidvar); + /* ... but use the correct relid */ + var->varno = leaf_relid; + /* varnosyn in the RowIdentityVarInfo is probably wrong */ + var->varnosyn = 0; + var->varattnosyn = 0; + } + else + { + /* + * This leaf rel can't return the desired value, so + * substitute a NULL of the correct type. + */ + return (Node *) makeNullConst(var->vartype, + var->vartypmod, + var->varcollid); + } + } + } + return (Node *) var; + } + if (IsA(node, CurrentOfExpr)) + { + CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node); + + for (cnt = 0; cnt < nappinfos; cnt++) + { + AppendRelInfo *appinfo = appinfos[cnt]; + + if (cexpr->cvarno == appinfo->parent_relid) + { + cexpr->cvarno = appinfo->child_relid; + break; + } + } + return (Node *) cexpr; + } + if (IsA(node, PlaceHolderVar)) + { + /* Copy the PlaceHolderVar node with correct mutation of subnodes */ + PlaceHolderVar *phv; + + phv = (PlaceHolderVar *) expression_tree_mutator(node, + adjust_appendrel_attrs_mutator, + (void *) context); + /* now fix PlaceHolderVar's relid sets */ + if (phv->phlevelsup == 0) + phv->phrels = adjust_child_relids(phv->phrels, context->nappinfos, + context->appinfos); + return (Node *) phv; + } + /* Shouldn't need to handle planner auxiliary nodes here */ + Assert(!IsA(node, SpecialJoinInfo)); + Assert(!IsA(node, AppendRelInfo)); + Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); + + /* + * We have to process RestrictInfo nodes specially. (Note: although + * set_append_rel_pathlist will hide RestrictInfos in the parent's + * baserestrictinfo list from us, it doesn't hide those in joininfo.) 
+ */ + if (IsA(node, RestrictInfo)) + { + RestrictInfo *oldinfo = (RestrictInfo *) node; + RestrictInfo *newinfo = makeNode(RestrictInfo); + + /* Copy all flat-copiable fields */ + memcpy(newinfo, oldinfo, sizeof(RestrictInfo)); + + /* Recursively fix the clause itself */ + newinfo->clause = (Expr *) + adjust_appendrel_attrs_mutator((Node *) oldinfo->clause, context); + + /* and the modified version, if an OR clause */ + newinfo->orclause = (Expr *) + adjust_appendrel_attrs_mutator((Node *) oldinfo->orclause, context); + + /* adjust relid sets too */ + newinfo->clause_relids = adjust_child_relids(oldinfo->clause_relids, + context->nappinfos, + context->appinfos); + newinfo->required_relids = adjust_child_relids(oldinfo->required_relids, + context->nappinfos, + context->appinfos); + newinfo->outer_relids = adjust_child_relids(oldinfo->outer_relids, + context->nappinfos, + context->appinfos); + newinfo->nullable_relids = adjust_child_relids(oldinfo->nullable_relids, + context->nappinfos, + context->appinfos); + newinfo->left_relids = adjust_child_relids(oldinfo->left_relids, + context->nappinfos, + context->appinfos); + newinfo->right_relids = adjust_child_relids(oldinfo->right_relids, + context->nappinfos, + context->appinfos); + + /* + * Reset cached derivative fields, since these might need to have + * different values when considering the child relation. Note we + * don't reset left_ec/right_ec: each child variable is implicitly + * equivalent to its parent, so still a member of the same EC if any. + */ + newinfo->eval_cost.startup = -1; + newinfo->norm_selec = -1; + newinfo->outer_selec = -1; + newinfo->left_em = NULL; + newinfo->right_em = NULL; + newinfo->scansel_cache = NIL; + newinfo->left_bucketsize = -1; + newinfo->right_bucketsize = -1; + newinfo->left_mcvfreq = -1; + newinfo->right_mcvfreq = -1; + + return (Node *) newinfo; + } + + /* + * NOTE: we do not need to recurse into sublinks, because they should + * already have been converted to subplans before we see them. + */ + Assert(!IsA(node, SubLink)); + Assert(!IsA(node, Query)); + /* We should never see these Query substructures, either. */ + Assert(!IsA(node, RangeTblRef)); + Assert(!IsA(node, JoinExpr)); + + return expression_tree_mutator(node, adjust_appendrel_attrs_mutator, + (void *) context); +} + +/* + * adjust_appendrel_attrs_multilevel + * Apply Var translations from a toplevel appendrel parent down to a child. + * + * In some cases we need to translate expressions referencing a parent relation + * to reference an appendrel child that's multiple levels removed from it. + */ +Node * +adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node, + Relids child_relids, + Relids top_parent_relids) +{ + AppendRelInfo **appinfos; + Bitmapset *parent_relids = NULL; + int nappinfos; + int cnt; + + Assert(bms_num_members(child_relids) == bms_num_members(top_parent_relids)); + + appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos); + + /* Construct relids set for the immediate parent of given child. */ + for (cnt = 0; cnt < nappinfos; cnt++) + { + AppendRelInfo *appinfo = appinfos[cnt]; + + parent_relids = bms_add_member(parent_relids, appinfo->parent_relid); + } + + /* Recurse if immediate parent is not the top parent. 
*/ + if (!bms_equal(parent_relids, top_parent_relids)) + node = adjust_appendrel_attrs_multilevel(root, node, parent_relids, + top_parent_relids); + + /* Now translate for this child */ + node = adjust_appendrel_attrs(root, node, nappinfos, appinfos); + + pfree(appinfos); + + return node; +} + +/* + * Substitute child relids for parent relids in a Relid set. The array of + * appinfos specifies the substitutions to be performed. + */ +Relids +adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos) +{ + Bitmapset *result = NULL; + int cnt; + + for (cnt = 0; cnt < nappinfos; cnt++) + { + AppendRelInfo *appinfo = appinfos[cnt]; + + /* Remove parent, add child */ + if (bms_is_member(appinfo->parent_relid, relids)) + { + /* Make a copy if we are changing the set. */ + if (!result) + result = bms_copy(relids); + + result = bms_del_member(result, appinfo->parent_relid); + result = bms_add_member(result, appinfo->child_relid); + } + } + + /* If we made any changes, return the modified copy. */ + if (result) + return result; + + /* Otherwise, return the original set without modification. */ + return relids; +} + +/* + * Replace any relid present in top_parent_relids with its child in + * child_relids. Members of child_relids can be multiple levels below top + * parent in the partition hierarchy. + */ +Relids +adjust_child_relids_multilevel(PlannerInfo *root, Relids relids, + Relids child_relids, Relids top_parent_relids) +{ + AppendRelInfo **appinfos; + int nappinfos; + Relids parent_relids = NULL; + Relids result; + Relids tmp_result = NULL; + int cnt; + + /* + * If the given relids set doesn't contain any of the top parent relids, + * it will remain unchanged. + */ + if (!bms_overlap(relids, top_parent_relids)) + return relids; + + appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos); + + /* Construct relids set for the immediate parent of the given child. */ + for (cnt = 0; cnt < nappinfos; cnt++) + { + AppendRelInfo *appinfo = appinfos[cnt]; + + parent_relids = bms_add_member(parent_relids, appinfo->parent_relid); + } + + /* Recurse if immediate parent is not the top parent. */ + if (!bms_equal(parent_relids, top_parent_relids)) + { + tmp_result = adjust_child_relids_multilevel(root, relids, + parent_relids, + top_parent_relids); + relids = tmp_result; + } + + result = adjust_child_relids(relids, nappinfos, appinfos); + + /* Free memory consumed by any intermediate result. */ + if (tmp_result) + bms_free(tmp_result); + bms_free(parent_relids); + pfree(appinfos); + + return result; +} + +/* + * adjust_inherited_attnums + * Translate an integer list of attribute numbers from parent to child. 
+ */ +List * +adjust_inherited_attnums(List *attnums, AppendRelInfo *context) +{ + List *result = NIL; + ListCell *lc; + + /* This should only happen for an inheritance case, not UNION ALL */ + Assert(OidIsValid(context->parent_reloid)); + + /* Look up each attribute in the AppendRelInfo's translated_vars list */ + foreach(lc, attnums) + { + AttrNumber parentattno = lfirst_int(lc); + Var *childvar; + + /* Look up the translation of this column: it must be a Var */ + if (parentattno <= 0 || + parentattno > list_length(context->translated_vars)) + elog(ERROR, "attribute %d of relation \"%s\" does not exist", + parentattno, get_rel_name(context->parent_reloid)); + childvar = (Var *) list_nth(context->translated_vars, parentattno - 1); + if (childvar == NULL || !IsA(childvar, Var)) + elog(ERROR, "attribute %d of relation \"%s\" does not exist", + parentattno, get_rel_name(context->parent_reloid)); + + result = lappend_int(result, childvar->varattno); + } + return result; +} + +/* + * adjust_inherited_attnums_multilevel + * As above, but traverse multiple inheritance levels as needed. + */ +List * +adjust_inherited_attnums_multilevel(PlannerInfo *root, List *attnums, + Index child_relid, Index top_parent_relid) +{ + AppendRelInfo *appinfo = root->append_rel_array[child_relid]; + + if (!appinfo) + elog(ERROR, "child rel %d not found in append_rel_array", child_relid); + + /* Recurse if immediate parent is not the top parent. */ + if (appinfo->parent_relid != top_parent_relid) + attnums = adjust_inherited_attnums_multilevel(root, attnums, + appinfo->parent_relid, + top_parent_relid); + + /* Now translate for this child */ + return adjust_inherited_attnums(attnums, appinfo); +} + +/* + * get_translated_update_targetlist + * Get the processed_tlist of an UPDATE query, translated as needed to + * match a child target relation. + * + * Optionally also return the list of target column numbers translated + * to this target relation. (The resnos in processed_tlist MUST NOT be + * relied on for this purpose.) + */ +void +get_translated_update_targetlist(PlannerInfo *root, Index relid, + List **processed_tlist, List **update_colnos) +{ + /* This is pretty meaningless for commands other than UPDATE. */ + Assert(root->parse->commandType == CMD_UPDATE); + if (relid == root->parse->resultRelation) + { + /* + * Non-inheritance case, so it's easy. The caller might be expecting + * a tree it can scribble on, though, so copy. + */ + *processed_tlist = copyObject(root->processed_tlist); + if (update_colnos) + *update_colnos = copyObject(root->update_colnos); + } + else + { + Assert(bms_is_member(relid, root->all_result_relids)); + *processed_tlist = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) root->processed_tlist, + bms_make_singleton(relid), + bms_make_singleton(root->parse->resultRelation)); + if (update_colnos) + *update_colnos = + adjust_inherited_attnums_multilevel(root, root->update_colnos, + relid, + root->parse->resultRelation); + } +} + +/* + * find_appinfos_by_relids + * Find AppendRelInfo structures for all relations specified by relids. + * + * The AppendRelInfos are returned in an array, which can be pfree'd by the + * caller. *nappinfos is set to the number of entries in the array. 
+ */ +AppendRelInfo ** +find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos) +{ + AppendRelInfo **appinfos; + int cnt = 0; + int i; + + *nappinfos = bms_num_members(relids); + appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos); + + i = -1; + while ((i = bms_next_member(relids, i)) >= 0) + { + AppendRelInfo *appinfo = root->append_rel_array[i]; + + if (!appinfo) + elog(ERROR, "child rel %d not found in append_rel_array", i); + + appinfos[cnt++] = appinfo; + } + return appinfos; +} + + +/***************************************************************************** + * + * ROW-IDENTITY VARIABLE MANAGEMENT + * + * This code lacks a good home, perhaps. We choose to keep it here because + * adjust_appendrel_attrs_mutator() is its principal co-conspirator. That + * function does most of what is needed to expand ROWID_VAR Vars into the + * right things. + * + *****************************************************************************/ + +/* + * add_row_identity_var + * Register a row-identity column to be used in UPDATE/DELETE/MERGE. + * + * The Var must be equal(), aside from varno, to any other row-identity + * column with the same rowid_name. Thus, for example, "wholerow" + * row identities had better use vartype == RECORDOID. + * + * rtindex is currently redundant with rowid_var->varno, but we specify + * it as a separate parameter in case this is ever generalized to support + * non-Var expressions. (We could reasonably handle expressions over + * Vars of the specified rtindex, but for now that seems unnecessary.) + */ +void +add_row_identity_var(PlannerInfo *root, Var *orig_var, + Index rtindex, const char *rowid_name) +{ + TargetEntry *tle; + Var *rowid_var; + RowIdentityVarInfo *ridinfo; + ListCell *lc; + + /* For now, the argument must be just a Var of the given rtindex */ + Assert(IsA(orig_var, Var)); + Assert(orig_var->varno == rtindex); + Assert(orig_var->varlevelsup == 0); + + /* + * If we're doing non-inherited UPDATE/DELETE/MERGE, there's little need + * for ROWID_VAR shenanigans. Just shove the presented Var into the + * processed_tlist, and we're done. + */ + if (rtindex == root->parse->resultRelation) + { + tle = makeTargetEntry((Expr *) orig_var, + list_length(root->processed_tlist) + 1, + pstrdup(rowid_name), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + return; + } + + /* + * Otherwise, rtindex should reference a leaf target relation that's being + * added to the query during expand_inherited_rtentry(). + */ + Assert(bms_is_member(rtindex, root->leaf_result_relids)); + Assert(root->append_rel_array[rtindex] != NULL); + + /* + * We have to find a matching RowIdentityVarInfo, or make one if there is + * none. To allow using equal() to match the vars, change the varno to + * ROWID_VAR, leaving all else alone. 
+ */ + rowid_var = copyObject(orig_var); + /* This could eventually become ChangeVarNodes() */ + rowid_var->varno = ROWID_VAR; + + /* Look for an existing row-id column of the same name */ + foreach(lc, root->row_identity_vars) + { + ridinfo = (RowIdentityVarInfo *) lfirst(lc); + if (strcmp(rowid_name, ridinfo->rowidname) != 0) + continue; + if (equal(rowid_var, ridinfo->rowidvar)) + { + /* Found a match; we need only record that rtindex needs it too */ + ridinfo->rowidrels = bms_add_member(ridinfo->rowidrels, rtindex); + return; + } + else + { + /* Ooops, can't handle this */ + elog(ERROR, "conflicting uses of row-identity name \"%s\"", + rowid_name); + } + } + + /* No request yet, so add a new RowIdentityVarInfo */ + ridinfo = makeNode(RowIdentityVarInfo); + ridinfo->rowidvar = copyObject(rowid_var); + /* for the moment, estimate width using just the datatype info */ + ridinfo->rowidwidth = get_typavgwidth(exprType((Node *) rowid_var), + exprTypmod((Node *) rowid_var)); + ridinfo->rowidname = pstrdup(rowid_name); + ridinfo->rowidrels = bms_make_singleton(rtindex); + + root->row_identity_vars = lappend(root->row_identity_vars, ridinfo); + + /* Change rowid_var into a reference to this row_identity_vars entry */ + rowid_var->varattno = list_length(root->row_identity_vars); + + /* Push the ROWID_VAR reference variable into processed_tlist */ + tle = makeTargetEntry((Expr *) rowid_var, + list_length(root->processed_tlist) + 1, + pstrdup(rowid_name), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); +} + +/* + * add_row_identity_columns + * + * This function adds the row identity columns needed by the core code. + * FDWs might call add_row_identity_var() for themselves to add nonstandard + * columns. (Duplicate requests are fine.) + */ +void +add_row_identity_columns(PlannerInfo *root, Index rtindex, + RangeTblEntry *target_rte, + Relation target_relation) +{ + CmdType commandType = root->parse->commandType; + char relkind = target_relation->rd_rel->relkind; + Var *var; + + Assert(commandType == CMD_UPDATE || commandType == CMD_DELETE || commandType == CMD_MERGE); + + if (commandType == CMD_MERGE || + relkind == RELKIND_RELATION || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * Emit CTID so that executor can find the row to merge, update or + * delete. + */ + var = makeVar(rtindex, + SelfItemPointerAttributeNumber, + TIDOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "ctid"); + } + else if (relkind == RELKIND_FOREIGN_TABLE) + { + /* + * Let the foreign table's FDW add whatever junk TLEs it wants. + */ + FdwRoutine *fdwroutine; + + fdwroutine = GetFdwRoutineForRelation(target_relation, false); + + if (fdwroutine->AddForeignUpdateTargets != NULL) + fdwroutine->AddForeignUpdateTargets(root, rtindex, + target_rte, target_relation); + + /* + * For UPDATE, we need to make the FDW fetch unchanged columns by + * asking it to fetch a whole-row Var. That's because the top-level + * targetlist only contains entries for changed columns, but + * ExecUpdate will need to build the complete new tuple. (Actually, + * we only really need this in UPDATEs that are not pushed to the + * remote side, but it's hard to tell if that will be the case at the + * point when this function is called.) + * + * We will also need the whole row if there are any row triggers, so + * that the executor will have the "old" row to pass to the trigger. + * Alas, this misses system columns. 
+ */ + if (commandType == CMD_UPDATE || + (target_relation->trigdesc && + (target_relation->trigdesc->trig_delete_after_row || + target_relation->trigdesc->trig_delete_before_row))) + { + var = makeVar(rtindex, + InvalidAttrNumber, + RECORDOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "wholerow"); + } + } +} + +/* + * distribute_row_identity_vars + * + * After we have finished identifying all the row identity columns + * needed by an inherited UPDATE/DELETE/MERGE query, make sure that + * these columns will be generated by all the target relations. + * + * This is more or less like what build_base_rel_tlists() does, + * except that it would not understand what to do with ROWID_VAR Vars. + * Since that function runs before inheritance relations are expanded, + * it will never see any such Vars anyway. + */ +void +distribute_row_identity_vars(PlannerInfo *root) +{ + Query *parse = root->parse; + int result_relation = parse->resultRelation; + RangeTblEntry *target_rte; + RelOptInfo *target_rel; + ListCell *lc; + + /* + * There's nothing to do if this isn't an inherited UPDATE/DELETE/MERGE. + */ + if (parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE && + parse->commandType != CMD_MERGE) + { + Assert(root->row_identity_vars == NIL); + return; + } + target_rte = rt_fetch(result_relation, parse->rtable); + if (!target_rte->inh) + { + Assert(root->row_identity_vars == NIL); + return; + } + + /* + * Ordinarily, we expect that leaf result relation(s) will have added some + * ROWID_VAR Vars to the query. However, it's possible that constraint + * exclusion suppressed every leaf relation. The executor will get upset + * if the plan has no row identity columns at all, even though it will + * certainly process no rows. Handle this edge case by re-opening the top + * result relation and adding the row identity columns it would have used, + * as preprocess_targetlist() would have done if it weren't marked "inh". + * Then re-run build_base_rel_tlists() to ensure that the added columns + * get propagated to the relation's reltarget. (This is a bit ugly, but + * it seems better to confine the ugliness and extra cycles to this + * unusual corner case.) + */ + if (root->row_identity_vars == NIL) + { + Relation target_relation; + + target_relation = table_open(target_rte->relid, NoLock); + add_row_identity_columns(root, result_relation, + target_rte, target_relation); + table_close(target_relation, NoLock); + build_base_rel_tlists(root, root->processed_tlist); + /* There are no ROWID_VAR Vars in this case, so we're done. */ + return; + } + + /* + * Dig through the processed_tlist to find the ROWID_VAR reference Vars, + * and forcibly copy them into the reltarget list of the topmost target + * relation. That's sufficient because they'll be copied to the + * individual leaf target rels (with appropriate translation) later, + * during appendrel expansion --- see set_append_rel_size(). 
+ */ + target_rel = find_base_rel(root, result_relation); + + foreach(lc, root->processed_tlist) + { + TargetEntry *tle = lfirst(lc); + Var *var = (Var *) tle->expr; + + if (var && IsA(var, Var) && var->varno == ROWID_VAR) + { + target_rel->reltarget->exprs = + lappend(target_rel->reltarget->exprs, copyObject(var)); + /* reltarget cost and width will be computed later */ + } + } +} diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c new file mode 100644 index 0000000..f2216f5 --- /dev/null +++ b/src/backend/optimizer/util/clauses.c @@ -0,0 +1,5255 @@ +/*------------------------------------------------------------------------- + * + * clauses.c + * routines to manipulate qualification clauses + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/clauses.c + * + * HISTORY + * AUTHOR DATE MAJOR EVENT + * Andrew Yu Nov 3, 1994 clause.c and clauses.c combined + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_class.h" +#include "catalog/pg_language.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "executor/functions.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/subscripting.h" +#include "nodes/supportnodes.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "parser/analyze.h" +#include "parser/parse_agg.h" +#include "parser/parse_coerce.h" +#include "parser/parse_func.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "tcop/tcopprot.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +typedef struct +{ + ParamListInfo boundParams; + PlannerInfo *root; + List *active_fns; + Node *case_val; + bool estimate; +} eval_const_expressions_context; + +typedef struct +{ + int nargs; + List *args; + int *usecounts; +} substitute_actual_parameters_context; + +typedef struct +{ + int nargs; + List *args; + int sublevels_up; +} substitute_actual_srf_parameters_context; + +typedef struct +{ + char *proname; + char *prosrc; +} inline_error_callback_arg; + +typedef struct +{ + char max_hazard; /* worst proparallel hazard found so far */ + char max_interesting; /* worst proparallel hazard of interest */ + List *safe_param_ids; /* PARAM_EXEC Param IDs to treat as safe */ +} max_parallel_hazard_context; + +static bool contain_agg_clause_walker(Node *node, void *context); +static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); +static bool contain_subplans_walker(Node *node, void *context); +static bool contain_mutable_functions_walker(Node *node, void *context); +static bool contain_volatile_functions_walker(Node *node, void *context); +static bool contain_volatile_functions_not_nextval_walker(Node *node, void *context); +static bool max_parallel_hazard_walker(Node *node, + max_parallel_hazard_context *context); +static bool 
contain_nonstrict_functions_walker(Node *node, void *context); +static bool contain_exec_param_walker(Node *node, List *param_ids); +static bool contain_context_dependent_node(Node *clause); +static bool contain_context_dependent_node_walker(Node *node, int *flags); +static bool contain_leaked_vars_walker(Node *node, void *context); +static Relids find_nonnullable_rels_walker(Node *node, bool top_level); +static List *find_nonnullable_vars_walker(Node *node, bool top_level); +static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK); +static bool convert_saop_to_hashed_saop_walker(Node *node, void *context); +static Node *eval_const_expressions_mutator(Node *node, + eval_const_expressions_context *context); +static bool contain_non_const_walker(Node *node, void *context); +static bool ece_function_is_safe(Oid funcid, + eval_const_expressions_context *context); +static List *simplify_or_arguments(List *args, + eval_const_expressions_context *context, + bool *haveNull, bool *forceTrue); +static List *simplify_and_arguments(List *args, + eval_const_expressions_context *context, + bool *haveNull, bool *forceFalse); +static Node *simplify_boolean_equality(Oid opno, List *args); +static Expr *simplify_function(Oid funcid, + Oid result_type, int32 result_typmod, + Oid result_collid, Oid input_collid, List **args_p, + bool funcvariadic, bool process_args, bool allow_non_const, + eval_const_expressions_context *context); +static List *reorder_function_arguments(List *args, int pronargs, + HeapTuple func_tuple); +static List *add_function_defaults(List *args, int pronargs, + HeapTuple func_tuple); +static List *fetch_function_defaults(HeapTuple func_tuple); +static void recheck_cast_function_args(List *args, Oid result_type, + Oid *proargtypes, int pronargs, + HeapTuple func_tuple); +static Expr *evaluate_function(Oid funcid, Oid result_type, int32 result_typmod, + Oid result_collid, Oid input_collid, List *args, + bool funcvariadic, + HeapTuple func_tuple, + eval_const_expressions_context *context); +static Expr *inline_function(Oid funcid, Oid result_type, Oid result_collid, + Oid input_collid, List *args, + bool funcvariadic, + HeapTuple func_tuple, + eval_const_expressions_context *context); +static Node *substitute_actual_parameters(Node *expr, int nargs, List *args, + int *usecounts); +static Node *substitute_actual_parameters_mutator(Node *node, + substitute_actual_parameters_context *context); +static void sql_inline_error_callback(void *arg); +static Query *substitute_actual_srf_parameters(Query *expr, + int nargs, List *args); +static Node *substitute_actual_srf_parameters_mutator(Node *node, + substitute_actual_srf_parameters_context *context); +static bool pull_paramids_walker(Node *node, Bitmapset **context); + + +/***************************************************************************** + * Aggregate-function clause manipulation + *****************************************************************************/ + +/* + * contain_agg_clause + * Recursively search for Aggref/GroupingFunc nodes within a clause. + * + * Returns true if any aggregate found. + * + * This does not descend into subqueries, and so should be used only after + * reduction of sublinks to subplans, or in contexts where it's known there + * are no subqueries. There mustn't be outer-aggregate references either. + * + * (If you want something like this but able to deal with subqueries, + * see rewriteManip.c's contain_aggs_of_level().) 
+ */ +bool +contain_agg_clause(Node *clause) +{ + return contain_agg_clause_walker(clause, NULL); +} + +static bool +contain_agg_clause_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Assert(((Aggref *) node)->agglevelsup == 0); + return true; /* abort the tree traversal and return true */ + } + if (IsA(node, GroupingFunc)) + { + Assert(((GroupingFunc *) node)->agglevelsup == 0); + return true; /* abort the tree traversal and return true */ + } + Assert(!IsA(node, SubLink)); + return expression_tree_walker(node, contain_agg_clause_walker, context); +} + +/***************************************************************************** + * Window-function clause manipulation + *****************************************************************************/ + +/* + * contain_window_function + * Recursively search for WindowFunc nodes within a clause. + * + * Since window functions don't have level fields, but are hard-wired to + * be associated with the current query level, this is just the same as + * rewriteManip.c's function. + */ +bool +contain_window_function(Node *clause) +{ + return contain_windowfuncs(clause); +} + +/* + * find_window_functions + * Locate all the WindowFunc nodes in an expression tree, and organize + * them by winref ID number. + * + * Caller must provide an upper bound on the winref IDs expected in the tree. + */ +WindowFuncLists * +find_window_functions(Node *clause, Index maxWinRef) +{ + WindowFuncLists *lists = palloc(sizeof(WindowFuncLists)); + + lists->numWindowFuncs = 0; + lists->maxWinRef = maxWinRef; + lists->windowFuncs = (List **) palloc0((maxWinRef + 1) * sizeof(List *)); + (void) find_window_functions_walker(clause, lists); + return lists; +} + +static bool +find_window_functions_walker(Node *node, WindowFuncLists *lists) +{ + if (node == NULL) + return false; + if (IsA(node, WindowFunc)) + { + WindowFunc *wfunc = (WindowFunc *) node; + + /* winref is unsigned, so one-sided test is OK */ + if (wfunc->winref > lists->maxWinRef) + elog(ERROR, "WindowFunc contains out-of-range winref %u", + wfunc->winref); + /* eliminate duplicates, so that we avoid repeated computation */ + if (!list_member(lists->windowFuncs[wfunc->winref], wfunc)) + { + lists->windowFuncs[wfunc->winref] = + lappend(lists->windowFuncs[wfunc->winref], wfunc); + lists->numWindowFuncs++; + } + + /* + * We assume that the parser checked that there are no window + * functions in the arguments or filter clause. Hence, we need not + * recurse into them. (If either the parser or the planner screws up + * on this point, the executor will still catch it; see ExecInitExpr.) + */ + return false; + } + Assert(!IsA(node, SubLink)); + return expression_tree_walker(node, find_window_functions_walker, + (void *) lists); +} + + +/***************************************************************************** + * Support for expressions returning sets + *****************************************************************************/ + +/* + * expression_returns_set_rows + * Estimate the number of rows returned by a set-returning expression. + * The result is 1 if it's not a set-returning expression. + * + * We should only examine the top-level function or operator; it used to be + * appropriate to recurse, but not anymore. (Even if there are more SRFs in + * the function's inputs, their multipliers are accounted for separately.) + * + * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c. 
+ */ +double +expression_returns_set_rows(PlannerInfo *root, Node *clause) +{ + if (clause == NULL) + return 1.0; + if (IsA(clause, FuncExpr)) + { + FuncExpr *expr = (FuncExpr *) clause; + + if (expr->funcretset) + return clamp_row_est(get_function_rows(root, expr->funcid, clause)); + } + if (IsA(clause, OpExpr)) + { + OpExpr *expr = (OpExpr *) clause; + + if (expr->opretset) + { + set_opfuncid(expr); + return clamp_row_est(get_function_rows(root, expr->opfuncid, clause)); + } + } + return 1.0; +} + + +/***************************************************************************** + * Subplan clause manipulation + *****************************************************************************/ + +/* + * contain_subplans + * Recursively search for subplan nodes within a clause. + * + * If we see a SubLink node, we will return true. This is only possible if + * the expression tree hasn't yet been transformed by subselect.c. We do not + * know whether the node will produce a true subplan or just an initplan, + * but we make the conservative assumption that it will be a subplan. + * + * Returns true if any subplan found. + */ +bool +contain_subplans(Node *clause) +{ + return contain_subplans_walker(clause, NULL); +} + +static bool +contain_subplans_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, SubPlan) || + IsA(node, AlternativeSubPlan) || + IsA(node, SubLink)) + return true; /* abort the tree traversal and return true */ + return expression_tree_walker(node, contain_subplans_walker, context); +} + + +/***************************************************************************** + * Check clauses for mutable functions + *****************************************************************************/ + +/* + * contain_mutable_functions + * Recursively search for mutable functions within a clause. + * + * Returns true if any mutable function (or operator implemented by a + * mutable function) is found. This test is needed so that we don't + * mistakenly think that something like "WHERE random() < 0.5" can be treated + * as a constant qualification. + * + * We will recursively look into Query nodes (i.e., SubLink sub-selects) + * but not into SubPlans. See comments for contain_volatile_functions(). + */ +bool +contain_mutable_functions(Node *clause) +{ + return contain_mutable_functions_walker(clause, NULL); +} + +static bool +contain_mutable_functions_checker(Oid func_id, void *context) +{ + return (func_volatile(func_id) != PROVOLATILE_IMMUTABLE); +} + +static bool +contain_mutable_functions_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + /* Check for mutable functions in node itself */ + if (check_functions_in_node(node, contain_mutable_functions_checker, + context)) + return true; + + if (IsA(node, SQLValueFunction)) + { + /* all variants of SQLValueFunction are stable */ + return true; + } + + if (IsA(node, NextValueExpr)) + { + /* NextValueExpr is volatile */ + return true; + } + + /* + * It should be safe to treat MinMaxExpr as immutable, because it will + * depend on a non-cross-type btree comparison function, and those should + * always be immutable. Treating XmlExpr as immutable is more dubious, + * and treating CoerceToDomain as immutable is outright dangerous. But we + * have done so historically, and changing this would probably cause more + * problems than it would fix. In practice, if you have a non-immutable + * domain constraint you are in for pain anyhow. 
+ */ + + /* Recurse to check arguments */ + if (IsA(node, Query)) + { + /* Recurse into subselects */ + return query_tree_walker((Query *) node, + contain_mutable_functions_walker, + context, 0); + } + return expression_tree_walker(node, contain_mutable_functions_walker, + context); +} + + +/***************************************************************************** + * Check clauses for volatile functions + *****************************************************************************/ + +/* + * contain_volatile_functions + * Recursively search for volatile functions within a clause. + * + * Returns true if any volatile function (or operator implemented by a + * volatile function) is found. This test prevents, for example, + * invalid conversions of volatile expressions into indexscan quals. + * + * We will recursively look into Query nodes (i.e., SubLink sub-selects) + * but not into SubPlans. This is a bit odd, but intentional. If we are + * looking at a SubLink, we are probably deciding whether a query tree + * transformation is safe, and a contained sub-select should affect that; + * for example, duplicating a sub-select containing a volatile function + * would be bad. However, once we've got to the stage of having SubPlans, + * subsequent planning need not consider volatility within those, since + * the executor won't change its evaluation rules for a SubPlan based on + * volatility. + * + * For some node types, for example, RestrictInfo and PathTarget, we cache + * whether we found any volatile functions or not and reuse that value in any + * future checks for that node. All of the logic for determining if the + * cached value should be set to VOLATILITY_NOVOLATILE or VOLATILITY_VOLATILE + * belongs in this function. Any code which makes changes to these nodes + * which could change the outcome this function must set the cached value back + * to VOLATILITY_UNKNOWN. That allows this function to redetermine the + * correct value during the next call, should we need to redetermine if the + * node contains any volatile functions again in the future. + */ +bool +contain_volatile_functions(Node *clause) +{ + return contain_volatile_functions_walker(clause, NULL); +} + +static bool +contain_volatile_functions_checker(Oid func_id, void *context) +{ + return (func_volatile(func_id) == PROVOLATILE_VOLATILE); +} + +static bool +contain_volatile_functions_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + /* Check for volatile functions in node itself */ + if (check_functions_in_node(node, contain_volatile_functions_checker, + context)) + return true; + + if (IsA(node, NextValueExpr)) + { + /* NextValueExpr is volatile */ + return true; + } + + if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + + /* + * For RestrictInfo, check if we've checked the volatility of it + * before. If so, we can just use the cached value and not bother + * checking it again. Otherwise, check it and cache if whether we + * found any volatile functions. 
+ */ + if (rinfo->has_volatile == VOLATILITY_NOVOLATILE) + return false; + else if (rinfo->has_volatile == VOLATILITY_VOLATILE) + return true; + else + { + bool hasvolatile; + + hasvolatile = contain_volatile_functions_walker((Node *) rinfo->clause, + context); + if (hasvolatile) + rinfo->has_volatile = VOLATILITY_VOLATILE; + else + rinfo->has_volatile = VOLATILITY_NOVOLATILE; + + return hasvolatile; + } + } + + if (IsA(node, PathTarget)) + { + PathTarget *target = (PathTarget *) node; + + /* + * We also do caching for PathTarget the same as we do above for + * RestrictInfos. + */ + if (target->has_volatile_expr == VOLATILITY_NOVOLATILE) + return false; + else if (target->has_volatile_expr == VOLATILITY_VOLATILE) + return true; + else + { + bool hasvolatile; + + hasvolatile = contain_volatile_functions_walker((Node *) target->exprs, + context); + + if (hasvolatile) + target->has_volatile_expr = VOLATILITY_VOLATILE; + else + target->has_volatile_expr = VOLATILITY_NOVOLATILE; + + return hasvolatile; + } + } + + /* + * See notes in contain_mutable_functions_walker about why we treat + * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable, while + * SQLValueFunction is stable. Hence, none of them are of interest here. + */ + + /* Recurse to check arguments */ + if (IsA(node, Query)) + { + /* Recurse into subselects */ + return query_tree_walker((Query *) node, + contain_volatile_functions_walker, + context, 0); + } + return expression_tree_walker(node, contain_volatile_functions_walker, + context); +} + +/* + * Special purpose version of contain_volatile_functions() for use in COPY: + * ignore nextval(), but treat all other functions normally. + */ +bool +contain_volatile_functions_not_nextval(Node *clause) +{ + return contain_volatile_functions_not_nextval_walker(clause, NULL); +} + +static bool +contain_volatile_functions_not_nextval_checker(Oid func_id, void *context) +{ + return (func_id != F_NEXTVAL && + func_volatile(func_id) == PROVOLATILE_VOLATILE); +} + +static bool +contain_volatile_functions_not_nextval_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + /* Check for volatile functions in node itself */ + if (check_functions_in_node(node, + contain_volatile_functions_not_nextval_checker, + context)) + return true; + + /* + * See notes in contain_mutable_functions_walker about why we treat + * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable, while + * SQLValueFunction is stable. Hence, none of them are of interest here. + * Also, since we're intentionally ignoring nextval(), presumably we + * should ignore NextValueExpr. + */ + + /* Recurse to check arguments */ + if (IsA(node, Query)) + { + /* Recurse into subselects */ + return query_tree_walker((Query *) node, + contain_volatile_functions_not_nextval_walker, + context, 0); + } + return expression_tree_walker(node, + contain_volatile_functions_not_nextval_walker, + context); +} + + +/***************************************************************************** + * Check queries for parallel unsafe and/or restricted constructs + *****************************************************************************/ + +/* + * max_parallel_hazard + * Find the worst parallel-hazard level in the given query + * + * Returns the worst function hazard property (the earliest in this list: + * PROPARALLEL_UNSAFE, PROPARALLEL_RESTRICTED, PROPARALLEL_SAFE) that can + * be found in the given parsetree. We use this to find out whether the query + * can be parallelized at all. 
The caller will also save the result in + * PlannerGlobal so as to short-circuit checks of portions of the querytree + * later, in the common case where everything is SAFE. + */ +char +max_parallel_hazard(Query *parse) +{ + max_parallel_hazard_context context; + + context.max_hazard = PROPARALLEL_SAFE; + context.max_interesting = PROPARALLEL_UNSAFE; + context.safe_param_ids = NIL; + (void) max_parallel_hazard_walker((Node *) parse, &context); + return context.max_hazard; +} + +/* + * is_parallel_safe + * Detect whether the given expr contains only parallel-safe functions + * + * root->glob->maxParallelHazard must previously have been set to the + * result of max_parallel_hazard() on the whole query. + */ +bool +is_parallel_safe(PlannerInfo *root, Node *node) +{ + max_parallel_hazard_context context; + PlannerInfo *proot; + ListCell *l; + + /* + * Even if the original querytree contained nothing unsafe, we need to + * search the expression if we have generated any PARAM_EXEC Params while + * planning, because those are parallel-restricted and there might be one + * in this expression. But otherwise we don't need to look. + */ + if (root->glob->maxParallelHazard == PROPARALLEL_SAFE && + root->glob->paramExecTypes == NIL) + return true; + /* Else use max_parallel_hazard's search logic, but stop on RESTRICTED */ + context.max_hazard = PROPARALLEL_SAFE; + context.max_interesting = PROPARALLEL_RESTRICTED; + context.safe_param_ids = NIL; + + /* + * The params that refer to the same or parent query level are considered + * parallel-safe. The idea is that we compute such params at Gather or + * Gather Merge node and pass their value to workers. + */ + for (proot = root; proot != NULL; proot = proot->parent_root) + { + foreach(l, proot->init_plans) + { + SubPlan *initsubplan = (SubPlan *) lfirst(l); + + context.safe_param_ids = list_concat(context.safe_param_ids, + initsubplan->setParam); + } + } + + return !max_parallel_hazard_walker(node, &context); +} + +/* core logic for all parallel-hazard checks */ +static bool +max_parallel_hazard_test(char proparallel, max_parallel_hazard_context *context) +{ + switch (proparallel) + { + case PROPARALLEL_SAFE: + /* nothing to see here, move along */ + break; + case PROPARALLEL_RESTRICTED: + /* increase max_hazard to RESTRICTED */ + Assert(context->max_hazard != PROPARALLEL_UNSAFE); + context->max_hazard = proparallel; + /* done if we are not expecting any unsafe functions */ + if (context->max_interesting == proparallel) + return true; + break; + case PROPARALLEL_UNSAFE: + context->max_hazard = proparallel; + /* we're always done at the first unsafe construct */ + return true; + default: + elog(ERROR, "unrecognized proparallel value \"%c\"", proparallel); + break; + } + return false; +} + +/* check_functions_in_node callback */ +static bool +max_parallel_hazard_checker(Oid func_id, void *context) +{ + return max_parallel_hazard_test(func_parallel(func_id), + (max_parallel_hazard_context *) context); +} + +static bool +max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) +{ + if (node == NULL) + return false; + + /* Check for hazardous functions in node itself */ + if (check_functions_in_node(node, max_parallel_hazard_checker, + context)) + return true; + + /* + * It should be OK to treat MinMaxExpr as parallel-safe, since btree + * opclass support functions are generally parallel-safe. XmlExpr is a + * bit more dubious but we can probably get away with it. 
We err on the + * side of caution by treating CoerceToDomain as parallel-restricted. + * (Note: in principle that's wrong because a domain constraint could + * contain a parallel-unsafe function; but useful constraints probably + * never would have such, and assuming they do would cripple use of + * parallel query in the presence of domain types.) SQLValueFunction + * should be safe in all cases. NextValueExpr is parallel-unsafe. + */ + if (IsA(node, CoerceToDomain)) + { + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + } + + else if (IsA(node, NextValueExpr)) + { + if (max_parallel_hazard_test(PROPARALLEL_UNSAFE, context)) + return true; + } + + /* + * Treat window functions as parallel-restricted because we aren't sure + * whether the input row ordering is fully deterministic, and the output + * of window functions might vary across workers if not. (In some cases, + * like where the window frame orders by a primary key, we could relax + * this restriction. But it doesn't currently seem worth expending extra + * effort to do so.) + */ + else if (IsA(node, WindowFunc)) + { + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + } + + /* + * As a notational convenience for callers, look through RestrictInfo. + */ + else if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + + return max_parallel_hazard_walker((Node *) rinfo->clause, context); + } + + /* + * Really we should not see SubLink during a max_interesting == restricted + * scan, but if we do, return true. + */ + else if (IsA(node, SubLink)) + { + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + } + + /* + * Only parallel-safe SubPlans can be sent to workers. Within the + * testexpr of the SubPlan, Params representing the output columns of the + * subplan can be treated as parallel-safe, so temporarily add their IDs + * to the safe_param_ids list while examining the testexpr. + */ + else if (IsA(node, SubPlan)) + { + SubPlan *subplan = (SubPlan *) node; + List *save_safe_param_ids; + + if (!subplan->parallel_safe && + max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + save_safe_param_ids = context->safe_param_ids; + context->safe_param_ids = list_concat_copy(context->safe_param_ids, + subplan->paramIds); + if (max_parallel_hazard_walker(subplan->testexpr, context)) + return true; /* no need to restore safe_param_ids */ + list_free(context->safe_param_ids); + context->safe_param_ids = save_safe_param_ids; + /* we must also check args, but no special Param treatment there */ + if (max_parallel_hazard_walker((Node *) subplan->args, context)) + return true; + /* don't want to recurse normally, so we're done */ + return false; + } + + /* + * We can't pass Params to workers at the moment either, so they are also + * parallel-restricted, unless they are PARAM_EXTERN Params or are + * PARAM_EXEC Params listed in safe_param_ids, meaning they could be + * either generated within workers or can be computed by the leader and + * then their value can be passed to workers. 
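+ * For example, the output Param of an uncorrelated initplan is listed in
+ * safe_param_ids (see is_parallel_safe above), so an expression comparing
+ * a column to such a subquery's result can still be treated as
+ * parallel-safe.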
+ */ + else if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind == PARAM_EXTERN) + return false; + + if (param->paramkind != PARAM_EXEC || + !list_member_int(context->safe_param_ids, param->paramid)) + { + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + } + return false; /* nothing to recurse to */ + } + + /* + * When we're first invoked on a completely unplanned tree, we must + * recurse into subqueries so to as to locate parallel-unsafe constructs + * anywhere in the tree. + */ + else if (IsA(node, Query)) + { + Query *query = (Query *) node; + + /* SELECT FOR UPDATE/SHARE must be treated as unsafe */ + if (query->rowMarks != NULL) + { + context->max_hazard = PROPARALLEL_UNSAFE; + return true; + } + + /* Recurse into subselects */ + return query_tree_walker(query, + max_parallel_hazard_walker, + context, 0); + } + + /* Recurse to check arguments */ + return expression_tree_walker(node, + max_parallel_hazard_walker, + context); +} + + +/***************************************************************************** + * Check clauses for nonstrict functions + *****************************************************************************/ + +/* + * contain_nonstrict_functions + * Recursively search for nonstrict functions within a clause. + * + * Returns true if any nonstrict construct is found --- ie, anything that + * could produce non-NULL output with a NULL input. + * + * The idea here is that the caller has verified that the expression contains + * one or more Var or Param nodes (as appropriate for the caller's need), and + * now wishes to prove that the expression result will be NULL if any of these + * inputs is NULL. If we return false, then the proof succeeded. + */ +bool +contain_nonstrict_functions(Node *clause) +{ + return contain_nonstrict_functions_walker(clause, NULL); +} + +static bool +contain_nonstrict_functions_checker(Oid func_id, void *context) +{ + return !func_strict(func_id); +} + +static bool +contain_nonstrict_functions_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + /* an aggregate could return non-null with null input */ + return true; + } + if (IsA(node, GroupingFunc)) + { + /* + * A GroupingFunc doesn't evaluate its arguments, and therefore must + * be treated as nonstrict. 
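+ * For example, GROUPING(x) reports which grouping set is being
+ * aggregated and can return 0 even when x itself is NULL.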
+ */ + return true; + } + if (IsA(node, WindowFunc)) + { + /* a window function could return non-null with null input */ + return true; + } + if (IsA(node, SubscriptingRef)) + { + SubscriptingRef *sbsref = (SubscriptingRef *) node; + const SubscriptRoutines *sbsroutines; + + /* Subscripting assignment is always presumed nonstrict */ + if (sbsref->refassgnexpr != NULL) + return true; + /* Otherwise we must look up the subscripting support methods */ + sbsroutines = getSubscriptingRoutines(sbsref->refcontainertype, NULL); + if (!(sbsroutines && sbsroutines->fetch_strict)) + return true; + /* else fall through to check args */ + } + if (IsA(node, DistinctExpr)) + { + /* IS DISTINCT FROM is inherently non-strict */ + return true; + } + if (IsA(node, NullIfExpr)) + { + /* NULLIF is inherently non-strict */ + return true; + } + if (IsA(node, BoolExpr)) + { + BoolExpr *expr = (BoolExpr *) node; + + switch (expr->boolop) + { + case AND_EXPR: + case OR_EXPR: + /* AND, OR are inherently non-strict */ + return true; + default: + break; + } + } + if (IsA(node, SubLink)) + { + /* In some cases a sublink might be strict, but in general not */ + return true; + } + if (IsA(node, SubPlan)) + return true; + if (IsA(node, AlternativeSubPlan)) + return true; + if (IsA(node, FieldStore)) + return true; + if (IsA(node, CoerceViaIO)) + { + /* + * CoerceViaIO is strict regardless of whether the I/O functions are, + * so just go look at its argument; asking check_functions_in_node is + * useless expense and could deliver the wrong answer. + */ + return contain_nonstrict_functions_walker((Node *) ((CoerceViaIO *) node)->arg, + context); + } + if (IsA(node, ArrayCoerceExpr)) + { + /* + * ArrayCoerceExpr is strict at the array level, regardless of what + * the per-element expression is; so we should ignore elemexpr and + * recurse only into the arg. + */ + return contain_nonstrict_functions_walker((Node *) ((ArrayCoerceExpr *) node)->arg, + context); + } + if (IsA(node, CaseExpr)) + return true; + if (IsA(node, ArrayExpr)) + return true; + if (IsA(node, RowExpr)) + return true; + if (IsA(node, RowCompareExpr)) + return true; + if (IsA(node, CoalesceExpr)) + return true; + if (IsA(node, MinMaxExpr)) + return true; + if (IsA(node, XmlExpr)) + return true; + if (IsA(node, NullTest)) + return true; + if (IsA(node, BooleanTest)) + return true; + + /* Check other function-containing nodes */ + if (check_functions_in_node(node, contain_nonstrict_functions_checker, + context)) + return true; + + return expression_tree_walker(node, contain_nonstrict_functions_walker, + context); +} + +/***************************************************************************** + * Check clauses for Params + *****************************************************************************/ + +/* + * contain_exec_param + * Recursively search for PARAM_EXEC Params within a clause. + * + * Returns true if the clause contains any PARAM_EXEC Param with a paramid + * appearing in the given list of Param IDs. Does not descend into + * subqueries! 
+ */ +bool +contain_exec_param(Node *clause, List *param_ids) +{ + return contain_exec_param_walker(clause, param_ids); +} + +static bool +contain_exec_param_walker(Node *node, List *param_ids) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) + { + Param *p = (Param *) node; + + if (p->paramkind == PARAM_EXEC && + list_member_int(param_ids, p->paramid)) + return true; + } + return expression_tree_walker(node, contain_exec_param_walker, param_ids); +} + +/***************************************************************************** + * Check clauses for context-dependent nodes + *****************************************************************************/ + +/* + * contain_context_dependent_node + * Recursively search for context-dependent nodes within a clause. + * + * CaseTestExpr nodes must appear directly within the corresponding CaseExpr, + * not nested within another one, or they'll see the wrong test value. If one + * appears "bare" in the arguments of a SQL function, then we can't inline the + * SQL function for fear of creating such a situation. The same applies for + * CaseTestExpr used within the elemexpr of an ArrayCoerceExpr. + * + * CoerceToDomainValue would have the same issue if domain CHECK expressions + * could get inlined into larger expressions, but presently that's impossible. + * Still, it might be allowed in future, or other node types with similar + * issues might get invented. So give this function a generic name, and set + * up the recursion state to allow multiple flag bits. + */ +static bool +contain_context_dependent_node(Node *clause) +{ + int flags = 0; + + return contain_context_dependent_node_walker(clause, &flags); +} + +#define CCDN_CASETESTEXPR_OK 0x0001 /* CaseTestExpr okay here? */ + +static bool +contain_context_dependent_node_walker(Node *node, int *flags) +{ + if (node == NULL) + return false; + if (IsA(node, CaseTestExpr)) + return !(*flags & CCDN_CASETESTEXPR_OK); + else if (IsA(node, CaseExpr)) + { + CaseExpr *caseexpr = (CaseExpr *) node; + + /* + * If this CASE doesn't have a test expression, then it doesn't create + * a context in which CaseTestExprs should appear, so just fall + * through and treat it as a generic expression node. + */ + if (caseexpr->arg) + { + int save_flags = *flags; + bool res; + + /* + * Note: in principle, we could distinguish the various sub-parts + * of a CASE construct and set the flag bit only for some of them, + * since we are only expecting CaseTestExprs to appear in the + * "expr" subtree of the CaseWhen nodes. But it doesn't really + * seem worth any extra code. If there are any bare CaseTestExprs + * elsewhere in the CASE, something's wrong already. 
+ */ + *flags |= CCDN_CASETESTEXPR_OK; + res = expression_tree_walker(node, + contain_context_dependent_node_walker, + (void *) flags); + *flags = save_flags; + return res; + } + } + else if (IsA(node, ArrayCoerceExpr)) + { + ArrayCoerceExpr *ac = (ArrayCoerceExpr *) node; + int save_flags; + bool res; + + /* Check the array expression */ + if (contain_context_dependent_node_walker((Node *) ac->arg, flags)) + return true; + + /* Check the elemexpr, which is allowed to contain CaseTestExpr */ + save_flags = *flags; + *flags |= CCDN_CASETESTEXPR_OK; + res = contain_context_dependent_node_walker((Node *) ac->elemexpr, + flags); + *flags = save_flags; + return res; + } + return expression_tree_walker(node, contain_context_dependent_node_walker, + (void *) flags); +} + +/***************************************************************************** + * Check clauses for Vars passed to non-leakproof functions + *****************************************************************************/ + +/* + * contain_leaked_vars + * Recursively scan a clause to discover whether it contains any Var + * nodes (of the current query level) that are passed as arguments to + * leaky functions. + * + * Returns true if the clause contains any non-leakproof functions that are + * passed Var nodes of the current query level, and which might therefore leak + * data. Such clauses must be applied after any lower-level security barrier + * clauses. + */ +bool +contain_leaked_vars(Node *clause) +{ + return contain_leaked_vars_walker(clause, NULL); +} + +static bool +contain_leaked_vars_checker(Oid func_id, void *context) +{ + return !get_func_leakproof(func_id); +} + +static bool +contain_leaked_vars_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + + switch (nodeTag(node)) + { + case T_Var: + case T_Const: + case T_Param: + case T_ArrayExpr: + case T_FieldSelect: + case T_FieldStore: + case T_NamedArgExpr: + case T_BoolExpr: + case T_RelabelType: + case T_CollateExpr: + case T_CaseExpr: + case T_CaseTestExpr: + case T_RowExpr: + case T_SQLValueFunction: + case T_NullTest: + case T_BooleanTest: + case T_NextValueExpr: + case T_List: + + /* + * We know these node types don't contain function calls; but + * something further down in the node tree might. + */ + break; + + case T_FuncExpr: + case T_OpExpr: + case T_DistinctExpr: + case T_NullIfExpr: + case T_ScalarArrayOpExpr: + case T_CoerceViaIO: + case T_ArrayCoerceExpr: + + /* + * If node contains a leaky function call, and there's any Var + * underneath it, reject. + */ + if (check_functions_in_node(node, contain_leaked_vars_checker, + context) && + contain_var_clause(node)) + return true; + break; + + case T_SubscriptingRef: + { + SubscriptingRef *sbsref = (SubscriptingRef *) node; + const SubscriptRoutines *sbsroutines; + + /* Consult the subscripting support method info */ + sbsroutines = getSubscriptingRoutines(sbsref->refcontainertype, + NULL); + if (!sbsroutines || + !(sbsref->refassgnexpr != NULL ? + sbsroutines->store_leakproof : + sbsroutines->fetch_leakproof)) + { + /* Node is leaky, so reject if it contains Vars */ + if (contain_var_clause(node)) + return true; + } + } + break; + + case T_RowCompareExpr: + { + /* + * It's worth special-casing this because a leaky comparison + * function only compromises one pair of row elements, which + * might not contain Vars while others do. 
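+ * For example, in (t.a, 0) < (1, f()), only the first column pair
+ * involves a Var, so a leaky comparison function used for the second
+ * pair cannot reveal anything about t.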
+ */ + RowCompareExpr *rcexpr = (RowCompareExpr *) node; + ListCell *opid; + ListCell *larg; + ListCell *rarg; + + forthree(opid, rcexpr->opnos, + larg, rcexpr->largs, + rarg, rcexpr->rargs) + { + Oid funcid = get_opcode(lfirst_oid(opid)); + + if (!get_func_leakproof(funcid) && + (contain_var_clause((Node *) lfirst(larg)) || + contain_var_clause((Node *) lfirst(rarg)))) + return true; + } + } + break; + + case T_MinMaxExpr: + { + /* + * MinMaxExpr is leakproof if the comparison function it calls + * is leakproof. + */ + MinMaxExpr *minmaxexpr = (MinMaxExpr *) node; + TypeCacheEntry *typentry; + bool leakproof; + + /* Look up the btree comparison function for the datatype */ + typentry = lookup_type_cache(minmaxexpr->minmaxtype, + TYPECACHE_CMP_PROC); + if (OidIsValid(typentry->cmp_proc)) + leakproof = get_func_leakproof(typentry->cmp_proc); + else + { + /* + * The executor will throw an error, but here we just + * treat the missing function as leaky. + */ + leakproof = false; + } + + if (!leakproof && + contain_var_clause((Node *) minmaxexpr->args)) + return true; + } + break; + + case T_CurrentOfExpr: + + /* + * WHERE CURRENT OF doesn't contain leaky function calls. + * Moreover, it is essential that this is considered non-leaky, + * since the planner must always generate a TID scan when CURRENT + * OF is present -- cf. cost_tidscan. + */ + return false; + + default: + + /* + * If we don't recognize the node tag, assume it might be leaky. + * This prevents an unexpected security hole if someone adds a new + * node type that can call a function. + */ + return true; + } + return expression_tree_walker(node, contain_leaked_vars_walker, + context); +} + +/* + * find_nonnullable_rels + * Determine which base rels are forced nonnullable by given clause. + * + * Returns the set of all Relids that are referenced in the clause in such + * a way that the clause cannot possibly return TRUE if any of these Relids + * is an all-NULL row. (It is OK to err on the side of conservatism; hence + * the analysis here is simplistic.) + * + * The semantics here are subtly different from contain_nonstrict_functions: + * that function is concerned with NULL results from arbitrary expressions, + * but here we assume that the input is a Boolean expression, and wish to + * see if NULL inputs will provably cause a FALSE-or-NULL result. We expect + * the expression to have been AND/OR flattened and converted to implicit-AND + * format. + * + * Note: this function is largely duplicative of find_nonnullable_vars(). + * The reason not to simplify this function into a thin wrapper around + * find_nonnullable_vars() is that the tested conditions really are different: + * a clause like "t1.v1 IS NOT NULL OR t1.v2 IS NOT NULL" does not prove + * that either v1 or v2 can't be NULL, but it does prove that the t1 row + * as a whole can't be all-NULL. Also, the behavior for PHVs is different. + * + * top_level is true while scanning top-level AND/OR structure; here, showing + * the result is either FALSE or NULL is good enough. top_level is false when + * we have descended below a NOT or a strict function: now we must be able to + * prove that the subexpression goes to NULL. + * + * We don't use expression_tree_walker here because we don't want to descend + * through very many kinds of nodes; only the ones we can be sure are strict. 
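+ * For example, "t1.x = t2.y" (with a strict "=" operator) forces both t1
+ * and t2 nonnullable, since the clause cannot yield TRUE if either row is
+ * all-NULL; whereas "t1.x IS NULL" forces nothing.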
+ */ +Relids +find_nonnullable_rels(Node *clause) +{ + return find_nonnullable_rels_walker(clause, true); +} + +static Relids +find_nonnullable_rels_walker(Node *node, bool top_level) +{ + Relids result = NULL; + ListCell *l; + + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == 0) + result = bms_make_singleton(var->varno); + } + else if (IsA(node, List)) + { + /* + * At top level, we are examining an implicit-AND list: if any of the + * arms produces FALSE-or-NULL then the result is FALSE-or-NULL. If + * not at top level, we are examining the arguments of a strict + * function: if any of them produce NULL then the result of the + * function must be NULL. So in both cases, the set of nonnullable + * rels is the union of those found in the arms, and we pass down the + * top_level flag unmodified. + */ + foreach(l, (List *) node) + { + result = bms_join(result, + find_nonnullable_rels_walker(lfirst(l), + top_level)); + } + } + else if (IsA(node, FuncExpr)) + { + FuncExpr *expr = (FuncExpr *) node; + + if (func_strict(expr->funcid)) + result = find_nonnullable_rels_walker((Node *) expr->args, false); + } + else if (IsA(node, OpExpr)) + { + OpExpr *expr = (OpExpr *) node; + + set_opfuncid(expr); + if (func_strict(expr->opfuncid)) + result = find_nonnullable_rels_walker((Node *) expr->args, false); + } + else if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node; + + if (is_strict_saop(expr, true)) + result = find_nonnullable_rels_walker((Node *) expr->args, false); + } + else if (IsA(node, BoolExpr)) + { + BoolExpr *expr = (BoolExpr *) node; + + switch (expr->boolop) + { + case AND_EXPR: + /* At top level we can just recurse (to the List case) */ + if (top_level) + { + result = find_nonnullable_rels_walker((Node *) expr->args, + top_level); + break; + } + + /* + * Below top level, even if one arm produces NULL, the result + * could be FALSE (hence not NULL). However, if *all* the + * arms produce NULL then the result is NULL, so we can take + * the intersection of the sets of nonnullable rels, just as + * for OR. Fall through to share code. + */ + /* FALL THRU */ + case OR_EXPR: + + /* + * OR is strict if all of its arms are, so we can take the + * intersection of the sets of nonnullable rels for each arm. + * This works for both values of top_level. + */ + foreach(l, expr->args) + { + Relids subresult; + + subresult = find_nonnullable_rels_walker(lfirst(l), + top_level); + if (result == NULL) /* first subresult? */ + result = subresult; + else + result = bms_int_members(result, subresult); + + /* + * If the intersection is empty, we can stop looking. This + * also justifies the test for first-subresult above. 
+ */ + if (bms_is_empty(result)) + break; + } + break; + case NOT_EXPR: + /* NOT will return null if its arg is null */ + result = find_nonnullable_rels_walker((Node *) expr->args, + false); + break; + default: + elog(ERROR, "unrecognized boolop: %d", (int) expr->boolop); + break; + } + } + else if (IsA(node, RelabelType)) + { + RelabelType *expr = (RelabelType *) node; + + result = find_nonnullable_rels_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, CoerceViaIO)) + { + /* not clear this is useful, but it can't hurt */ + CoerceViaIO *expr = (CoerceViaIO *) node; + + result = find_nonnullable_rels_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, ArrayCoerceExpr)) + { + /* ArrayCoerceExpr is strict at the array level; ignore elemexpr */ + ArrayCoerceExpr *expr = (ArrayCoerceExpr *) node; + + result = find_nonnullable_rels_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, ConvertRowtypeExpr)) + { + /* not clear this is useful, but it can't hurt */ + ConvertRowtypeExpr *expr = (ConvertRowtypeExpr *) node; + + result = find_nonnullable_rels_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, CollateExpr)) + { + CollateExpr *expr = (CollateExpr *) node; + + result = find_nonnullable_rels_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, NullTest)) + { + /* IS NOT NULL can be considered strict, but only at top level */ + NullTest *expr = (NullTest *) node; + + if (top_level && expr->nulltesttype == IS_NOT_NULL && !expr->argisrow) + result = find_nonnullable_rels_walker((Node *) expr->arg, false); + } + else if (IsA(node, BooleanTest)) + { + /* Boolean tests that reject NULL are strict at top level */ + BooleanTest *expr = (BooleanTest *) node; + + if (top_level && + (expr->booltesttype == IS_TRUE || + expr->booltesttype == IS_FALSE || + expr->booltesttype == IS_NOT_UNKNOWN)) + result = find_nonnullable_rels_walker((Node *) expr->arg, false); + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* + * If the contained expression forces any rels non-nullable, so does + * the PHV. + */ + result = find_nonnullable_rels_walker((Node *) phv->phexpr, top_level); + + /* + * If the PHV's syntactic scope is exactly one rel, it will be forced + * to be evaluated at that rel, and so it will behave like a Var of + * that rel: if the rel's entire output goes to null, so will the PHV. + * (If the syntactic scope is a join, we know that the PHV will go to + * null if the whole join does; but that is AND semantics while we + * need OR semantics for find_nonnullable_rels' result, so we can't do + * anything with the knowledge.) + */ + if (phv->phlevelsup == 0 && + bms_membership(phv->phrels) == BMS_SINGLETON) + result = bms_add_members(result, phv->phrels); + } + return result; +} + +/* + * find_nonnullable_vars + * Determine which Vars are forced nonnullable by given clause. + * + * Returns a list of all level-zero Vars that are referenced in the clause in + * such a way that the clause cannot possibly return TRUE if any of these Vars + * is NULL. (It is OK to err on the side of conservatism; hence the analysis + * here is simplistic.) + * + * The semantics here are subtly different from contain_nonstrict_functions: + * that function is concerned with NULL results from arbitrary expressions, + * but here we assume that the input is a Boolean expression, and wish to + * see if NULL inputs will provably cause a FALSE-or-NULL result. 
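+ * For example, "x > 0 AND y = 1" (with strict operators) proves both x
+ * and y nonnullable, while "x IS NOT NULL OR y IS NOT NULL" proves
+ * neither, since either Var alone could still be NULL.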
We expect + * the expression to have been AND/OR flattened and converted to implicit-AND + * format. + * + * The result is a palloc'd List, but we have not copied the member Var nodes. + * Also, we don't bother trying to eliminate duplicate entries. + * + * top_level is true while scanning top-level AND/OR structure; here, showing + * the result is either FALSE or NULL is good enough. top_level is false when + * we have descended below a NOT or a strict function: now we must be able to + * prove that the subexpression goes to NULL. + * + * We don't use expression_tree_walker here because we don't want to descend + * through very many kinds of nodes; only the ones we can be sure are strict. + */ +List * +find_nonnullable_vars(Node *clause) +{ + return find_nonnullable_vars_walker(clause, true); +} + +static List * +find_nonnullable_vars_walker(Node *node, bool top_level) +{ + List *result = NIL; + ListCell *l; + + if (node == NULL) + return NIL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == 0) + result = list_make1(var); + } + else if (IsA(node, List)) + { + /* + * At top level, we are examining an implicit-AND list: if any of the + * arms produces FALSE-or-NULL then the result is FALSE-or-NULL. If + * not at top level, we are examining the arguments of a strict + * function: if any of them produce NULL then the result of the + * function must be NULL. So in both cases, the set of nonnullable + * vars is the union of those found in the arms, and we pass down the + * top_level flag unmodified. + */ + foreach(l, (List *) node) + { + result = list_concat(result, + find_nonnullable_vars_walker(lfirst(l), + top_level)); + } + } + else if (IsA(node, FuncExpr)) + { + FuncExpr *expr = (FuncExpr *) node; + + if (func_strict(expr->funcid)) + result = find_nonnullable_vars_walker((Node *) expr->args, false); + } + else if (IsA(node, OpExpr)) + { + OpExpr *expr = (OpExpr *) node; + + set_opfuncid(expr); + if (func_strict(expr->opfuncid)) + result = find_nonnullable_vars_walker((Node *) expr->args, false); + } + else if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node; + + if (is_strict_saop(expr, true)) + result = find_nonnullable_vars_walker((Node *) expr->args, false); + } + else if (IsA(node, BoolExpr)) + { + BoolExpr *expr = (BoolExpr *) node; + + switch (expr->boolop) + { + case AND_EXPR: + /* At top level we can just recurse (to the List case) */ + if (top_level) + { + result = find_nonnullable_vars_walker((Node *) expr->args, + top_level); + break; + } + + /* + * Below top level, even if one arm produces NULL, the result + * could be FALSE (hence not NULL). However, if *all* the + * arms produce NULL then the result is NULL, so we can take + * the intersection of the sets of nonnullable vars, just as + * for OR. Fall through to share code. + */ + /* FALL THRU */ + case OR_EXPR: + + /* + * OR is strict if all of its arms are, so we can take the + * intersection of the sets of nonnullable vars for each arm. + * This works for both values of top_level. + */ + foreach(l, expr->args) + { + List *subresult; + + subresult = find_nonnullable_vars_walker(lfirst(l), + top_level); + if (result == NIL) /* first subresult? */ + result = subresult; + else + result = list_intersection(result, subresult); + + /* + * If the intersection is empty, we can stop looking. This + * also justifies the test for first-subresult above. 
+ */ + if (result == NIL) + break; + } + break; + case NOT_EXPR: + /* NOT will return null if its arg is null */ + result = find_nonnullable_vars_walker((Node *) expr->args, + false); + break; + default: + elog(ERROR, "unrecognized boolop: %d", (int) expr->boolop); + break; + } + } + else if (IsA(node, RelabelType)) + { + RelabelType *expr = (RelabelType *) node; + + result = find_nonnullable_vars_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, CoerceViaIO)) + { + /* not clear this is useful, but it can't hurt */ + CoerceViaIO *expr = (CoerceViaIO *) node; + + result = find_nonnullable_vars_walker((Node *) expr->arg, false); + } + else if (IsA(node, ArrayCoerceExpr)) + { + /* ArrayCoerceExpr is strict at the array level; ignore elemexpr */ + ArrayCoerceExpr *expr = (ArrayCoerceExpr *) node; + + result = find_nonnullable_vars_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, ConvertRowtypeExpr)) + { + /* not clear this is useful, but it can't hurt */ + ConvertRowtypeExpr *expr = (ConvertRowtypeExpr *) node; + + result = find_nonnullable_vars_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, CollateExpr)) + { + CollateExpr *expr = (CollateExpr *) node; + + result = find_nonnullable_vars_walker((Node *) expr->arg, top_level); + } + else if (IsA(node, NullTest)) + { + /* IS NOT NULL can be considered strict, but only at top level */ + NullTest *expr = (NullTest *) node; + + if (top_level && expr->nulltesttype == IS_NOT_NULL && !expr->argisrow) + result = find_nonnullable_vars_walker((Node *) expr->arg, false); + } + else if (IsA(node, BooleanTest)) + { + /* Boolean tests that reject NULL are strict at top level */ + BooleanTest *expr = (BooleanTest *) node; + + if (top_level && + (expr->booltesttype == IS_TRUE || + expr->booltesttype == IS_FALSE || + expr->booltesttype == IS_NOT_UNKNOWN)) + result = find_nonnullable_vars_walker((Node *) expr->arg, false); + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + result = find_nonnullable_vars_walker((Node *) phv->phexpr, top_level); + } + return result; +} + +/* + * find_forced_null_vars + * Determine which Vars must be NULL for the given clause to return TRUE. + * + * This is the complement of find_nonnullable_vars: find the level-zero Vars + * that must be NULL for the clause to return TRUE. (It is OK to err on the + * side of conservatism; hence the analysis here is simplistic. In fact, + * we only detect simple "var IS NULL" tests at the top level.) + * + * The result is a palloc'd List, but we have not copied the member Var nodes. + * Also, we don't bother trying to eliminate duplicate entries. + */ +List * +find_forced_null_vars(Node *node) +{ + List *result = NIL; + Var *var; + ListCell *l; + + if (node == NULL) + return NIL; + /* Check single-clause cases using subroutine */ + var = find_forced_null_var(node); + if (var) + { + result = list_make1(var); + } + /* Otherwise, handle AND-conditions */ + else if (IsA(node, List)) + { + /* + * At top level, we are examining an implicit-AND list: if any of the + * arms produces FALSE-or-NULL then the result is FALSE-or-NULL. + */ + foreach(l, (List *) node) + { + result = list_concat(result, + find_forced_null_vars(lfirst(l))); + } + } + else if (IsA(node, BoolExpr)) + { + BoolExpr *expr = (BoolExpr *) node; + + /* + * We don't bother considering the OR case, because it's fairly + * unlikely anyone would write "v1 IS NULL OR v1 IS NULL". Likewise, + * the NOT case isn't worth expending code on. 
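+ * For example, given the implicit-AND list (a IS NULL, b = 1), only "a"
+ * is returned; "b = 1" does not force b to be NULL.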
+ */ + if (expr->boolop == AND_EXPR) + { + /* At top level we can just recurse (to the List case) */ + result = find_forced_null_vars((Node *) expr->args); + } + } + return result; +} + +/* + * find_forced_null_var + * Return the Var forced null by the given clause, or NULL if it's + * not an IS NULL-type clause. For success, the clause must enforce + * *only* nullness of the particular Var, not any other conditions. + * + * This is just the single-clause case of find_forced_null_vars(), without + * any allowance for AND conditions. It's used by initsplan.c on individual + * qual clauses. The reason for not just applying find_forced_null_vars() + * is that if an AND of an IS NULL clause with something else were to somehow + * survive AND/OR flattening, initsplan.c might get fooled into discarding + * the whole clause when only the IS NULL part of it had been proved redundant. + */ +Var * +find_forced_null_var(Node *node) +{ + if (node == NULL) + return NULL; + if (IsA(node, NullTest)) + { + /* check for var IS NULL */ + NullTest *expr = (NullTest *) node; + + if (expr->nulltesttype == IS_NULL && !expr->argisrow) + { + Var *var = (Var *) expr->arg; + + if (var && IsA(var, Var) && + var->varlevelsup == 0) + return var; + } + } + else if (IsA(node, BooleanTest)) + { + /* var IS UNKNOWN is equivalent to var IS NULL */ + BooleanTest *expr = (BooleanTest *) node; + + if (expr->booltesttype == IS_UNKNOWN) + { + Var *var = (Var *) expr->arg; + + if (var && IsA(var, Var) && + var->varlevelsup == 0) + return var; + } + } + return NULL; +} + +/* + * Can we treat a ScalarArrayOpExpr as strict? + * + * If "falseOK" is true, then a "false" result can be considered strict, + * else we need to guarantee an actual NULL result for NULL input. + * + * "foo op ALL array" is strict if the op is strict *and* we can prove + * that the array input isn't an empty array. We can check that + * for the cases of an array constant and an ARRAY[] construct. + * + * "foo op ANY array" is strict in the falseOK sense if the op is strict. + * If not falseOK, the test is the same as for "foo op ALL array". + */ +static bool +is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK) +{ + Node *rightop; + + /* The contained operator must be strict. */ + set_sa_opfuncid(expr); + if (!func_strict(expr->opfuncid)) + return false; + /* If ANY and falseOK, that's all we need to check. */ + if (expr->useOr && falseOK) + return true; + /* Else, we have to see if the array is provably non-empty. */ + Assert(list_length(expr->args) == 2); + rightop = (Node *) lsecond(expr->args); + if (rightop && IsA(rightop, Const)) + { + Datum arraydatum = ((Const *) rightop)->constvalue; + bool arrayisnull = ((Const *) rightop)->constisnull; + ArrayType *arrayval; + int nitems; + + if (arrayisnull) + return false; + arrayval = DatumGetArrayTypeP(arraydatum); + nitems = ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval)); + if (nitems > 0) + return true; + } + else if (rightop && IsA(rightop, ArrayExpr)) + { + ArrayExpr *arrayexpr = (ArrayExpr *) rightop; + + if (arrayexpr->elements != NIL && !arrayexpr->multidims) + return true; + } + return false; +} + + +/***************************************************************************** + * Check for "pseudo-constant" clauses + *****************************************************************************/ + +/* + * is_pseudo_constant_clause + * Detect whether an expression is "pseudo constant", ie, it contains no + * variables of the current query level and no uses of volatile functions. 
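+ * For example, "$1 + 1" or a Var belonging to some outer query level
+ * qualifies, while "random()" or a Var of the current query level does
+ * not.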
+ * Such an expr is not necessarily a true constant: it can still contain + * Params and outer-level Vars, not to mention functions whose results + * may vary from one statement to the next. However, the expr's value + * will be constant over any one scan of the current query, so it can be + * used as, eg, an indexscan key. (Actually, the condition for indexscan + * keys is weaker than this; see is_pseudo_constant_for_index().) + * + * CAUTION: this function omits to test for one very important class of + * not-constant expressions, namely aggregates (Aggrefs). In current usage + * this is only applied to WHERE clauses and so a check for Aggrefs would be + * a waste of cycles; but be sure to also check contain_agg_clause() if you + * want to know about pseudo-constness in other contexts. The same goes + * for window functions (WindowFuncs). + */ +bool +is_pseudo_constant_clause(Node *clause) +{ + /* + * We could implement this check in one recursive scan. But since the + * check for volatile functions is both moderately expensive and unlikely + * to fail, it seems better to look for Vars first and only check for + * volatile functions if we find no Vars. + */ + if (!contain_var_clause(clause) && + !contain_volatile_functions(clause)) + return true; + return false; +} + +/* + * is_pseudo_constant_clause_relids + * Same as above, except caller already has available the var membership + * of the expression; this lets us avoid the contain_var_clause() scan. + */ +bool +is_pseudo_constant_clause_relids(Node *clause, Relids relids) +{ + if (bms_is_empty(relids) && + !contain_volatile_functions(clause)) + return true; + return false; +} + + +/***************************************************************************** + * * + * General clause-manipulating routines * + * * + *****************************************************************************/ + +/* + * NumRelids + * (formerly clause_relids) + * + * Returns the number of different relations referenced in 'clause'. + */ +int +NumRelids(PlannerInfo *root, Node *clause) +{ + Relids varnos = pull_varnos(root, clause); + int result = bms_num_members(varnos); + + bms_free(varnos); + return result; +} + +/* + * CommuteOpExpr: commute a binary operator clause + * + * XXX the clause is destructively modified! + */ +void +CommuteOpExpr(OpExpr *clause) +{ + Oid opoid; + Node *temp; + + /* Sanity checks: caller is at fault if these fail */ + if (!is_opclause(clause) || + list_length(clause->args) != 2) + elog(ERROR, "cannot commute non-binary-operator clause"); + + opoid = get_commutator(clause->opno); + + if (!OidIsValid(opoid)) + elog(ERROR, "could not find commutator for operator %u", + clause->opno); + + /* + * modify the clause in-place! + */ + clause->opno = opoid; + clause->opfuncid = InvalidOid; + /* opresulttype, opretset, opcollid, inputcollid need not change */ + + temp = linitial(clause->args); + linitial(clause->args) = lsecond(clause->args); + lsecond(clause->args) = temp; +} + +/* + * Helper for eval_const_expressions: check that datatype of an attribute + * is still what it was when the expression was parsed. This is needed to + * guard against improper simplification after ALTER COLUMN TYPE. (XXX we + * may well need to make similar checks elsewhere?) + * + * rowtypeid may come from a whole-row Var, and therefore it can be a domain + * over composite, but for this purpose we only care about checking the type + * of a contained field. 
+ */ +static bool +rowtype_field_matches(Oid rowtypeid, int fieldnum, + Oid expectedtype, int32 expectedtypmod, + Oid expectedcollation) +{ + TupleDesc tupdesc; + Form_pg_attribute attr; + + /* No issue for RECORD, since there is no way to ALTER such a type */ + if (rowtypeid == RECORDOID) + return true; + tupdesc = lookup_rowtype_tupdesc_domain(rowtypeid, -1, false); + if (fieldnum <= 0 || fieldnum > tupdesc->natts) + { + ReleaseTupleDesc(tupdesc); + return false; + } + attr = TupleDescAttr(tupdesc, fieldnum - 1); + if (attr->attisdropped || + attr->atttypid != expectedtype || + attr->atttypmod != expectedtypmod || + attr->attcollation != expectedcollation) + { + ReleaseTupleDesc(tupdesc); + return false; + } + ReleaseTupleDesc(tupdesc); + return true; +} + + +/*-------------------- + * eval_const_expressions + * + * Reduce any recognizably constant subexpressions of the given + * expression tree, for example "2 + 2" => "4". More interestingly, + * we can reduce certain boolean expressions even when they contain + * non-constant subexpressions: "x OR true" => "true" no matter what + * the subexpression x is. (XXX We assume that no such subexpression + * will have important side-effects, which is not necessarily a good + * assumption in the presence of user-defined functions; do we need a + * pg_proc flag that prevents discarding the execution of a function?) + * + * We do understand that certain functions may deliver non-constant + * results even with constant inputs, "nextval()" being the classic + * example. Functions that are not marked "immutable" in pg_proc + * will not be pre-evaluated here, although we will reduce their + * arguments as far as possible. + * + * Whenever a function is eliminated from the expression by means of + * constant-expression evaluation or inlining, we add the function to + * root->glob->invalItems. This ensures the plan is known to depend on + * such functions, even though they aren't referenced anymore. + * + * We assume that the tree has already been type-checked and contains + * only operators and functions that are reasonable to try to execute. + * + * NOTE: "root" can be passed as NULL if the caller never wants to do any + * Param substitutions nor receive info about inlined functions. + * + * NOTE: the planner assumes that this will always flatten nested AND and + * OR clauses into N-argument form. See comments in prepqual.c. + * + * NOTE: another critical effect is that any function calls that require + * default arguments will be expanded, and named-argument calls will be + * converted to positional notation. The executor won't handle either. + *-------------------- + */ +Node * +eval_const_expressions(PlannerInfo *root, Node *node) +{ + eval_const_expressions_context context; + + if (root) + context.boundParams = root->glob->boundParams; /* bound Params */ + else + context.boundParams = NULL; + context.root = root; /* for inlined-function dependencies */ + context.active_fns = NIL; /* nothing being recursively simplified */ + context.case_val = NULL; /* no CASE being examined */ + context.estimate = false; /* safe transformations only */ + return eval_const_expressions_mutator(node, &context); +} + +#define MIN_ARRAY_SIZE_FOR_HASHED_SAOP 9 +/*-------------------- + * convert_saop_to_hashed_saop + * + * Recursively search 'node' for ScalarArrayOpExprs and fill in the hash + * function for any ScalarArrayOpExpr that looks like it would be useful to + * evaluate using a hash table rather than a linear search. 
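+ * For example, a clause such as "x IN ('a','b','c', ...)" with at least
+ * MIN_ARRAY_SIZE_FOR_HASHED_SAOP constant elements can then be evaluated
+ * with a single hash probe per input row rather than a linear scan of
+ * the array.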
+ * + * We'll use a hash table if all of the following conditions are met: + * 1. The 2nd argument of the array contain only Consts. + * 2. useOr is true or there is a valid negator operator for the + * ScalarArrayOpExpr's opno. + * 3. There's valid hash function for both left and righthand operands and + * these hash functions are the same. + * 4. If the array contains enough elements for us to consider it to be + * worthwhile using a hash table rather than a linear search. + */ +void +convert_saop_to_hashed_saop(Node *node) +{ + (void) convert_saop_to_hashed_saop_walker(node, NULL); +} + +static bool +convert_saop_to_hashed_saop_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + + if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + Expr *arrayarg = (Expr *) lsecond(saop->args); + Oid lefthashfunc; + Oid righthashfunc; + + if (arrayarg && IsA(arrayarg, Const) && + !((Const *) arrayarg)->constisnull) + { + if (saop->useOr) + { + if (get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) && + lefthashfunc == righthashfunc) + { + Datum arrdatum = ((Const *) arrayarg)->constvalue; + ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum); + int nitems; + + /* + * Only fill in the hash functions if the array looks + * large enough for it to be worth hashing instead of + * doing a linear search. + */ + nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP) + { + /* Looks good. Fill in the hash functions */ + saop->hashfuncid = lefthashfunc; + } + return true; + } + } + else /* !saop->useOr */ + { + Oid negator = get_negator(saop->opno); + + /* + * Check if this is a NOT IN using an operator whose negator + * is hashable. If so we can still build a hash table and + * just ensure the lookup items are not in the hash table. + */ + if (OidIsValid(negator) && + get_op_hash_functions(negator, &lefthashfunc, &righthashfunc) && + lefthashfunc == righthashfunc) + { + Datum arrdatum = ((Const *) arrayarg)->constvalue; + ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum); + int nitems; + + /* + * Only fill in the hash functions if the array looks + * large enough for it to be worth hashing instead of + * doing a linear search. + */ + nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP) + { + /* Looks good. Fill in the hash functions */ + saop->hashfuncid = lefthashfunc; + + /* + * Also set the negfuncid. The executor will need + * that to perform hashtable lookups. + */ + saop->negfuncid = get_opcode(negator); + } + return true; + } + } + } + } + + return expression_tree_walker(node, convert_saop_to_hashed_saop_walker, NULL); +} + + +/*-------------------- + * estimate_expression_value + * + * This function attempts to estimate the value of an expression for + * planning purposes. It is in essence a more aggressive version of + * eval_const_expressions(): we will perform constant reductions that are + * not necessarily 100% safe, but are reasonable for estimation purposes. + * + * Currently the extra steps that are taken in this mode are: + * 1. Substitute values for Params, where a bound Param value has been made + * available by the caller of planner(), even if the Param isn't marked + * constant. This effectively means that we plan using the first supplied + * value of the Param. + * 2. Fold stable, as well as immutable, functions to constants. + * 3. Reduce PlaceHolderVar nodes to their contained expressions. 
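+ *
+ * For example, with a bound value of 42 supplied for $1, a clause
+ * "x < $1" is estimated as though it were "x < 42", even though the
+ * finished plan must still work for other values of $1.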
+ *-------------------- + */ +Node * +estimate_expression_value(PlannerInfo *root, Node *node) +{ + eval_const_expressions_context context; + + context.boundParams = root->glob->boundParams; /* bound Params */ + /* we do not need to mark the plan as depending on inlined functions */ + context.root = NULL; + context.active_fns = NIL; /* nothing being recursively simplified */ + context.case_val = NULL; /* no CASE being examined */ + context.estimate = true; /* unsafe transformations OK */ + return eval_const_expressions_mutator(node, &context); +} + +/* + * The generic case in eval_const_expressions_mutator is to recurse using + * expression_tree_mutator, which will copy the given node unchanged but + * const-simplify its arguments (if any) as far as possible. If the node + * itself does immutable processing, and each of its arguments were reduced + * to a Const, we can then reduce it to a Const using evaluate_expr. (Some + * node types need more complicated logic; for example, a CASE expression + * might be reducible to a constant even if not all its subtrees are.) + */ +#define ece_generic_processing(node) \ + expression_tree_mutator((Node *) (node), eval_const_expressions_mutator, \ + (void *) context) + +/* + * Check whether all arguments of the given node were reduced to Consts. + * By going directly to expression_tree_walker, contain_non_const_walker + * is not applied to the node itself, only to its children. + */ +#define ece_all_arguments_const(node) \ + (!expression_tree_walker((Node *) (node), contain_non_const_walker, NULL)) + +/* Generic macro for applying evaluate_expr */ +#define ece_evaluate_expr(node) \ + ((Node *) evaluate_expr((Expr *) (node), \ + exprType((Node *) (node)), \ + exprTypmod((Node *) (node)), \ + exprCollation((Node *) (node)))) + +/* + * Recursive guts of eval_const_expressions/estimate_expression_value + */ +static Node * +eval_const_expressions_mutator(Node *node, + eval_const_expressions_context *context) +{ + if (node == NULL) + return NULL; + switch (nodeTag(node)) + { + case T_Param: + { + Param *param = (Param *) node; + ParamListInfo paramLI = context->boundParams; + + /* Look to see if we've been given a value for this Param */ + if (param->paramkind == PARAM_EXTERN && + paramLI != NULL && + param->paramid > 0 && + param->paramid <= paramLI->numParams) + { + ParamExternData *prm; + ParamExternData prmdata; + + /* + * Give hook a chance in case parameter is dynamic. Tell + * it that this fetch is speculative, so it should avoid + * erroring out if parameter is unavailable. + */ + if (paramLI->paramFetch != NULL) + prm = paramLI->paramFetch(paramLI, param->paramid, + true, &prmdata); + else + prm = ¶mLI->params[param->paramid - 1]; + + /* + * We don't just check OidIsValid, but insist that the + * fetched type match the Param, just in case the hook did + * something unexpected. No need to throw an error here + * though; leave that for runtime. + */ + if (OidIsValid(prm->ptype) && + prm->ptype == param->paramtype) + { + /* OK to substitute parameter value? */ + if (context->estimate || + (prm->pflags & PARAM_FLAG_CONST)) + { + /* + * Return a Const representing the param value. + * Must copy pass-by-ref datatypes, since the + * Param might be in a memory context + * shorter-lived than our output plan should be. 
+ */ + int16 typLen; + bool typByVal; + Datum pval; + Const *con; + + get_typlenbyval(param->paramtype, + &typLen, &typByVal); + if (prm->isnull || typByVal) + pval = prm->value; + else + pval = datumCopy(prm->value, typByVal, typLen); + con = makeConst(param->paramtype, + param->paramtypmod, + param->paramcollid, + (int) typLen, + pval, + prm->isnull, + typByVal); + con->location = param->location; + return (Node *) con; + } + } + } + + /* + * Not replaceable, so just copy the Param (no need to + * recurse) + */ + return (Node *) copyObject(param); + } + case T_WindowFunc: + { + WindowFunc *expr = (WindowFunc *) node; + Oid funcid = expr->winfnoid; + List *args; + Expr *aggfilter; + HeapTuple func_tuple; + WindowFunc *newexpr; + + /* + * We can't really simplify a WindowFunc node, but we mustn't + * just fall through to the default processing, because we + * have to apply expand_function_arguments to its argument + * list. That takes care of inserting default arguments and + * expanding named-argument notation. + */ + func_tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(func_tuple)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + args = expand_function_arguments(expr->args, + false, expr->wintype, + func_tuple); + + ReleaseSysCache(func_tuple); + + /* Now, recursively simplify the args (which are a List) */ + args = (List *) + expression_tree_mutator((Node *) args, + eval_const_expressions_mutator, + (void *) context); + /* ... and the filter expression, which isn't */ + aggfilter = (Expr *) + eval_const_expressions_mutator((Node *) expr->aggfilter, + context); + + /* And build the replacement WindowFunc node */ + newexpr = makeNode(WindowFunc); + newexpr->winfnoid = expr->winfnoid; + newexpr->wintype = expr->wintype; + newexpr->wincollid = expr->wincollid; + newexpr->inputcollid = expr->inputcollid; + newexpr->args = args; + newexpr->aggfilter = aggfilter; + newexpr->winref = expr->winref; + newexpr->winstar = expr->winstar; + newexpr->winagg = expr->winagg; + newexpr->location = expr->location; + + return (Node *) newexpr; + } + case T_FuncExpr: + { + FuncExpr *expr = (FuncExpr *) node; + List *args = expr->args; + Expr *simple; + FuncExpr *newexpr; + + /* + * Code for op/func reduction is pretty bulky, so split it out + * as a separate function. Note: exprTypmod normally returns + * -1 for a FuncExpr, but not when the node is recognizably a + * length coercion; we want to preserve the typmod in the + * eventual Const if so. + */ + simple = simplify_function(expr->funcid, + expr->funcresulttype, + exprTypmod(node), + expr->funccollid, + expr->inputcollid, + &args, + expr->funcvariadic, + true, + true, + context); + if (simple) /* successfully simplified it */ + return (Node *) simple; + + /* + * The expression cannot be simplified any further, so build + * and return a replacement FuncExpr node using the + * possibly-simplified arguments. Note that we have also + * converted the argument list to positional notation. 
+ */ + newexpr = makeNode(FuncExpr); + newexpr->funcid = expr->funcid; + newexpr->funcresulttype = expr->funcresulttype; + newexpr->funcretset = expr->funcretset; + newexpr->funcvariadic = expr->funcvariadic; + newexpr->funcformat = expr->funcformat; + newexpr->funccollid = expr->funccollid; + newexpr->inputcollid = expr->inputcollid; + newexpr->args = args; + newexpr->location = expr->location; + return (Node *) newexpr; + } + case T_OpExpr: + { + OpExpr *expr = (OpExpr *) node; + List *args = expr->args; + Expr *simple; + OpExpr *newexpr; + + /* + * Need to get OID of underlying function. Okay to scribble + * on input to this extent. + */ + set_opfuncid(expr); + + /* + * Code for op/func reduction is pretty bulky, so split it out + * as a separate function. + */ + simple = simplify_function(expr->opfuncid, + expr->opresulttype, -1, + expr->opcollid, + expr->inputcollid, + &args, + false, + true, + true, + context); + if (simple) /* successfully simplified it */ + return (Node *) simple; + + /* + * If the operator is boolean equality or inequality, we know + * how to simplify cases involving one constant and one + * non-constant argument. + */ + if (expr->opno == BooleanEqualOperator || + expr->opno == BooleanNotEqualOperator) + { + simple = (Expr *) simplify_boolean_equality(expr->opno, + args); + if (simple) /* successfully simplified it */ + return (Node *) simple; + } + + /* + * The expression cannot be simplified any further, so build + * and return a replacement OpExpr node using the + * possibly-simplified arguments. + */ + newexpr = makeNode(OpExpr); + newexpr->opno = expr->opno; + newexpr->opfuncid = expr->opfuncid; + newexpr->opresulttype = expr->opresulttype; + newexpr->opretset = expr->opretset; + newexpr->opcollid = expr->opcollid; + newexpr->inputcollid = expr->inputcollid; + newexpr->args = args; + newexpr->location = expr->location; + return (Node *) newexpr; + } + case T_DistinctExpr: + { + DistinctExpr *expr = (DistinctExpr *) node; + List *args; + ListCell *arg; + bool has_null_input = false; + bool all_null_input = true; + bool has_nonconst_input = false; + Expr *simple; + DistinctExpr *newexpr; + + /* + * Reduce constants in the DistinctExpr's arguments. We know + * args is either NIL or a List node, so we can call + * expression_tree_mutator directly rather than recursing to + * self. + */ + args = (List *) expression_tree_mutator((Node *) expr->args, + eval_const_expressions_mutator, + (void *) context); + + /* + * We must do our own check for NULLs because DistinctExpr has + * different results for NULL input than the underlying + * operator does. + */ + foreach(arg, args) + { + if (IsA(lfirst(arg), Const)) + { + has_null_input |= ((Const *) lfirst(arg))->constisnull; + all_null_input &= ((Const *) lfirst(arg))->constisnull; + } + else + has_nonconst_input = true; + } + + /* all constants? then can optimize this out */ + if (!has_nonconst_input) + { + /* all nulls? then not distinct */ + if (all_null_input) + return makeBoolConst(false, false); + + /* one null? then distinct */ + if (has_null_input) + return makeBoolConst(true, false); + + /* otherwise try to evaluate the '=' operator */ + /* (NOT okay to try to inline it, though!) */ + + /* + * Need to get OID of underlying function. Okay to + * scribble on input to this extent. + */ + set_opfuncid((OpExpr *) expr); /* rely on struct + * equivalence */ + + /* + * Code for op/func reduction is pretty bulky, so split it + * out as a separate function. 
+ */ + simple = simplify_function(expr->opfuncid, + expr->opresulttype, -1, + expr->opcollid, + expr->inputcollid, + &args, + false, + false, + false, + context); + if (simple) /* successfully simplified it */ + { + /* + * Since the underlying operator is "=", must negate + * its result + */ + Const *csimple = castNode(Const, simple); + + csimple->constvalue = + BoolGetDatum(!DatumGetBool(csimple->constvalue)); + return (Node *) csimple; + } + } + + /* + * The expression cannot be simplified any further, so build + * and return a replacement DistinctExpr node using the + * possibly-simplified arguments. + */ + newexpr = makeNode(DistinctExpr); + newexpr->opno = expr->opno; + newexpr->opfuncid = expr->opfuncid; + newexpr->opresulttype = expr->opresulttype; + newexpr->opretset = expr->opretset; + newexpr->opcollid = expr->opcollid; + newexpr->inputcollid = expr->inputcollid; + newexpr->args = args; + newexpr->location = expr->location; + return (Node *) newexpr; + } + case T_NullIfExpr: + { + NullIfExpr *expr; + ListCell *arg; + bool has_nonconst_input = false; + + /* Copy the node and const-simplify its arguments */ + expr = (NullIfExpr *) ece_generic_processing(node); + + /* If either argument is NULL they can't be equal */ + foreach(arg, expr->args) + { + if (!IsA(lfirst(arg), Const)) + has_nonconst_input = true; + else if (((Const *) lfirst(arg))->constisnull) + return (Node *) linitial(expr->args); + } + + /* + * Need to get OID of underlying function before checking if + * the function is OK to evaluate. + */ + set_opfuncid((OpExpr *) expr); + + if (!has_nonconst_input && + ece_function_is_safe(expr->opfuncid, context)) + return ece_evaluate_expr(expr); + + return (Node *) expr; + } + case T_ScalarArrayOpExpr: + { + ScalarArrayOpExpr *saop; + + /* Copy the node and const-simplify its arguments */ + saop = (ScalarArrayOpExpr *) ece_generic_processing(node); + + /* Make sure we know underlying function */ + set_sa_opfuncid(saop); + + /* + * If all arguments are Consts, and it's a safe function, we + * can fold to a constant + */ + if (ece_all_arguments_const(saop) && + ece_function_is_safe(saop->opfuncid, context)) + return ece_evaluate_expr(saop); + return (Node *) saop; + } + case T_BoolExpr: + { + BoolExpr *expr = (BoolExpr *) node; + + switch (expr->boolop) + { + case OR_EXPR: + { + List *newargs; + bool haveNull = false; + bool forceTrue = false; + + newargs = simplify_or_arguments(expr->args, + context, + &haveNull, + &forceTrue); + if (forceTrue) + return makeBoolConst(true, false); + if (haveNull) + newargs = lappend(newargs, + makeBoolConst(false, true)); + /* If all the inputs are FALSE, result is FALSE */ + if (newargs == NIL) + return makeBoolConst(false, false); + + /* + * If only one nonconst-or-NULL input, it's the + * result + */ + if (list_length(newargs) == 1) + return (Node *) linitial(newargs); + /* Else we still need an OR node */ + return (Node *) make_orclause(newargs); + } + case AND_EXPR: + { + List *newargs; + bool haveNull = false; + bool forceFalse = false; + + newargs = simplify_and_arguments(expr->args, + context, + &haveNull, + &forceFalse); + if (forceFalse) + return makeBoolConst(false, false); + if (haveNull) + newargs = lappend(newargs, + makeBoolConst(false, true)); + /* If all the inputs are TRUE, result is TRUE */ + if (newargs == NIL) + return makeBoolConst(true, false); + + /* + * If only one nonconst-or-NULL input, it's the + * result + */ + if (list_length(newargs) == 1) + return (Node *) linitial(newargs); + /* Else we still need an AND node */ 
+ return (Node *) make_andclause(newargs); + } + case NOT_EXPR: + { + Node *arg; + + Assert(list_length(expr->args) == 1); + arg = eval_const_expressions_mutator(linitial(expr->args), + context); + + /* + * Use negate_clause() to see if we can simplify + * away the NOT. + */ + return negate_clause(arg); + } + default: + elog(ERROR, "unrecognized boolop: %d", + (int) expr->boolop); + break; + } + break; + } + case T_SubPlan: + case T_AlternativeSubPlan: + + /* + * Return a SubPlan unchanged --- too late to do anything with it. + * + * XXX should we ereport() here instead? Probably this routine + * should never be invoked after SubPlan creation. + */ + return node; + case T_RelabelType: + { + RelabelType *relabel = (RelabelType *) node; + Node *arg; + + /* Simplify the input ... */ + arg = eval_const_expressions_mutator((Node *) relabel->arg, + context); + /* ... and attach a new RelabelType node, if needed */ + return applyRelabelType(arg, + relabel->resulttype, + relabel->resulttypmod, + relabel->resultcollid, + relabel->relabelformat, + relabel->location, + true); + } + case T_CoerceViaIO: + { + CoerceViaIO *expr = (CoerceViaIO *) node; + List *args; + Oid outfunc; + bool outtypisvarlena; + Oid infunc; + Oid intypioparam; + Expr *simple; + CoerceViaIO *newexpr; + + /* Make a List so we can use simplify_function */ + args = list_make1(expr->arg); + + /* + * CoerceViaIO represents calling the source type's output + * function then the result type's input function. So, try to + * simplify it as though it were a stack of two such function + * calls. First we need to know what the functions are. + * + * Note that the coercion functions are assumed not to care + * about input collation, so we just pass InvalidOid for that. + */ + getTypeOutputInfo(exprType((Node *) expr->arg), + &outfunc, &outtypisvarlena); + getTypeInputInfo(expr->resulttype, + &infunc, &intypioparam); + + simple = simplify_function(outfunc, + CSTRINGOID, -1, + InvalidOid, + InvalidOid, + &args, + false, + true, + true, + context); + if (simple) /* successfully simplified output fn */ + { + /* + * Input functions may want 1 to 3 arguments. We always + * supply all three, trusting that nothing downstream will + * complain. + */ + args = list_make3(simple, + makeConst(OIDOID, + -1, + InvalidOid, + sizeof(Oid), + ObjectIdGetDatum(intypioparam), + false, + true), + makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(-1), + false, + true)); + + simple = simplify_function(infunc, + expr->resulttype, -1, + expr->resultcollid, + InvalidOid, + &args, + false, + false, + true, + context); + if (simple) /* successfully simplified input fn */ + return (Node *) simple; + } + + /* + * The expression cannot be simplified any further, so build + * and return a replacement CoerceViaIO node using the + * possibly-simplified argument. + */ + newexpr = makeNode(CoerceViaIO); + newexpr->arg = (Expr *) linitial(args); + newexpr->resulttype = expr->resulttype; + newexpr->resultcollid = expr->resultcollid; + newexpr->coerceformat = expr->coerceformat; + newexpr->location = expr->location; + return (Node *) newexpr; + } + case T_ArrayCoerceExpr: + { + ArrayCoerceExpr *ac = makeNode(ArrayCoerceExpr); + Node *save_case_val; + + /* + * Copy the node and const-simplify its arguments. We can't + * use ece_generic_processing() here because we need to mess + * with case_val only while processing the elemexpr. 
+ */ + memcpy(ac, node, sizeof(ArrayCoerceExpr)); + ac->arg = (Expr *) + eval_const_expressions_mutator((Node *) ac->arg, + context); + + /* + * Set up for the CaseTestExpr node contained in the elemexpr. + * We must prevent it from absorbing any outer CASE value. + */ + save_case_val = context->case_val; + context->case_val = NULL; + + ac->elemexpr = (Expr *) + eval_const_expressions_mutator((Node *) ac->elemexpr, + context); + + context->case_val = save_case_val; + + /* + * If constant argument and the per-element expression is + * immutable, we can simplify the whole thing to a constant. + * Exception: although contain_mutable_functions considers + * CoerceToDomain immutable for historical reasons, let's not + * do so here; this ensures coercion to an array-over-domain + * does not apply the domain's constraints until runtime. + */ + if (ac->arg && IsA(ac->arg, Const) && + ac->elemexpr && !IsA(ac->elemexpr, CoerceToDomain) && + !contain_mutable_functions((Node *) ac->elemexpr)) + return ece_evaluate_expr(ac); + + return (Node *) ac; + } + case T_CollateExpr: + { + /* + * We replace CollateExpr with RelabelType, so as to improve + * uniformity of expression representation and thus simplify + * comparison of expressions. Hence this looks very nearly + * the same as the RelabelType case, and we can apply the same + * optimizations to avoid unnecessary RelabelTypes. + */ + CollateExpr *collate = (CollateExpr *) node; + Node *arg; + + /* Simplify the input ... */ + arg = eval_const_expressions_mutator((Node *) collate->arg, + context); + /* ... and attach a new RelabelType node, if needed */ + return applyRelabelType(arg, + exprType(arg), + exprTypmod(arg), + collate->collOid, + COERCE_IMPLICIT_CAST, + collate->location, + true); + } + case T_CaseExpr: + { + /*---------- + * CASE expressions can be simplified if there are constant + * condition clauses: + * FALSE (or NULL): drop the alternative + * TRUE: drop all remaining alternatives + * If the first non-FALSE alternative is a constant TRUE, + * we can simplify the entire CASE to that alternative's + * expression. If there are no non-FALSE alternatives, + * we simplify the entire CASE to the default result (ELSE). + * + * If we have a simple-form CASE with constant test + * expression, we substitute the constant value for contained + * CaseTestExpr placeholder nodes, so that we have the + * opportunity to reduce constant test conditions. For + * example this allows + * CASE 0 WHEN 0 THEN 1 ELSE 1/0 END + * to reduce to 1 rather than drawing a divide-by-0 error. + * Note that when the test expression is constant, we don't + * have to include it in the resulting CASE; for example + * CASE 0 WHEN x THEN y ELSE z END + * is transformed by the parser to + * CASE 0 WHEN CaseTestExpr = x THEN y ELSE z END + * which we can simplify to + * CASE WHEN 0 = x THEN y ELSE z END + * It is not necessary for the executor to evaluate the "arg" + * expression when executing the CASE, since any contained + * CaseTestExprs that might have referred to it will have been + * replaced by the constant. 
+ *---------- + */ + CaseExpr *caseexpr = (CaseExpr *) node; + CaseExpr *newcase; + Node *save_case_val; + Node *newarg; + List *newargs; + bool const_true_cond; + Node *defresult = NULL; + ListCell *arg; + + /* Simplify the test expression, if any */ + newarg = eval_const_expressions_mutator((Node *) caseexpr->arg, + context); + + /* Set up for contained CaseTestExpr nodes */ + save_case_val = context->case_val; + if (newarg && IsA(newarg, Const)) + { + context->case_val = newarg; + newarg = NULL; /* not needed anymore, see above */ + } + else + context->case_val = NULL; + + /* Simplify the WHEN clauses */ + newargs = NIL; + const_true_cond = false; + foreach(arg, caseexpr->args) + { + CaseWhen *oldcasewhen = lfirst_node(CaseWhen, arg); + Node *casecond; + Node *caseresult; + + /* Simplify this alternative's test condition */ + casecond = eval_const_expressions_mutator((Node *) oldcasewhen->expr, + context); + + /* + * If the test condition is constant FALSE (or NULL), then + * drop this WHEN clause completely, without processing + * the result. + */ + if (casecond && IsA(casecond, Const)) + { + Const *const_input = (Const *) casecond; + + if (const_input->constisnull || + !DatumGetBool(const_input->constvalue)) + continue; /* drop alternative with FALSE cond */ + /* Else it's constant TRUE */ + const_true_cond = true; + } + + /* Simplify this alternative's result value */ + caseresult = eval_const_expressions_mutator((Node *) oldcasewhen->result, + context); + + /* If non-constant test condition, emit a new WHEN node */ + if (!const_true_cond) + { + CaseWhen *newcasewhen = makeNode(CaseWhen); + + newcasewhen->expr = (Expr *) casecond; + newcasewhen->result = (Expr *) caseresult; + newcasewhen->location = oldcasewhen->location; + newargs = lappend(newargs, newcasewhen); + continue; + } + + /* + * Found a TRUE condition, so none of the remaining + * alternatives can be reached. We treat the result as + * the default result. + */ + defresult = caseresult; + break; + } + + /* Simplify the default result, unless we replaced it above */ + if (!const_true_cond) + defresult = eval_const_expressions_mutator((Node *) caseexpr->defresult, + context); + + context->case_val = save_case_val; + + /* + * If no non-FALSE alternatives, CASE reduces to the default + * result + */ + if (newargs == NIL) + return defresult; + /* Otherwise we need a new CASE node */ + newcase = makeNode(CaseExpr); + newcase->casetype = caseexpr->casetype; + newcase->casecollid = caseexpr->casecollid; + newcase->arg = (Expr *) newarg; + newcase->args = newargs; + newcase->defresult = (Expr *) defresult; + newcase->location = caseexpr->location; + return (Node *) newcase; + } + case T_CaseTestExpr: + { + /* + * If we know a constant test value for the current CASE + * construct, substitute it for the placeholder. Else just + * return the placeholder as-is. + */ + if (context->case_val) + return copyObject(context->case_val); + else + return copyObject(node); + } + case T_SubscriptingRef: + case T_ArrayExpr: + case T_RowExpr: + case T_MinMaxExpr: + { + /* + * Generic handling for node types whose own processing is + * known to be immutable, and for which we need no smarts + * beyond "simplify if all inputs are constants". + * + * Treating SubscriptingRef this way assumes that subscripting + * fetch and assignment are both immutable. This constrains + * type-specific subscripting implementations; maybe we should + * relax it someday. 
+ * + * Treating MinMaxExpr this way amounts to assuming that the + * btree comparison function it calls is immutable; see the + * reasoning in contain_mutable_functions_walker. + */ + + /* Copy the node and const-simplify its arguments */ + node = ece_generic_processing(node); + /* If all arguments are Consts, we can fold to a constant */ + if (ece_all_arguments_const(node)) + return ece_evaluate_expr(node); + return node; + } + case T_CoalesceExpr: + { + CoalesceExpr *coalesceexpr = (CoalesceExpr *) node; + CoalesceExpr *newcoalesce; + List *newargs; + ListCell *arg; + + newargs = NIL; + foreach(arg, coalesceexpr->args) + { + Node *e; + + e = eval_const_expressions_mutator((Node *) lfirst(arg), + context); + + /* + * We can remove null constants from the list. For a + * non-null constant, if it has not been preceded by any + * other non-null-constant expressions then it is the + * result. Otherwise, it's the next argument, but we can + * drop following arguments since they will never be + * reached. + */ + if (IsA(e, Const)) + { + if (((Const *) e)->constisnull) + continue; /* drop null constant */ + if (newargs == NIL) + return e; /* first expr */ + newargs = lappend(newargs, e); + break; + } + newargs = lappend(newargs, e); + } + + /* + * If all the arguments were constant null, the result is just + * null + */ + if (newargs == NIL) + return (Node *) makeNullConst(coalesceexpr->coalescetype, + -1, + coalesceexpr->coalescecollid); + + newcoalesce = makeNode(CoalesceExpr); + newcoalesce->coalescetype = coalesceexpr->coalescetype; + newcoalesce->coalescecollid = coalesceexpr->coalescecollid; + newcoalesce->args = newargs; + newcoalesce->location = coalesceexpr->location; + return (Node *) newcoalesce; + } + case T_SQLValueFunction: + { + /* + * All variants of SQLValueFunction are stable, so if we are + * estimating the expression's value, we should evaluate the + * current function value. Otherwise just copy. + */ + SQLValueFunction *svf = (SQLValueFunction *) node; + + if (context->estimate) + return (Node *) evaluate_expr((Expr *) svf, + svf->type, + svf->typmod, + InvalidOid); + else + return copyObject((Node *) svf); + } + case T_FieldSelect: + { + /* + * We can optimize field selection from a whole-row Var into a + * simple Var. (This case won't be generated directly by the + * parser, because ParseComplexProjection short-circuits it. + * But it can arise while simplifying functions.) Also, we + * can optimize field selection from a RowExpr construct, or + * of course from a constant. + * + * However, replacing a whole-row Var in this way has a + * pitfall: if we've already built the rel targetlist for the + * source relation, then the whole-row Var is scheduled to be + * produced by the relation scan, but the simple Var probably + * isn't, which will lead to a failure in setrefs.c. This is + * not a problem when handling simple single-level queries, in + * which expression simplification always happens first. It + * is a risk for lateral references from subqueries, though. + * To avoid such failures, don't optimize uplevel references. + * + * We must also check that the declared type of the field is + * still the same as when the FieldSelect was created --- this + * can change if someone did ALTER COLUMN TYPE on the rowtype. + * If it isn't, we skip the optimization; the case will + * probably fail at runtime, but that's not our problem here. 
+ */ + FieldSelect *fselect = (FieldSelect *) node; + FieldSelect *newfselect; + Node *arg; + + arg = eval_const_expressions_mutator((Node *) fselect->arg, + context); + if (arg && IsA(arg, Var) && + ((Var *) arg)->varattno == InvalidAttrNumber && + ((Var *) arg)->varlevelsup == 0) + { + if (rowtype_field_matches(((Var *) arg)->vartype, + fselect->fieldnum, + fselect->resulttype, + fselect->resulttypmod, + fselect->resultcollid)) + return (Node *) makeVar(((Var *) arg)->varno, + fselect->fieldnum, + fselect->resulttype, + fselect->resulttypmod, + fselect->resultcollid, + ((Var *) arg)->varlevelsup); + } + if (arg && IsA(arg, RowExpr)) + { + RowExpr *rowexpr = (RowExpr *) arg; + + if (fselect->fieldnum > 0 && + fselect->fieldnum <= list_length(rowexpr->args)) + { + Node *fld = (Node *) list_nth(rowexpr->args, + fselect->fieldnum - 1); + + if (rowtype_field_matches(rowexpr->row_typeid, + fselect->fieldnum, + fselect->resulttype, + fselect->resulttypmod, + fselect->resultcollid) && + fselect->resulttype == exprType(fld) && + fselect->resulttypmod == exprTypmod(fld) && + fselect->resultcollid == exprCollation(fld)) + return fld; + } + } + newfselect = makeNode(FieldSelect); + newfselect->arg = (Expr *) arg; + newfselect->fieldnum = fselect->fieldnum; + newfselect->resulttype = fselect->resulttype; + newfselect->resulttypmod = fselect->resulttypmod; + newfselect->resultcollid = fselect->resultcollid; + if (arg && IsA(arg, Const)) + { + Const *con = (Const *) arg; + + if (rowtype_field_matches(con->consttype, + newfselect->fieldnum, + newfselect->resulttype, + newfselect->resulttypmod, + newfselect->resultcollid)) + return ece_evaluate_expr(newfselect); + } + return (Node *) newfselect; + } + case T_NullTest: + { + NullTest *ntest = (NullTest *) node; + NullTest *newntest; + Node *arg; + + arg = eval_const_expressions_mutator((Node *) ntest->arg, + context); + if (ntest->argisrow && arg && IsA(arg, RowExpr)) + { + /* + * We break ROW(...) IS [NOT] NULL into separate tests on + * its component fields. This form is usually more + * efficient to evaluate, as well as being more amenable + * to optimization. + */ + RowExpr *rarg = (RowExpr *) arg; + List *newargs = NIL; + ListCell *l; + + foreach(l, rarg->args) + { + Node *relem = (Node *) lfirst(l); + + /* + * A constant field refutes the whole NullTest if it's + * of the wrong nullness; else we can discard it. + */ + if (relem && IsA(relem, Const)) + { + Const *carg = (Const *) relem; + + if (carg->constisnull ? + (ntest->nulltesttype == IS_NOT_NULL) : + (ntest->nulltesttype == IS_NULL)) + return makeBoolConst(false, false); + continue; + } + + /* + * Else, make a scalar (argisrow == false) NullTest + * for this field. Scalar semantics are required + * because IS [NOT] NULL doesn't recurse; see comments + * in ExecEvalRowNullInt(). 
+ */ + newntest = makeNode(NullTest); + newntest->arg = (Expr *) relem; + newntest->nulltesttype = ntest->nulltesttype; + newntest->argisrow = false; + newntest->location = ntest->location; + newargs = lappend(newargs, newntest); + } + /* If all the inputs were constants, result is TRUE */ + if (newargs == NIL) + return makeBoolConst(true, false); + /* If only one nonconst input, it's the result */ + if (list_length(newargs) == 1) + return (Node *) linitial(newargs); + /* Else we need an AND node */ + return (Node *) make_andclause(newargs); + } + if (!ntest->argisrow && arg && IsA(arg, Const)) + { + Const *carg = (Const *) arg; + bool result; + + switch (ntest->nulltesttype) + { + case IS_NULL: + result = carg->constisnull; + break; + case IS_NOT_NULL: + result = !carg->constisnull; + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) ntest->nulltesttype); + result = false; /* keep compiler quiet */ + break; + } + + return makeBoolConst(result, false); + } + + newntest = makeNode(NullTest); + newntest->arg = (Expr *) arg; + newntest->nulltesttype = ntest->nulltesttype; + newntest->argisrow = ntest->argisrow; + newntest->location = ntest->location; + return (Node *) newntest; + } + case T_BooleanTest: + { + /* + * This case could be folded into the generic handling used + * for ArrayExpr etc. But because the simplification logic is + * so trivial, applying evaluate_expr() to perform it would be + * a heavy overhead. BooleanTest is probably common enough to + * justify keeping this bespoke implementation. + */ + BooleanTest *btest = (BooleanTest *) node; + BooleanTest *newbtest; + Node *arg; + + arg = eval_const_expressions_mutator((Node *) btest->arg, + context); + if (arg && IsA(arg, Const)) + { + Const *carg = (Const *) arg; + bool result; + + switch (btest->booltesttype) + { + case IS_TRUE: + result = (!carg->constisnull && + DatumGetBool(carg->constvalue)); + break; + case IS_NOT_TRUE: + result = (carg->constisnull || + !DatumGetBool(carg->constvalue)); + break; + case IS_FALSE: + result = (!carg->constisnull && + !DatumGetBool(carg->constvalue)); + break; + case IS_NOT_FALSE: + result = (carg->constisnull || + DatumGetBool(carg->constvalue)); + break; + case IS_UNKNOWN: + result = carg->constisnull; + break; + case IS_NOT_UNKNOWN: + result = !carg->constisnull; + break; + default: + elog(ERROR, "unrecognized booltesttype: %d", + (int) btest->booltesttype); + result = false; /* keep compiler quiet */ + break; + } + + return makeBoolConst(result, false); + } + + newbtest = makeNode(BooleanTest); + newbtest->arg = (Expr *) arg; + newbtest->booltesttype = btest->booltesttype; + newbtest->location = btest->location; + return (Node *) newbtest; + } + case T_CoerceToDomain: + { + /* + * If the domain currently has no constraints, we replace the + * CoerceToDomain node with a simple RelabelType, which is + * both far faster to execute and more amenable to later + * optimization. We must then mark the plan as needing to be + * rebuilt if the domain's constraints change. + * + * Also, in estimation mode, always replace CoerceToDomain + * nodes, effectively assuming that the coercion will succeed. 
+ */ + CoerceToDomain *cdomain = (CoerceToDomain *) node; + CoerceToDomain *newcdomain; + Node *arg; + + arg = eval_const_expressions_mutator((Node *) cdomain->arg, + context); + if (context->estimate || + !DomainHasConstraints(cdomain->resulttype)) + { + /* Record dependency, if this isn't estimation mode */ + if (context->root && !context->estimate) + record_plan_type_dependency(context->root, + cdomain->resulttype); + + /* Generate RelabelType to substitute for CoerceToDomain */ + return applyRelabelType(arg, + cdomain->resulttype, + cdomain->resulttypmod, + cdomain->resultcollid, + cdomain->coercionformat, + cdomain->location, + true); + } + + newcdomain = makeNode(CoerceToDomain); + newcdomain->arg = (Expr *) arg; + newcdomain->resulttype = cdomain->resulttype; + newcdomain->resulttypmod = cdomain->resulttypmod; + newcdomain->resultcollid = cdomain->resultcollid; + newcdomain->coercionformat = cdomain->coercionformat; + newcdomain->location = cdomain->location; + return (Node *) newcdomain; + } + case T_PlaceHolderVar: + + /* + * In estimation mode, just strip the PlaceHolderVar node + * altogether; this amounts to estimating that the contained value + * won't be forced to null by an outer join. In regular mode we + * just use the default behavior (ie, simplify the expression but + * leave the PlaceHolderVar node intact). + */ + if (context->estimate) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + return eval_const_expressions_mutator((Node *) phv->phexpr, + context); + } + break; + case T_ConvertRowtypeExpr: + { + ConvertRowtypeExpr *cre = castNode(ConvertRowtypeExpr, node); + Node *arg; + ConvertRowtypeExpr *newcre; + + arg = eval_const_expressions_mutator((Node *) cre->arg, + context); + + newcre = makeNode(ConvertRowtypeExpr); + newcre->resulttype = cre->resulttype; + newcre->convertformat = cre->convertformat; + newcre->location = cre->location; + + /* + * In case of a nested ConvertRowtypeExpr, we can convert the + * leaf row directly to the topmost row format without any + * intermediate conversions. (This works because + * ConvertRowtypeExpr is used only for child->parent + * conversion in inheritance trees, which works by exact match + * of column name, and a column absent in an intermediate + * result can't be present in the final result.) + * + * No need to check more than one level deep, because the + * above recursion will have flattened anything else. + */ + if (arg != NULL && IsA(arg, ConvertRowtypeExpr)) + { + ConvertRowtypeExpr *argcre = (ConvertRowtypeExpr *) arg; + + arg = (Node *) argcre->arg; + + /* + * Make sure an outer implicit conversion can't hide an + * inner explicit one. + */ + if (newcre->convertformat == COERCE_IMPLICIT_CAST) + newcre->convertformat = argcre->convertformat; + } + + newcre->arg = (Expr *) arg; + + if (arg != NULL && IsA(arg, Const)) + return ece_evaluate_expr((Node *) newcre); + return (Node *) newcre; + } + default: + break; + } + + /* + * For any node type not handled above, copy the node unchanged but + * const-simplify its subexpressions. This is the correct thing for node + * types whose behavior might change between planning and execution, such + * as CurrentOfExpr. It's also a safe default for new node types not + * known to this routine. + */ + return ece_generic_processing(node); +} + +/* + * Subroutine for eval_const_expressions: check for non-Const nodes. + * + * We can abort recursion immediately on finding a non-Const node. 
This is + * critical for performance, else eval_const_expressions_mutator would take + * O(N^2) time on non-simplifiable trees. However, we do need to descend + * into List nodes since expression_tree_walker sometimes invokes the walker + * function directly on List subtrees. + */ +static bool +contain_non_const_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Const)) + return false; + if (IsA(node, List)) + return expression_tree_walker(node, contain_non_const_walker, context); + /* Otherwise, abort the tree traversal and return true */ + return true; +} + +/* + * Subroutine for eval_const_expressions: check if a function is OK to evaluate + */ +static bool +ece_function_is_safe(Oid funcid, eval_const_expressions_context *context) +{ + char provolatile = func_volatile(funcid); + + /* + * Ordinarily we are only allowed to simplify immutable functions. But for + * purposes of estimation, we consider it okay to simplify functions that + * are merely stable; the risk that the result might change from planning + * time to execution time is worth taking in preference to not being able + * to estimate the value at all. + */ + if (provolatile == PROVOLATILE_IMMUTABLE) + return true; + if (context->estimate && provolatile == PROVOLATILE_STABLE) + return true; + return false; +} + +/* + * Subroutine for eval_const_expressions: process arguments of an OR clause + * + * This includes flattening of nested ORs as well as recursion to + * eval_const_expressions to simplify the OR arguments. + * + * After simplification, OR arguments are handled as follows: + * non constant: keep + * FALSE: drop (does not affect result) + * TRUE: force result to TRUE + * NULL: keep only one + * We must keep one NULL input because OR expressions evaluate to NULL when no + * input is TRUE and at least one is NULL. We don't actually include the NULL + * here, that's supposed to be done by the caller. + * + * The output arguments *haveNull and *forceTrue must be initialized false + * by the caller. They will be set true if a NULL constant or TRUE constant, + * respectively, is detected anywhere in the argument list. + */ +static List * +simplify_or_arguments(List *args, + eval_const_expressions_context *context, + bool *haveNull, bool *forceTrue) +{ + List *newargs = NIL; + List *unprocessed_args; + + /* + * We want to ensure that any OR immediately beneath another OR gets + * flattened into a single OR-list, so as to simplify later reasoning. + * + * To avoid stack overflow from recursion of eval_const_expressions, we + * resort to some tenseness here: we keep a list of not-yet-processed + * inputs, and handle flattening of nested ORs by prepending to the to-do + * list instead of recursing. Now that the parser generates N-argument + * ORs from simple lists, this complexity is probably less necessary than + * it once was, but we might as well keep the logic. 
+ */ + unprocessed_args = list_copy(args); + while (unprocessed_args) + { + Node *arg = (Node *) linitial(unprocessed_args); + + unprocessed_args = list_delete_first(unprocessed_args); + + /* flatten nested ORs as per above comment */ + if (is_orclause(arg)) + { + List *subargs = ((BoolExpr *) arg)->args; + List *oldlist = unprocessed_args; + + unprocessed_args = list_concat_copy(subargs, unprocessed_args); + /* perhaps-overly-tense code to avoid leaking old lists */ + list_free(oldlist); + continue; + } + + /* If it's not an OR, simplify it */ + arg = eval_const_expressions_mutator(arg, context); + + /* + * It is unlikely but not impossible for simplification of a non-OR + * clause to produce an OR. Recheck, but don't be too tense about it + * since it's not a mainstream case. In particular we don't worry + * about const-simplifying the input twice, nor about list leakage. + */ + if (is_orclause(arg)) + { + List *subargs = ((BoolExpr *) arg)->args; + + unprocessed_args = list_concat_copy(subargs, unprocessed_args); + continue; + } + + /* + * OK, we have a const-simplified non-OR argument. Process it per + * comments above. + */ + if (IsA(arg, Const)) + { + Const *const_input = (Const *) arg; + + if (const_input->constisnull) + *haveNull = true; + else if (DatumGetBool(const_input->constvalue)) + { + *forceTrue = true; + + /* + * Once we detect a TRUE result we can just exit the loop + * immediately. However, if we ever add a notion of + * non-removable functions, we'd need to keep scanning. + */ + return NIL; + } + /* otherwise, we can drop the constant-false input */ + continue; + } + + /* else emit the simplified arg into the result list */ + newargs = lappend(newargs, arg); + } + + return newargs; +} + +/* + * Subroutine for eval_const_expressions: process arguments of an AND clause + * + * This includes flattening of nested ANDs as well as recursion to + * eval_const_expressions to simplify the AND arguments. + * + * After simplification, AND arguments are handled as follows: + * non constant: keep + * TRUE: drop (does not affect result) + * FALSE: force result to FALSE + * NULL: keep only one + * We must keep one NULL input because AND expressions evaluate to NULL when + * no input is FALSE and at least one is NULL. We don't actually include the + * NULL here, that's supposed to be done by the caller. + * + * The output arguments *haveNull and *forceFalse must be initialized false + * by the caller. They will be set true if a null constant or false constant, + * respectively, is detected anywhere in the argument list. + */ +static List * +simplify_and_arguments(List *args, + eval_const_expressions_context *context, + bool *haveNull, bool *forceFalse) +{ + List *newargs = NIL; + List *unprocessed_args; + + /* See comments in simplify_or_arguments */ + unprocessed_args = list_copy(args); + while (unprocessed_args) + { + Node *arg = (Node *) linitial(unprocessed_args); + + unprocessed_args = list_delete_first(unprocessed_args); + + /* flatten nested ANDs as per above comment */ + if (is_andclause(arg)) + { + List *subargs = ((BoolExpr *) arg)->args; + List *oldlist = unprocessed_args; + + unprocessed_args = list_concat_copy(subargs, unprocessed_args); + /* perhaps-overly-tense code to avoid leaking old lists */ + list_free(oldlist); + continue; + } + + /* If it's not an AND, simplify it */ + arg = eval_const_expressions_mutator(arg, context); + + /* + * It is unlikely but not impossible for simplification of a non-AND + * clause to produce an AND. 
Recheck, but don't be too tense about it + * since it's not a mainstream case. In particular we don't worry + * about const-simplifying the input twice, nor about list leakage. + */ + if (is_andclause(arg)) + { + List *subargs = ((BoolExpr *) arg)->args; + + unprocessed_args = list_concat_copy(subargs, unprocessed_args); + continue; + } + + /* + * OK, we have a const-simplified non-AND argument. Process it per + * comments above. + */ + if (IsA(arg, Const)) + { + Const *const_input = (Const *) arg; + + if (const_input->constisnull) + *haveNull = true; + else if (!DatumGetBool(const_input->constvalue)) + { + *forceFalse = true; + + /* + * Once we detect a FALSE result we can just exit the loop + * immediately. However, if we ever add a notion of + * non-removable functions, we'd need to keep scanning. + */ + return NIL; + } + /* otherwise, we can drop the constant-true input */ + continue; + } + + /* else emit the simplified arg into the result list */ + newargs = lappend(newargs, arg); + } + + return newargs; +} + +/* + * Subroutine for eval_const_expressions: try to simplify boolean equality + * or inequality condition + * + * Inputs are the operator OID and the simplified arguments to the operator. + * Returns a simplified expression if successful, or NULL if cannot + * simplify the expression. + * + * The idea here is to reduce "x = true" to "x" and "x = false" to "NOT x", + * or similarly "x <> true" to "NOT x" and "x <> false" to "x". + * This is only marginally useful in itself, but doing it in constant folding + * ensures that we will recognize these forms as being equivalent in, for + * example, partial index matching. + * + * We come here only if simplify_function has failed; therefore we cannot + * see two constant inputs, nor a constant-NULL input. + */ +static Node * +simplify_boolean_equality(Oid opno, List *args) +{ + Node *leftop; + Node *rightop; + + Assert(list_length(args) == 2); + leftop = linitial(args); + rightop = lsecond(args); + if (leftop && IsA(leftop, Const)) + { + Assert(!((Const *) leftop)->constisnull); + if (opno == BooleanEqualOperator) + { + if (DatumGetBool(((Const *) leftop)->constvalue)) + return rightop; /* true = foo */ + else + return negate_clause(rightop); /* false = foo */ + } + else + { + if (DatumGetBool(((Const *) leftop)->constvalue)) + return negate_clause(rightop); /* true <> foo */ + else + return rightop; /* false <> foo */ + } + } + if (rightop && IsA(rightop, Const)) + { + Assert(!((Const *) rightop)->constisnull); + if (opno == BooleanEqualOperator) + { + if (DatumGetBool(((Const *) rightop)->constvalue)) + return leftop; /* foo = true */ + else + return negate_clause(leftop); /* foo = false */ + } + else + { + if (DatumGetBool(((Const *) rightop)->constvalue)) + return negate_clause(leftop); /* foo <> true */ + else + return leftop; /* foo <> false */ + } + } + return NULL; +} + +/* + * Subroutine for eval_const_expressions: try to simplify a function call + * (which might originally have been an operator; we don't care) + * + * Inputs are the function OID, actual result type OID (which is needed for + * polymorphic functions), result typmod, result collation, the input + * collation to use for the function, the original argument list (not + * const-simplified yet, unless process_args is false), and some flags; + * also the context data for eval_const_expressions. + * + * Returns a simplified expression if successful, or NULL if cannot + * simplify the function call. 
+ * + * This function is also responsible for converting named-notation argument + * lists into positional notation and/or adding any needed default argument + * expressions; which is a bit grotty, but it avoids extra fetches of the + * function's pg_proc tuple. For this reason, the args list is + * pass-by-reference. Conversion and const-simplification of the args list + * will be done even if simplification of the function call itself is not + * possible. + */ +static Expr * +simplify_function(Oid funcid, Oid result_type, int32 result_typmod, + Oid result_collid, Oid input_collid, List **args_p, + bool funcvariadic, bool process_args, bool allow_non_const, + eval_const_expressions_context *context) +{ + List *args = *args_p; + HeapTuple func_tuple; + Form_pg_proc func_form; + Expr *newexpr; + + /* + * We have three strategies for simplification: execute the function to + * deliver a constant result, use a transform function to generate a + * substitute node tree, or expand in-line the body of the function + * definition (which only works for simple SQL-language functions, but + * that is a common case). Each case needs access to the function's + * pg_proc tuple, so fetch it just once. + * + * Note: the allow_non_const flag suppresses both the second and third + * strategies; so if !allow_non_const, simplify_function can only return a + * Const or NULL. Argument-list rewriting happens anyway, though. + */ + func_tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(func_tuple)) + elog(ERROR, "cache lookup failed for function %u", funcid); + func_form = (Form_pg_proc) GETSTRUCT(func_tuple); + + /* + * Process the function arguments, unless the caller did it already. + * + * Here we must deal with named or defaulted arguments, and then + * recursively apply eval_const_expressions to the whole argument list. + */ + if (process_args) + { + args = expand_function_arguments(args, false, result_type, func_tuple); + args = (List *) expression_tree_mutator((Node *) args, + eval_const_expressions_mutator, + (void *) context); + /* Argument processing done, give it back to the caller */ + *args_p = args; + } + + /* Now attempt simplification of the function call proper. */ + + newexpr = evaluate_function(funcid, result_type, result_typmod, + result_collid, input_collid, + args, funcvariadic, + func_tuple, context); + + if (!newexpr && allow_non_const && OidIsValid(func_form->prosupport)) + { + /* + * Build a SupportRequestSimplify node to pass to the support + * function, pointing to a dummy FuncExpr node containing the + * simplified arg list. We use this approach to present a uniform + * interface to the support function regardless of how the target + * function is actually being invoked. 
+ */ + SupportRequestSimplify req; + FuncExpr fexpr; + + fexpr.xpr.type = T_FuncExpr; + fexpr.funcid = funcid; + fexpr.funcresulttype = result_type; + fexpr.funcretset = func_form->proretset; + fexpr.funcvariadic = funcvariadic; + fexpr.funcformat = COERCE_EXPLICIT_CALL; + fexpr.funccollid = result_collid; + fexpr.inputcollid = input_collid; + fexpr.args = args; + fexpr.location = -1; + + req.type = T_SupportRequestSimplify; + req.root = context->root; + req.fcall = &fexpr; + + newexpr = (Expr *) + DatumGetPointer(OidFunctionCall1(func_form->prosupport, + PointerGetDatum(&req))); + + /* catch a possible API misunderstanding */ + Assert(newexpr != (Expr *) &fexpr); + } + + if (!newexpr && allow_non_const) + newexpr = inline_function(funcid, result_type, result_collid, + input_collid, args, funcvariadic, + func_tuple, context); + + ReleaseSysCache(func_tuple); + + return newexpr; +} + +/* + * expand_function_arguments: convert named-notation args to positional args + * and/or insert default args, as needed + * + * Returns a possibly-transformed version of the args list. + * + * If include_out_arguments is true, then the args list and the result + * include OUT arguments. + * + * The expected result type of the call must be given, for sanity-checking + * purposes. Also, we ask the caller to provide the function's actual + * pg_proc tuple, not just its OID. + * + * If we need to change anything, the input argument list is copied, not + * modified. + * + * Note: this gets applied to operator argument lists too, even though the + * cases it handles should never occur there. This should be OK since it + * will fall through very quickly if there's nothing to do. + */ +List * +expand_function_arguments(List *args, bool include_out_arguments, + Oid result_type, HeapTuple func_tuple) +{ + Form_pg_proc funcform = (Form_pg_proc) GETSTRUCT(func_tuple); + Oid *proargtypes = funcform->proargtypes.values; + int pronargs = funcform->pronargs; + bool has_named_args = false; + ListCell *lc; + + /* + * If we are asked to match to OUT arguments, then use the proallargtypes + * array (which includes those); otherwise use proargtypes (which + * doesn't). Of course, if proallargtypes is null, we always use + * proargtypes. (Fetching proallargtypes is annoyingly expensive + * considering that we may have nothing to do here, but fortunately the + * common case is include_out_arguments == false.) + */ + if (include_out_arguments) + { + Datum proallargtypes; + bool isNull; + + proallargtypes = SysCacheGetAttr(PROCOID, func_tuple, + Anum_pg_proc_proallargtypes, + &isNull); + if (!isNull) + { + ArrayType *arr = DatumGetArrayTypeP(proallargtypes); + + pronargs = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + pronargs < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != OIDOID) + elog(ERROR, "proallargtypes is not a 1-D Oid array or it contains nulls"); + Assert(pronargs >= funcform->pronargs); + proargtypes = (Oid *) ARR_DATA_PTR(arr); + } + } + + /* Do we have any named arguments? 
*/ + foreach(lc, args) + { + Node *arg = (Node *) lfirst(lc); + + if (IsA(arg, NamedArgExpr)) + { + has_named_args = true; + break; + } + } + + /* If so, we must apply reorder_function_arguments */ + if (has_named_args) + { + args = reorder_function_arguments(args, pronargs, func_tuple); + /* Recheck argument types and add casts if needed */ + recheck_cast_function_args(args, result_type, + proargtypes, pronargs, + func_tuple); + } + else if (list_length(args) < pronargs) + { + /* No named args, but we seem to be short some defaults */ + args = add_function_defaults(args, pronargs, func_tuple); + /* Recheck argument types and add casts if needed */ + recheck_cast_function_args(args, result_type, + proargtypes, pronargs, + func_tuple); + } + + return args; +} + +/* + * reorder_function_arguments: convert named-notation args to positional args + * + * This function also inserts default argument values as needed, since it's + * impossible to form a truly valid positional call without that. + */ +static List * +reorder_function_arguments(List *args, int pronargs, HeapTuple func_tuple) +{ + Form_pg_proc funcform = (Form_pg_proc) GETSTRUCT(func_tuple); + int nargsprovided = list_length(args); + Node *argarray[FUNC_MAX_ARGS]; + ListCell *lc; + int i; + + Assert(nargsprovided <= pronargs); + if (pronargs < 0 || pronargs > FUNC_MAX_ARGS) + elog(ERROR, "too many function arguments"); + memset(argarray, 0, pronargs * sizeof(Node *)); + + /* Deconstruct the argument list into an array indexed by argnumber */ + i = 0; + foreach(lc, args) + { + Node *arg = (Node *) lfirst(lc); + + if (!IsA(arg, NamedArgExpr)) + { + /* positional argument, assumed to precede all named args */ + Assert(argarray[i] == NULL); + argarray[i++] = arg; + } + else + { + NamedArgExpr *na = (NamedArgExpr *) arg; + + Assert(na->argnumber >= 0 && na->argnumber < pronargs); + Assert(argarray[na->argnumber] == NULL); + argarray[na->argnumber] = (Node *) na->arg; + } + } + + /* + * Fetch default expressions, if needed, and insert into array at proper + * locations (they aren't necessarily consecutive or all used) + */ + if (nargsprovided < pronargs) + { + List *defaults = fetch_function_defaults(func_tuple); + + i = pronargs - funcform->pronargdefaults; + foreach(lc, defaults) + { + if (argarray[i] == NULL) + argarray[i] = (Node *) lfirst(lc); + i++; + } + } + + /* Now reconstruct the args list in proper order */ + args = NIL; + for (i = 0; i < pronargs; i++) + { + Assert(argarray[i] != NULL); + args = lappend(args, argarray[i]); + } + + return args; +} + +/* + * add_function_defaults: add missing function arguments from its defaults + * + * This is used only when the argument list was positional to begin with, + * and so we know we just need to add defaults at the end. 
+ */ +static List * +add_function_defaults(List *args, int pronargs, HeapTuple func_tuple) +{ + int nargsprovided = list_length(args); + List *defaults; + int ndelete; + + /* Get all the default expressions from the pg_proc tuple */ + defaults = fetch_function_defaults(func_tuple); + + /* Delete any unused defaults from the list */ + ndelete = nargsprovided + list_length(defaults) - pronargs; + if (ndelete < 0) + elog(ERROR, "not enough default arguments"); + if (ndelete > 0) + defaults = list_delete_first_n(defaults, ndelete); + + /* And form the combined argument list, not modifying the input list */ + return list_concat_copy(args, defaults); +} + +/* + * fetch_function_defaults: get function's default arguments as expression list + */ +static List * +fetch_function_defaults(HeapTuple func_tuple) +{ + List *defaults; + Datum proargdefaults; + bool isnull; + char *str; + + /* The error cases here shouldn't happen, but check anyway */ + proargdefaults = SysCacheGetAttr(PROCOID, func_tuple, + Anum_pg_proc_proargdefaults, + &isnull); + if (isnull) + elog(ERROR, "not enough default arguments"); + str = TextDatumGetCString(proargdefaults); + defaults = castNode(List, stringToNode(str)); + pfree(str); + return defaults; +} + +/* + * recheck_cast_function_args: recheck function args and typecast as needed + * after adding defaults. + * + * It is possible for some of the defaulted arguments to be polymorphic; + * therefore we can't assume that the default expressions have the correct + * data types already. We have to re-resolve polymorphics and do coercion + * just like the parser did. + * + * This should be a no-op if there are no polymorphic arguments, + * but we do it anyway to be sure. + * + * Note: if any casts are needed, the args list is modified in-place; + * caller should have already copied the list structure. + */ +static void +recheck_cast_function_args(List *args, Oid result_type, + Oid *proargtypes, int pronargs, + HeapTuple func_tuple) +{ + Form_pg_proc funcform = (Form_pg_proc) GETSTRUCT(func_tuple); + int nargs; + Oid actual_arg_types[FUNC_MAX_ARGS]; + Oid declared_arg_types[FUNC_MAX_ARGS]; + Oid rettype; + ListCell *lc; + + if (list_length(args) > FUNC_MAX_ARGS) + elog(ERROR, "too many function arguments"); + nargs = 0; + foreach(lc, args) + { + actual_arg_types[nargs++] = exprType((Node *) lfirst(lc)); + } + Assert(nargs == pronargs); + memcpy(declared_arg_types, proargtypes, pronargs * sizeof(Oid)); + rettype = enforce_generic_type_consistency(actual_arg_types, + declared_arg_types, + nargs, + funcform->prorettype, + false); + /* let's just check we got the same answer as the parser did ... */ + if (rettype != result_type) + elog(ERROR, "function's resolved result type changed during planning"); + + /* perform any necessary typecasting of arguments */ + make_fn_arguments(NULL, args, actual_arg_types, declared_arg_types); +} + +/* + * evaluate_function: try to pre-evaluate a function call + * + * We can do this if the function is strict and has any constant-null inputs + * (just return a null constant), or if the function is immutable and has all + * constant inputs (call it and return the result as a Const node). In + * estimation mode we are willing to pre-evaluate stable functions too. + * + * Returns a simplified expression if successful, or NULL if cannot + * simplify the function. 
+ */ +static Expr * +evaluate_function(Oid funcid, Oid result_type, int32 result_typmod, + Oid result_collid, Oid input_collid, List *args, + bool funcvariadic, + HeapTuple func_tuple, + eval_const_expressions_context *context) +{ + Form_pg_proc funcform = (Form_pg_proc) GETSTRUCT(func_tuple); + bool has_nonconst_input = false; + bool has_null_input = false; + ListCell *arg; + FuncExpr *newexpr; + + /* + * Can't simplify if it returns a set. + */ + if (funcform->proretset) + return NULL; + + /* + * Can't simplify if it returns RECORD. The immediate problem is that it + * will be needing an expected tupdesc which we can't supply here. + * + * In the case where it has OUT parameters, it could get by without an + * expected tupdesc, but we still have issues: get_expr_result_type() + * doesn't know how to extract type info from a RECORD constant, and in + * the case of a NULL function result there doesn't seem to be any clean + * way to fix that. In view of the likelihood of there being still other + * gotchas, seems best to leave the function call unreduced. + */ + if (funcform->prorettype == RECORDOID) + return NULL; + + /* + * Check for constant inputs and especially constant-NULL inputs. + */ + foreach(arg, args) + { + if (IsA(lfirst(arg), Const)) + has_null_input |= ((Const *) lfirst(arg))->constisnull; + else + has_nonconst_input = true; + } + + /* + * If the function is strict and has a constant-NULL input, it will never + * be called at all, so we can replace the call by a NULL constant, even + * if there are other inputs that aren't constant, and even if the + * function is not otherwise immutable. + */ + if (funcform->proisstrict && has_null_input) + return (Expr *) makeNullConst(result_type, result_typmod, + result_collid); + + /* + * Otherwise, can simplify only if all inputs are constants. (For a + * non-strict function, constant NULL inputs are treated the same as + * constant non-NULL inputs.) + */ + if (has_nonconst_input) + return NULL; + + /* + * Ordinarily we are only allowed to simplify immutable functions. But for + * purposes of estimation, we consider it okay to simplify functions that + * are merely stable; the risk that the result might change from planning + * time to execution time is worth taking in preference to not being able + * to estimate the value at all. + */ + if (funcform->provolatile == PROVOLATILE_IMMUTABLE) + /* okay */ ; + else if (context->estimate && funcform->provolatile == PROVOLATILE_STABLE) + /* okay */ ; + else + return NULL; + + /* + * OK, looks like we can simplify this operator/function. + * + * Build a new FuncExpr node containing the already-simplified arguments. + */ + newexpr = makeNode(FuncExpr); + newexpr->funcid = funcid; + newexpr->funcresulttype = result_type; + newexpr->funcretset = false; + newexpr->funcvariadic = funcvariadic; + newexpr->funcformat = COERCE_EXPLICIT_CALL; /* doesn't matter */ + newexpr->funccollid = result_collid; /* doesn't matter */ + newexpr->inputcollid = input_collid; + newexpr->args = args; + newexpr->location = -1; + + return evaluate_expr((Expr *) newexpr, result_type, result_typmod, + result_collid); +} + +/* + * inline_function: try to expand a function call inline + * + * If the function is a sufficiently simple SQL-language function + * (just "SELECT expression"), then we can inline it and avoid the rather + * high per-call overhead of SQL functions. Furthermore, this can expose + * opportunities for constant-folding within the function expression. 
+ * + * We have to beware of some special cases however. A directly or + * indirectly recursive function would cause us to recurse forever, + * so we keep track of which functions we are already expanding and + * do not re-expand them. Also, if a parameter is used more than once + * in the SQL-function body, we require it not to contain any volatile + * functions (volatiles might deliver inconsistent answers) nor to be + * unreasonably expensive to evaluate. The expensiveness check not only + * prevents us from doing multiple evaluations of an expensive parameter + * at runtime, but is a safety value to limit growth of an expression due + * to repeated inlining. + * + * We must also beware of changing the volatility or strictness status of + * functions by inlining them. + * + * Also, at the moment we can't inline functions returning RECORD. This + * doesn't work in the general case because it discards information such + * as OUT-parameter declarations. + * + * Also, context-dependent expression nodes in the argument list are trouble. + * + * Returns a simplified expression if successful, or NULL if cannot + * simplify the function. + */ +static Expr * +inline_function(Oid funcid, Oid result_type, Oid result_collid, + Oid input_collid, List *args, + bool funcvariadic, + HeapTuple func_tuple, + eval_const_expressions_context *context) +{ + Form_pg_proc funcform = (Form_pg_proc) GETSTRUCT(func_tuple); + char *src; + Datum tmp; + bool isNull; + MemoryContext oldcxt; + MemoryContext mycxt; + inline_error_callback_arg callback_arg; + ErrorContextCallback sqlerrcontext; + FuncExpr *fexpr; + SQLFunctionParseInfoPtr pinfo; + TupleDesc rettupdesc; + ParseState *pstate; + List *raw_parsetree_list; + List *querytree_list; + Query *querytree; + Node *newexpr; + int *usecounts; + ListCell *arg; + int i; + + /* + * Forget it if the function is not SQL-language or has other showstopper + * properties. (The prokind and nargs checks are just paranoia.) + */ + if (funcform->prolang != SQLlanguageId || + funcform->prokind != PROKIND_FUNCTION || + funcform->prosecdef || + funcform->proretset || + funcform->prorettype == RECORDOID || + !heap_attisnull(func_tuple, Anum_pg_proc_proconfig, NULL) || + funcform->pronargs != list_length(args)) + return NULL; + + /* Check for recursive function, and give up trying to expand if so */ + if (list_member_oid(context->active_fns, funcid)) + return NULL; + + /* Check permission to call function (fail later, if not) */ + if (pg_proc_aclcheck(funcid, GetUserId(), ACL_EXECUTE) != ACLCHECK_OK) + return NULL; + + /* Check whether a plugin wants to hook function entry/exit */ + if (FmgrHookIsNeeded(funcid)) + return NULL; + + /* + * Make a temporary memory context, so that we don't leak all the stuff + * that parsing might create. + */ + mycxt = AllocSetContextCreate(CurrentMemoryContext, + "inline_function", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(mycxt); + + /* + * We need a dummy FuncExpr node containing the already-simplified + * arguments. (In some cases we don't really need it, but building it is + * cheap enough that it's not worth contortions to avoid.) 
+ */ + fexpr = makeNode(FuncExpr); + fexpr->funcid = funcid; + fexpr->funcresulttype = result_type; + fexpr->funcretset = false; + fexpr->funcvariadic = funcvariadic; + fexpr->funcformat = COERCE_EXPLICIT_CALL; /* doesn't matter */ + fexpr->funccollid = result_collid; /* doesn't matter */ + fexpr->inputcollid = input_collid; + fexpr->args = args; + fexpr->location = -1; + + /* Fetch the function body */ + tmp = SysCacheGetAttr(PROCOID, + func_tuple, + Anum_pg_proc_prosrc, + &isNull); + if (isNull) + elog(ERROR, "null prosrc for function %u", funcid); + src = TextDatumGetCString(tmp); + + /* + * Setup error traceback support for ereport(). This is so that we can + * finger the function that bad information came from. + */ + callback_arg.proname = NameStr(funcform->proname); + callback_arg.prosrc = src; + + sqlerrcontext.callback = sql_inline_error_callback; + sqlerrcontext.arg = (void *) &callback_arg; + sqlerrcontext.previous = error_context_stack; + error_context_stack = &sqlerrcontext; + + /* If we have prosqlbody, pay attention to that not prosrc */ + tmp = SysCacheGetAttr(PROCOID, + func_tuple, + Anum_pg_proc_prosqlbody, + &isNull); + if (!isNull) + { + Node *n; + List *querytree_list; + + n = stringToNode(TextDatumGetCString(tmp)); + if (IsA(n, List)) + querytree_list = linitial_node(List, castNode(List, n)); + else + querytree_list = list_make1(n); + if (list_length(querytree_list) != 1) + goto fail; + querytree = linitial(querytree_list); + + /* + * Because we'll insist below that the querytree have an empty rtable + * and no sublinks, it cannot have any relation references that need + * to be locked or rewritten. So we can omit those steps. + */ + } + else + { + /* Set up to handle parameters while parsing the function body. */ + pinfo = prepare_sql_fn_parse_info(func_tuple, + (Node *) fexpr, + input_collid); + + /* + * We just do parsing and parse analysis, not rewriting, because + * rewriting will not affect table-free-SELECT-only queries, which is + * all that we care about. Also, we can punt as soon as we detect + * more than one command in the function body. + */ + raw_parsetree_list = pg_parse_query(src); + if (list_length(raw_parsetree_list) != 1) + goto fail; + + pstate = make_parsestate(NULL); + pstate->p_sourcetext = src; + sql_fn_parser_setup(pstate, pinfo); + + querytree = transformTopLevelStmt(pstate, linitial(raw_parsetree_list)); + + free_parsestate(pstate); + } + + /* + * The single command must be a simple "SELECT expression". + * + * Note: if you change the tests involved in this, see also plpgsql's + * exec_simple_check_plan(). That generally needs to have the same idea + * of what's a "simple expression", so that inlining a function that + * previously wasn't inlined won't change plpgsql's conclusion. 
+ */ + if (!IsA(querytree, Query) || + querytree->commandType != CMD_SELECT || + querytree->hasAggs || + querytree->hasWindowFuncs || + querytree->hasTargetSRFs || + querytree->hasSubLinks || + querytree->cteList || + querytree->rtable || + querytree->jointree->fromlist || + querytree->jointree->quals || + querytree->groupClause || + querytree->groupingSets || + querytree->havingQual || + querytree->windowClause || + querytree->distinctClause || + querytree->sortClause || + querytree->limitOffset || + querytree->limitCount || + querytree->setOperations || + list_length(querytree->targetList) != 1) + goto fail; + + /* If the function result is composite, resolve it */ + (void) get_expr_result_type((Node *) fexpr, + NULL, + &rettupdesc); + + /* + * Make sure the function (still) returns what it's declared to. This + * will raise an error if wrong, but that's okay since the function would + * fail at runtime anyway. Note that check_sql_fn_retval will also insert + * a coercion if needed to make the tlist expression match the declared + * type of the function. + * + * Note: we do not try this until we have verified that no rewriting was + * needed; that's probably not important, but let's be careful. + */ + querytree_list = list_make1(querytree); + if (check_sql_fn_retval(list_make1(querytree_list), + result_type, rettupdesc, + false, NULL)) + goto fail; /* reject whole-tuple-result cases */ + + /* + * Given the tests above, check_sql_fn_retval shouldn't have decided to + * inject a projection step, but let's just make sure. + */ + if (querytree != linitial(querytree_list)) + goto fail; + + /* Now we can grab the tlist expression */ + newexpr = (Node *) ((TargetEntry *) linitial(querytree->targetList))->expr; + + /* + * If the SQL function returns VOID, we can only inline it if it is a + * SELECT of an expression returning VOID (ie, it's just a redirection to + * another VOID-returning function). In all non-VOID-returning cases, + * check_sql_fn_retval should ensure that newexpr returns the function's + * declared result type, so this test shouldn't fail otherwise; but we may + * as well cope gracefully if it does. + */ + if (exprType(newexpr) != result_type) + goto fail; + + /* + * Additional validity checks on the expression. It mustn't be more + * volatile than the surrounding function (this is to avoid breaking hacks + * that involve pretending a function is immutable when it really ain't). + * If the surrounding function is declared strict, then the expression + * must contain only strict constructs and must use all of the function + * parameters (this is overkill, but an exact analysis is hard). + */ + if (funcform->provolatile == PROVOLATILE_IMMUTABLE && + contain_mutable_functions(newexpr)) + goto fail; + else if (funcform->provolatile == PROVOLATILE_STABLE && + contain_volatile_functions(newexpr)) + goto fail; + + if (funcform->proisstrict && + contain_nonstrict_functions(newexpr)) + goto fail; + + /* + * If any parameter expression contains a context-dependent node, we can't + * inline, for fear of putting such a node into the wrong context. + */ + if (contain_context_dependent_node((Node *) args)) + goto fail; + + /* + * We may be able to do it; there are still checks on parameter usage to + * make, but those are most easily done in combination with the actual + * substitution of the inputs. So start building expression with inputs + * substituted. 
+ */ + usecounts = (int *) palloc0(funcform->pronargs * sizeof(int)); + newexpr = substitute_actual_parameters(newexpr, funcform->pronargs, + args, usecounts); + + /* Now check for parameter usage */ + i = 0; + foreach(arg, args) + { + Node *param = lfirst(arg); + + if (usecounts[i] == 0) + { + /* Param not used at all: uncool if func is strict */ + if (funcform->proisstrict) + goto fail; + } + else if (usecounts[i] != 1) + { + /* Param used multiple times: uncool if expensive or volatile */ + QualCost eval_cost; + + /* + * We define "expensive" as "contains any subplan or more than 10 + * operators". Note that the subplan search has to be done + * explicitly, since cost_qual_eval() will barf on unplanned + * subselects. + */ + if (contain_subplans(param)) + goto fail; + cost_qual_eval(&eval_cost, list_make1(param), NULL); + if (eval_cost.startup + eval_cost.per_tuple > + 10 * cpu_operator_cost) + goto fail; + + /* + * Check volatility last since this is more expensive than the + * above tests + */ + if (contain_volatile_functions(param)) + goto fail; + } + i++; + } + + /* + * Whew --- we can make the substitution. Copy the modified expression + * out of the temporary memory context, and clean up. + */ + MemoryContextSwitchTo(oldcxt); + + newexpr = copyObject(newexpr); + + MemoryContextDelete(mycxt); + + /* + * If the result is of a collatable type, force the result to expose the + * correct collation. In most cases this does not matter, but it's + * possible that the function result is used directly as a sort key or in + * other places where we expect exprCollation() to tell the truth. + */ + if (OidIsValid(result_collid)) + { + Oid exprcoll = exprCollation(newexpr); + + if (OidIsValid(exprcoll) && exprcoll != result_collid) + { + CollateExpr *newnode = makeNode(CollateExpr); + + newnode->arg = (Expr *) newexpr; + newnode->collOid = result_collid; + newnode->location = -1; + + newexpr = (Node *) newnode; + } + } + + /* + * Since there is now no trace of the function in the plan tree, we must + * explicitly record the plan's dependency on the function. + */ + if (context->root) + record_plan_function_dependency(context->root, funcid); + + /* + * Recursively try to simplify the modified expression. Here we must add + * the current function to the context list of active functions. 
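+ * Keeping the function in active_fns while we recurse lets
+ * inline_function detect direct or mutual recursion and decline to
+ * expand such calls a second time.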
+ */ + context->active_fns = lappend_oid(context->active_fns, funcid); + newexpr = eval_const_expressions_mutator(newexpr, context); + context->active_fns = list_delete_last(context->active_fns); + + error_context_stack = sqlerrcontext.previous; + + return (Expr *) newexpr; + + /* Here if func is not inlinable: release temp memory and return NULL */ +fail: + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(mycxt); + error_context_stack = sqlerrcontext.previous; + + return NULL; +} + +/* + * Replace Param nodes by appropriate actual parameters + */ +static Node * +substitute_actual_parameters(Node *expr, int nargs, List *args, + int *usecounts) +{ + substitute_actual_parameters_context context; + + context.nargs = nargs; + context.args = args; + context.usecounts = usecounts; + + return substitute_actual_parameters_mutator(expr, &context); +} + +static Node * +substitute_actual_parameters_mutator(Node *node, + substitute_actual_parameters_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind != PARAM_EXTERN) + elog(ERROR, "unexpected paramkind: %d", (int) param->paramkind); + if (param->paramid <= 0 || param->paramid > context->nargs) + elog(ERROR, "invalid paramid: %d", param->paramid); + + /* Count usage of parameter */ + context->usecounts[param->paramid - 1]++; + + /* Select the appropriate actual arg and replace the Param with it */ + /* We don't need to copy at this time (it'll get done later) */ + return list_nth(context->args, param->paramid - 1); + } + return expression_tree_mutator(node, substitute_actual_parameters_mutator, + (void *) context); +} + +/* + * error context callback to let us supply a call-stack traceback + */ +static void +sql_inline_error_callback(void *arg) +{ + inline_error_callback_arg *callback_arg = (inline_error_callback_arg *) arg; + int syntaxerrposition; + + /* If it's a syntax error, convert to internal syntax error report */ + syntaxerrposition = geterrposition(); + if (syntaxerrposition > 0) + { + errposition(0); + internalerrposition(syntaxerrposition); + internalerrquery(callback_arg->prosrc); + } + + errcontext("SQL function \"%s\" during inlining", callback_arg->proname); +} + +/* + * evaluate_expr: pre-evaluate a constant expression + * + * We use the executor's routine ExecEvalExpr() to avoid duplication of + * code and ensure we get the same result as the executor would get. + */ +Expr * +evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, + Oid result_collation) +{ + EState *estate; + ExprState *exprstate; + MemoryContext oldcontext; + Datum const_val; + bool const_is_null; + int16 resultTypLen; + bool resultTypByVal; + + /* + * To use the executor, we need an EState. + */ + estate = CreateExecutorState(); + + /* We can use the estate's working context to avoid memory leaks. */ + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + + /* Make sure any opfuncids are filled in. */ + fix_opfuncids((Node *) expr); + + /* + * Prepare expr for execution. (Note: we can't use ExecPrepareExpr + * because it'd result in recursively invoking eval_const_expressions.) + */ + exprstate = ExecInitExpr(expr, NULL); + + /* + * And evaluate it. + * + * It is OK to use a default econtext because none of the ExecEvalExpr() + * code used in this situation will use econtext. That might seem + * fortuitous, but it's not so unreasonable --- a constant expression does + * not depend on context, by definition, n'est ce pas? 
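+ *
+ * (This is the path by which, for example, a call of an immutable
+ * function with all-constant arguments gets folded to a Const at plan
+ * time.)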
+ */ + const_val = ExecEvalExprSwitchContext(exprstate, + GetPerTupleExprContext(estate), + &const_is_null); + + /* Get info needed about result datatype */ + get_typlenbyval(result_type, &resultTypLen, &resultTypByVal); + + /* Get back to outer memory context */ + MemoryContextSwitchTo(oldcontext); + + /* + * Must copy result out of sub-context used by expression eval. + * + * Also, if it's varlena, forcibly detoast it. This protects us against + * storing TOAST pointers into plans that might outlive the referenced + * data. (makeConst would handle detoasting anyway, but it's worth a few + * extra lines here so that we can do the copy and detoast in one step.) + */ + if (!const_is_null) + { + if (resultTypLen == -1) + const_val = PointerGetDatum(PG_DETOAST_DATUM_COPY(const_val)); + else + const_val = datumCopy(const_val, resultTypByVal, resultTypLen); + } + + /* Release all the junk we just created */ + FreeExecutorState(estate); + + /* + * Make the constant result node. + */ + return (Expr *) makeConst(result_type, result_typmod, result_collation, + resultTypLen, + const_val, const_is_null, + resultTypByVal); +} + + +/* + * inline_set_returning_function + * Attempt to "inline" a set-returning function in the FROM clause. + * + * "rte" is an RTE_FUNCTION rangetable entry. If it represents a call of a + * set-returning SQL function that can safely be inlined, expand the function + * and return the substitute Query structure. Otherwise, return NULL. + * + * We assume that the RTE's expression has already been put through + * eval_const_expressions(), which among other things will take care of + * default arguments and named-argument notation. + * + * This has a good deal of similarity to inline_function(), but that's + * for the non-set-returning case, and there are enough differences to + * justify separate functions. + */ +Query * +inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte) +{ + RangeTblFunction *rtfunc; + FuncExpr *fexpr; + Oid func_oid; + HeapTuple func_tuple; + Form_pg_proc funcform; + char *src; + Datum tmp; + bool isNull; + MemoryContext oldcxt; + MemoryContext mycxt; + inline_error_callback_arg callback_arg; + ErrorContextCallback sqlerrcontext; + SQLFunctionParseInfoPtr pinfo; + TypeFuncClass functypclass; + TupleDesc rettupdesc; + List *raw_parsetree_list; + List *querytree_list; + Query *querytree; + + Assert(rte->rtekind == RTE_FUNCTION); + + /* + * It doesn't make a lot of sense for a SQL SRF to refer to itself in its + * own FROM clause, since that must cause infinite recursion at runtime. + * It will cause this code to recurse too, so check for stack overflow. + * (There's no need to do more.) + */ + check_stack_depth(); + + /* Fail if the RTE has ORDINALITY - we don't implement that here. */ + if (rte->funcordinality) + return NULL; + + /* Fail if RTE isn't a single, simple FuncExpr */ + if (list_length(rte->functions) != 1) + return NULL; + rtfunc = (RangeTblFunction *) linitial(rte->functions); + + if (!IsA(rtfunc->funcexpr, FuncExpr)) + return NULL; + fexpr = (FuncExpr *) rtfunc->funcexpr; + + func_oid = fexpr->funcid; + + /* + * The function must be declared to return a set, else inlining would + * change the results if the contained SELECT didn't return exactly one + * row. + */ + if (!fexpr->funcretset) + return NULL; + + /* + * Refuse to inline if the arguments contain any volatile functions or + * sub-selects. 
Volatile functions are rejected because inlining may + * result in the arguments being evaluated multiple times, risking a + * change in behavior. Sub-selects are rejected partly for implementation + * reasons (pushing them down another level might change their behavior) + * and partly because they're likely to be expensive and so multiple + * evaluation would be bad. + */ + if (contain_volatile_functions((Node *) fexpr->args) || + contain_subplans((Node *) fexpr->args)) + return NULL; + + /* Check permission to call function (fail later, if not) */ + if (pg_proc_aclcheck(func_oid, GetUserId(), ACL_EXECUTE) != ACLCHECK_OK) + return NULL; + + /* Check whether a plugin wants to hook function entry/exit */ + if (FmgrHookIsNeeded(func_oid)) + return NULL; + + /* + * OK, let's take a look at the function's pg_proc entry. + */ + func_tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(func_oid)); + if (!HeapTupleIsValid(func_tuple)) + elog(ERROR, "cache lookup failed for function %u", func_oid); + funcform = (Form_pg_proc) GETSTRUCT(func_tuple); + + /* + * Forget it if the function is not SQL-language or has other showstopper + * properties. In particular it mustn't be declared STRICT, since we + * couldn't enforce that. It also mustn't be VOLATILE, because that is + * supposed to cause it to be executed with its own snapshot, rather than + * sharing the snapshot of the calling query. We also disallow returning + * SETOF VOID, because inlining would result in exposing the actual result + * of the function's last SELECT, which should not happen in that case. + * (Rechecking prokind, proretset, and pronargs is just paranoia.) + */ + if (funcform->prolang != SQLlanguageId || + funcform->prokind != PROKIND_FUNCTION || + funcform->proisstrict || + funcform->provolatile == PROVOLATILE_VOLATILE || + funcform->prorettype == VOIDOID || + funcform->prosecdef || + !funcform->proretset || + list_length(fexpr->args) != funcform->pronargs || + !heap_attisnull(func_tuple, Anum_pg_proc_proconfig, NULL)) + { + ReleaseSysCache(func_tuple); + return NULL; + } + + /* + * Make a temporary memory context, so that we don't leak all the stuff + * that parsing might create. + */ + mycxt = AllocSetContextCreate(CurrentMemoryContext, + "inline_set_returning_function", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(mycxt); + + /* Fetch the function body */ + tmp = SysCacheGetAttr(PROCOID, + func_tuple, + Anum_pg_proc_prosrc, + &isNull); + if (isNull) + elog(ERROR, "null prosrc for function %u", func_oid); + src = TextDatumGetCString(tmp); + + /* + * Setup error traceback support for ereport(). This is so that we can + * finger the function that bad information came from. + */ + callback_arg.proname = NameStr(funcform->proname); + callback_arg.prosrc = src; + + sqlerrcontext.callback = sql_inline_error_callback; + sqlerrcontext.arg = (void *) &callback_arg; + sqlerrcontext.previous = error_context_stack; + error_context_stack = &sqlerrcontext; + + /* If we have prosqlbody, pay attention to that not prosrc */ + tmp = SysCacheGetAttr(PROCOID, + func_tuple, + Anum_pg_proc_prosqlbody, + &isNull); + if (!isNull) + { + Node *n; + + n = stringToNode(TextDatumGetCString(tmp)); + if (IsA(n, List)) + querytree_list = linitial_node(List, castNode(List, n)); + else + querytree_list = list_make1(n); + if (list_length(querytree_list) != 1) + goto fail; + querytree = linitial(querytree_list); + + /* Acquire necessary locks, then apply rewriter. 
*/ + AcquireRewriteLocks(querytree, true, false); + querytree_list = pg_rewrite_query(querytree); + if (list_length(querytree_list) != 1) + goto fail; + querytree = linitial(querytree_list); + } + else + { + /* + * Set up to handle parameters while parsing the function body. We + * can use the FuncExpr just created as the input for + * prepare_sql_fn_parse_info. + */ + pinfo = prepare_sql_fn_parse_info(func_tuple, + (Node *) fexpr, + fexpr->inputcollid); + + /* + * Parse, analyze, and rewrite (unlike inline_function(), we can't + * skip rewriting here). We can fail as soon as we find more than one + * query, though. + */ + raw_parsetree_list = pg_parse_query(src); + if (list_length(raw_parsetree_list) != 1) + goto fail; + + querytree_list = pg_analyze_and_rewrite_withcb(linitial(raw_parsetree_list), + src, + (ParserSetupHook) sql_fn_parser_setup, + pinfo, NULL); + if (list_length(querytree_list) != 1) + goto fail; + querytree = linitial(querytree_list); + } + + /* + * Also resolve the actual function result tupdesc, if composite. If the + * function is just declared to return RECORD, dig the info out of the AS + * clause. + */ + functypclass = get_expr_result_type((Node *) fexpr, NULL, &rettupdesc); + if (functypclass == TYPEFUNC_RECORD) + rettupdesc = BuildDescFromLists(rtfunc->funccolnames, + rtfunc->funccoltypes, + rtfunc->funccoltypmods, + rtfunc->funccolcollations); + + /* + * The single command must be a plain SELECT. + */ + if (!IsA(querytree, Query) || + querytree->commandType != CMD_SELECT) + goto fail; + + /* + * Make sure the function (still) returns what it's declared to. This + * will raise an error if wrong, but that's okay since the function would + * fail at runtime anyway. Note that check_sql_fn_retval will also insert + * coercions if needed to make the tlist expression(s) match the declared + * type of the function. We also ask it to insert dummy NULL columns for + * any dropped columns in rettupdesc, so that the elements of the modified + * tlist match up to the attribute numbers. + * + * If the function returns a composite type, don't inline unless the check + * shows it's returning a whole tuple result; otherwise what it's + * returning is a single composite column which is not what we need. + */ + if (!check_sql_fn_retval(list_make1(querytree_list), + fexpr->funcresulttype, rettupdesc, + true, NULL) && + (functypclass == TYPEFUNC_COMPOSITE || + functypclass == TYPEFUNC_COMPOSITE_DOMAIN || + functypclass == TYPEFUNC_RECORD)) + goto fail; /* reject not-whole-tuple-result cases */ + + /* + * check_sql_fn_retval might've inserted a projection step, but that's + * fine; just make sure we use the upper Query. + */ + querytree = linitial_node(Query, querytree_list); + + /* + * Looks good --- substitute parameters into the query. + */ + querytree = substitute_actual_srf_parameters(querytree, + funcform->pronargs, + fexpr->args); + + /* + * Copy the modified query out of the temporary memory context, and clean + * up. + */ + MemoryContextSwitchTo(oldcxt); + + querytree = copyObject(querytree); + + MemoryContextDelete(mycxt); + error_context_stack = sqlerrcontext.previous; + ReleaseSysCache(func_tuple); + + /* + * We don't have to fix collations here because the upper query is already + * parsed, ie, the collations in the RTE are what count. + */ + + /* + * Since there is now no trace of the function in the plan tree, we must + * explicitly record the plan's dependency on the function. 
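+ * Otherwise a later change to the function's definition would not
+ * invalidate cached plans built from its inlined body.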
+ */ + record_plan_function_dependency(root, func_oid); + + /* + * We must also notice if the inserted query adds a dependency on the + * calling role due to RLS quals. + */ + if (querytree->hasRowSecurity) + root->glob->dependsOnRole = true; + + return querytree; + + /* Here if func is not inlinable: release temp memory and return NULL */ +fail: + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(mycxt); + error_context_stack = sqlerrcontext.previous; + ReleaseSysCache(func_tuple); + + return NULL; +} + +/* + * Replace Param nodes by appropriate actual parameters + * + * This is just enough different from substitute_actual_parameters() + * that it needs its own code. + */ +static Query * +substitute_actual_srf_parameters(Query *expr, int nargs, List *args) +{ + substitute_actual_srf_parameters_context context; + + context.nargs = nargs; + context.args = args; + context.sublevels_up = 1; + + return query_tree_mutator(expr, + substitute_actual_srf_parameters_mutator, + &context, + 0); +} + +static Node * +substitute_actual_srf_parameters_mutator(Node *node, + substitute_actual_srf_parameters_context *context) +{ + Node *result; + + if (node == NULL) + return NULL; + if (IsA(node, Query)) + { + context->sublevels_up++; + result = (Node *) query_tree_mutator((Query *) node, + substitute_actual_srf_parameters_mutator, + (void *) context, + 0); + context->sublevels_up--; + return result; + } + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind == PARAM_EXTERN) + { + if (param->paramid <= 0 || param->paramid > context->nargs) + elog(ERROR, "invalid paramid: %d", param->paramid); + + /* + * Since the parameter is being inserted into a subquery, we must + * adjust levels. + */ + result = copyObject(list_nth(context->args, param->paramid - 1)); + IncrementVarSublevelsUp(result, context->sublevels_up, 0); + return result; + } + } + return expression_tree_mutator(node, + substitute_actual_srf_parameters_mutator, + (void *) context); +} + +/* + * pull_paramids + * Returns a Bitmapset containing the paramids of all Params in 'expr'. 
+ */ +Bitmapset * +pull_paramids(Expr *expr) +{ + Bitmapset *result = NULL; + + (void) pull_paramids_walker((Node *) expr, &result); + + return result; +} + +static bool +pull_paramids_walker(Node *node, Bitmapset **context) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + *context = bms_add_member(*context, param->paramid); + return false; + } + return expression_tree_walker(node, pull_paramids_walker, + (void *) context); +} diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c new file mode 100644 index 0000000..3c11f5d --- /dev/null +++ b/src/backend/optimizer/util/inherit.c @@ -0,0 +1,949 @@ +/*------------------------------------------------------------------------- + * + * inherit.c + * Routines to process child relations in inheritance trees + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/inherit.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/partition.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/appendinfo.h" +#include "optimizer/inherit.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/plancat.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" +#include "parser/parsetree.h" +#include "partitioning/partdesc.h" +#include "partitioning/partprune.h" +#include "utils/rel.h" + + +static void expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, + RangeTblEntry *parentrte, + Index parentRTindex, Relation parentrel, + PlanRowMark *top_parentrc, LOCKMODE lockmode); +static void expand_single_inheritance_child(PlannerInfo *root, + RangeTblEntry *parentrte, + Index parentRTindex, Relation parentrel, + PlanRowMark *top_parentrc, Relation childrel, + RangeTblEntry **childrte_p, + Index *childRTindex_p); +static Bitmapset *translate_col_privs(const Bitmapset *parent_privs, + List *translated_vars); +static Bitmapset *translate_col_privs_multilevel(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *parent_rel, + Bitmapset *parent_cols); +static void expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti); + + +/* + * expand_inherited_rtentry + * Expand a rangetable entry that has the "inh" bit set. + * + * "inh" is only allowed in two cases: RELATION and SUBQUERY RTEs. + * + * "inh" on a plain RELATION RTE means that it is a partitioned table or the + * parent of a traditional-inheritance set. In this case we must add entries + * for all the interesting child tables to the query's rangetable, and build + * additional planner data structures for them, including RelOptInfos, + * AppendRelInfos, and possibly PlanRowMarks. + * + * Note that the original RTE is considered to represent the whole inheritance + * set. In the case of traditional inheritance, the first of the generated + * RTEs is an RTE for the same table, but with inh = false, to represent the + * parent table in its role as a simple member of the inheritance set. 
For + * partitioning, we don't need a second RTE because the partitioned table + * itself has no data and need not be scanned. + * + * "inh" on a SUBQUERY RTE means that it's the parent of a UNION ALL group, + * which is treated as an appendrel similarly to inheritance cases; however, + * we already made RTEs and AppendRelInfos for the subqueries. We only need + * to build RelOptInfos for them, which is done by expand_appendrel_subquery. + */ +void +expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti) +{ + Oid parentOID; + Relation oldrelation; + LOCKMODE lockmode; + PlanRowMark *oldrc; + bool old_isParent = false; + int old_allMarkTypes = 0; + + Assert(rte->inh); /* else caller error */ + + if (rte->rtekind == RTE_SUBQUERY) + { + expand_appendrel_subquery(root, rel, rte, rti); + return; + } + + Assert(rte->rtekind == RTE_RELATION); + + parentOID = rte->relid; + + /* + * We used to check has_subclass() here, but there's no longer any need + * to, because subquery_planner already did. + */ + + /* + * The rewriter should already have obtained an appropriate lock on each + * relation named in the query, so we can open the parent relation without + * locking it. However, for each child relation we add to the query, we + * must obtain an appropriate lock, because this will be the first use of + * those relations in the parse/rewrite/plan pipeline. Child rels should + * use the same lockmode as their parent. + */ + oldrelation = table_open(parentOID, NoLock); + lockmode = rte->rellockmode; + + /* + * If parent relation is selected FOR UPDATE/SHARE, we need to mark its + * PlanRowMark as isParent = true, and generate a new PlanRowMark for each + * child. + */ + oldrc = get_plan_rowmark(root->rowMarks, rti); + if (oldrc) + { + old_isParent = oldrc->isParent; + oldrc->isParent = true; + /* Save initial value of allMarkTypes before children add to it */ + old_allMarkTypes = oldrc->allMarkTypes; + } + + /* Scan the inheritance set and expand it */ + if (oldrelation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * Partitioned table, so set up for partitioning. + */ + Assert(rte->relkind == RELKIND_PARTITIONED_TABLE); + + /* + * Recursively expand and lock the partitions. While at it, also + * extract the partition key columns of all the partitioned tables. + */ + expand_partitioned_rtentry(root, rel, rte, rti, + oldrelation, oldrc, lockmode); + } + else + { + /* + * Ordinary table, so process traditional-inheritance children. (Note + * that partitioned tables are not allowed to have inheritance + * children, so it's not possible for both cases to apply.) + */ + List *inhOIDs; + ListCell *l; + + /* Scan for all members of inheritance set, acquire needed locks */ + inhOIDs = find_all_inheritors(parentOID, lockmode, NULL); + + /* + * We used to special-case the situation where the table no longer has + * any children, by clearing rte->inh and exiting. That no longer + * works, because this function doesn't get run until after decisions + * have been made that depend on rte->inh. We have to treat such + * situations as normal inheritance. The table itself should always + * have been found, though. + */ + Assert(inhOIDs != NIL); + Assert(linitial_oid(inhOIDs) == parentOID); + + /* Expand simple_rel_array and friends to hold child objects. */ + expand_planner_arrays(root, list_length(inhOIDs)); + + /* + * Expand inheritance children in the order the OIDs were returned by + * find_all_inheritors. 
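+ * (The parent table itself is first in that list, and is expanded here
+ * as an ordinary member of the inheritance set.)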
+ */ + foreach(l, inhOIDs) + { + Oid childOID = lfirst_oid(l); + Relation newrelation; + RangeTblEntry *childrte; + Index childRTindex; + + /* Open rel if needed; we already have required locks */ + if (childOID != parentOID) + newrelation = table_open(childOID, NoLock); + else + newrelation = oldrelation; + + /* + * It is possible that the parent table has children that are temp + * tables of other backends. We cannot safely access such tables + * (because of buffering issues), and the best thing to do seems + * to be to silently ignore them. + */ + if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation)) + { + table_close(newrelation, lockmode); + continue; + } + + /* Create RTE and AppendRelInfo, plus PlanRowMark if needed. */ + expand_single_inheritance_child(root, rte, rti, oldrelation, + oldrc, newrelation, + &childrte, &childRTindex); + + /* Create the otherrel RelOptInfo too. */ + (void) build_simple_rel(root, childRTindex, rel); + + /* Close child relations, but keep locks */ + if (childOID != parentOID) + table_close(newrelation, NoLock); + } + } + + /* + * Some children might require different mark types, which would've been + * reported into oldrc. If so, add relevant entries to the top-level + * targetlist and update parent rel's reltarget. This should match what + * preprocess_targetlist() would have added if the mark types had been + * requested originally. + * + * (Someday it might be useful to fold these resjunk columns into the + * row-identity-column management used for UPDATE/DELETE. Today is not + * that day, however.) + */ + if (oldrc) + { + int new_allMarkTypes = oldrc->allMarkTypes; + Var *var; + TargetEntry *tle; + char resname[32]; + List *newvars = NIL; + + /* Add TID junk Var if needed, unless we had it already */ + if (new_allMarkTypes & ~(1 << ROW_MARK_COPY) && + !(old_allMarkTypes & ~(1 << ROW_MARK_COPY))) + { + /* Need to fetch TID */ + var = makeVar(oldrc->rti, + SelfItemPointerAttributeNumber, + TIDOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "ctid%u", oldrc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(root->processed_tlist) + 1, + pstrdup(resname), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + newvars = lappend(newvars, var); + } + + /* Add whole-row junk Var if needed, unless we had it already */ + if ((new_allMarkTypes & (1 << ROW_MARK_COPY)) && + !(old_allMarkTypes & (1 << ROW_MARK_COPY))) + { + var = makeWholeRowVar(planner_rt_fetch(oldrc->rti, root), + oldrc->rti, + 0, + false); + snprintf(resname, sizeof(resname), "wholerow%u", oldrc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(root->processed_tlist) + 1, + pstrdup(resname), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + newvars = lappend(newvars, var); + } + + /* Add tableoid junk Var, unless we had it already */ + if (!old_isParent) + { + var = makeVar(oldrc->rti, + TableOidAttributeNumber, + OIDOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "tableoid%u", oldrc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(root->processed_tlist) + 1, + pstrdup(resname), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + newvars = lappend(newvars, var); + } + + /* + * Add the newly added Vars to parent's reltarget. We needn't worry + * about the children's reltargets, they'll be made later. 
+ */ + add_vars_to_targetlist(root, newvars, bms_make_singleton(0), false); + } + + table_close(oldrelation, NoLock); +} + +/* + * expand_partitioned_rtentry + * Recursively expand an RTE for a partitioned table. + */ +static void +expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, + RangeTblEntry *parentrte, + Index parentRTindex, Relation parentrel, + PlanRowMark *top_parentrc, LOCKMODE lockmode) +{ + PartitionDesc partdesc; + Bitmapset *live_parts; + int num_live_parts; + int i; + + check_stack_depth(); + + Assert(parentrte->inh); + + partdesc = PartitionDirectoryLookup(root->glob->partition_directory, + parentrel); + + /* A partitioned table should always have a partition descriptor. */ + Assert(partdesc); + + /* + * Note down whether any partition key cols are being updated. Though it's + * the root partitioned table's updatedCols we are interested in, we + * instead use parentrte to get the updatedCols. This is convenient + * because parentrte already has the root partrel's updatedCols translated + * to match the attribute ordering of parentrel. + */ + if (!root->partColsUpdated) + root->partColsUpdated = + has_partition_attrs(parentrel, parentrte->updatedCols, NULL); + + /* Nothing further to do here if there are no partitions. */ + if (partdesc->nparts == 0) + return; + + /* + * Perform partition pruning using restriction clauses assigned to parent + * relation. live_parts will contain PartitionDesc indexes of partitions + * that survive pruning. Below, we will initialize child objects for the + * surviving partitions. + */ + relinfo->live_parts = live_parts = prune_append_rel_partitions(relinfo); + + /* Expand simple_rel_array and friends to hold child objects. */ + num_live_parts = bms_num_members(live_parts); + if (num_live_parts > 0) + expand_planner_arrays(root, num_live_parts); + + /* + * We also store partition RelOptInfo pointers in the parent relation. + * Since we're palloc0'ing, slots corresponding to pruned partitions will + * contain NULL. + */ + Assert(relinfo->part_rels == NULL); + relinfo->part_rels = (RelOptInfo **) + palloc0(relinfo->nparts * sizeof(RelOptInfo *)); + + /* + * Create a child RTE for each live partition. Note that unlike + * traditional inheritance, we don't need a child RTE for the partitioned + * table itself, because it's not going to be scanned. + */ + i = -1; + while ((i = bms_next_member(live_parts, i)) >= 0) + { + Oid childOID = partdesc->oids[i]; + Relation childrel; + RangeTblEntry *childrte; + Index childRTindex; + RelOptInfo *childrelinfo; + + /* Open rel, acquiring required locks */ + childrel = table_open(childOID, lockmode); + + /* + * Temporary partitions belonging to other sessions should have been + * disallowed at definition, but for paranoia's sake, let's double + * check. + */ + if (RELATION_IS_OTHER_TEMP(childrel)) + elog(ERROR, "temporary relation from another session found as partition"); + + /* Create RTE and AppendRelInfo, plus PlanRowMark if needed. */ + expand_single_inheritance_child(root, parentrte, parentRTindex, + parentrel, top_parentrc, childrel, + &childrte, &childRTindex); + + /* Create the otherrel RelOptInfo too. 
*/ + childrelinfo = build_simple_rel(root, childRTindex, relinfo); + relinfo->part_rels[i] = childrelinfo; + relinfo->all_partrels = bms_add_members(relinfo->all_partrels, + childrelinfo->relids); + + /* If this child is itself partitioned, recurse */ + if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + expand_partitioned_rtentry(root, childrelinfo, + childrte, childRTindex, + childrel, top_parentrc, lockmode); + + /* Close child relation, but keep locks */ + table_close(childrel, NoLock); + } +} + +/* + * expand_single_inheritance_child + * Build a RangeTblEntry and an AppendRelInfo, plus maybe a PlanRowMark. + * + * We now expand the partition hierarchy level by level, creating a + * corresponding hierarchy of AppendRelInfos and RelOptInfos, where each + * partitioned descendant acts as a parent of its immediate partitions. + * (This is a difference from what older versions of PostgreSQL did and what + * is still done in the case of table inheritance for unpartitioned tables, + * where the hierarchy is flattened during RTE expansion.) + * + * PlanRowMarks still carry the top-parent's RTI, and the top-parent's + * allMarkTypes field still accumulates values from all descendents. + * + * "parentrte" and "parentRTindex" are immediate parent's RTE and + * RTI. "top_parentrc" is top parent's PlanRowMark. + * + * The child RangeTblEntry and its RTI are returned in "childrte_p" and + * "childRTindex_p" resp. + */ +static void +expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, + Index parentRTindex, Relation parentrel, + PlanRowMark *top_parentrc, Relation childrel, + RangeTblEntry **childrte_p, + Index *childRTindex_p) +{ + Query *parse = root->parse; + Oid parentOID = RelationGetRelid(parentrel); + Oid childOID = RelationGetRelid(childrel); + RangeTblEntry *childrte; + Index childRTindex; + AppendRelInfo *appinfo; + TupleDesc child_tupdesc; + List *parent_colnames; + List *child_colnames; + + /* + * Build an RTE for the child, and attach to query's rangetable list. We + * copy most scalar fields of the parent's RTE, but replace relation OID, + * relkind, and inh for the child. Also, set requiredPerms to zero since + * all required permissions checks are done on the original RTE. Likewise, + * set the child's securityQuals to empty, because we only want to apply + * the parent's RLS conditions regardless of what RLS properties + * individual children may have. (This is an intentional choice to make + * inherited RLS work like regular permissions checks.) The parent + * securityQuals will be propagated to children along with other base + * restriction clauses, so we don't need to do it here. Other + * infrastructure of the parent RTE has to be translated to match the + * child table's column ordering, which we do below, so a "flat" copy is + * sufficient to start with. + */ + childrte = makeNode(RangeTblEntry); + memcpy(childrte, parentrte, sizeof(RangeTblEntry)); + Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */ + childrte->relid = childOID; + childrte->relkind = childrel->rd_rel->relkind; + /* A partitioned child will need to be expanded further. 
*/ + if (childrte->relkind == RELKIND_PARTITIONED_TABLE) + { + Assert(childOID != parentOID); + childrte->inh = true; + } + else + childrte->inh = false; + childrte->requiredPerms = 0; + childrte->securityQuals = NIL; + + /* Link not-yet-fully-filled child RTE into data structures */ + parse->rtable = lappend(parse->rtable, childrte); + childRTindex = list_length(parse->rtable); + *childrte_p = childrte; + *childRTindex_p = childRTindex; + + /* + * Build an AppendRelInfo struct for each parent/child pair. + */ + appinfo = make_append_rel_info(parentrel, childrel, + parentRTindex, childRTindex); + root->append_rel_list = lappend(root->append_rel_list, appinfo); + + /* tablesample is probably null, but copy it */ + childrte->tablesample = copyObject(parentrte->tablesample); + + /* + * Construct an alias clause for the child, which we can also use as eref. + * This is important so that EXPLAIN will print the right column aliases + * for child-table columns. (Since ruleutils.c doesn't have any easy way + * to reassociate parent and child columns, we must get the child column + * aliases right to start with. Note that setting childrte->alias forces + * ruleutils.c to use these column names, which it otherwise would not.) + */ + child_tupdesc = RelationGetDescr(childrel); + parent_colnames = parentrte->eref->colnames; + child_colnames = NIL; + for (int cattno = 0; cattno < child_tupdesc->natts; cattno++) + { + Form_pg_attribute att = TupleDescAttr(child_tupdesc, cattno); + const char *attname; + + if (att->attisdropped) + { + /* Always insert an empty string for a dropped column */ + attname = ""; + } + else if (appinfo->parent_colnos[cattno] > 0 && + appinfo->parent_colnos[cattno] <= list_length(parent_colnames)) + { + /* Duplicate the query-assigned name for the parent column */ + attname = strVal(list_nth(parent_colnames, + appinfo->parent_colnos[cattno] - 1)); + } + else + { + /* New column, just use its real name */ + attname = NameStr(att->attname); + } + child_colnames = lappend(child_colnames, makeString(pstrdup(attname))); + } + + /* + * We just duplicate the parent's table alias name for each child. If the + * plan gets printed, ruleutils.c has to sort out unique table aliases to + * use, which it can handle. + */ + childrte->alias = childrte->eref = makeAlias(parentrte->eref->aliasname, + child_colnames); + + /* + * Translate the column permissions bitmaps to the child's attnums (we + * have to build the translated_vars list before we can do this). But if + * this is the parent table, we can just duplicate the parent's bitmaps. + * + * Note: we need to do this even though the executor won't run any + * permissions checks on the child RTE. The insertedCols/updatedCols + * bitmaps may be examined for trigger-firing purposes. + */ + if (childOID != parentOID) + { + childrte->selectedCols = translate_col_privs(parentrte->selectedCols, + appinfo->translated_vars); + childrte->insertedCols = translate_col_privs(parentrte->insertedCols, + appinfo->translated_vars); + childrte->updatedCols = translate_col_privs(parentrte->updatedCols, + appinfo->translated_vars); + } + else + { + childrte->selectedCols = bms_copy(parentrte->selectedCols); + childrte->insertedCols = bms_copy(parentrte->insertedCols); + childrte->updatedCols = bms_copy(parentrte->updatedCols); + } + + /* + * Store the RTE and appinfo in the respective PlannerInfo arrays, which + * the caller must already have allocated space for. 
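+ * (That space was reserved by the expand_planner_arrays calls made
+ * before expanding children.)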
+ */ + Assert(childRTindex < root->simple_rel_array_size); + Assert(root->simple_rte_array[childRTindex] == NULL); + root->simple_rte_array[childRTindex] = childrte; + Assert(root->append_rel_array[childRTindex] == NULL); + root->append_rel_array[childRTindex] = appinfo; + + /* + * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE. + */ + if (top_parentrc) + { + PlanRowMark *childrc = makeNode(PlanRowMark); + + childrc->rti = childRTindex; + childrc->prti = top_parentrc->rti; + childrc->rowmarkId = top_parentrc->rowmarkId; + /* Reselect rowmark type, because relkind might not match parent */ + childrc->markType = select_rowmark_type(childrte, + top_parentrc->strength); + childrc->allMarkTypes = (1 << childrc->markType); + childrc->strength = top_parentrc->strength; + childrc->waitPolicy = top_parentrc->waitPolicy; + + /* + * We mark RowMarks for partitioned child tables as parent RowMarks so + * that the executor ignores them (except their existence means that + * the child tables will be locked using the appropriate mode). + */ + childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE); + + /* Include child's rowmark type in top parent's allMarkTypes */ + top_parentrc->allMarkTypes |= childrc->allMarkTypes; + + root->rowMarks = lappend(root->rowMarks, childrc); + } + + /* + * If we are creating a child of the query target relation (only possible + * in UPDATE/DELETE/MERGE), add it to all_result_relids, as well as + * leaf_result_relids if appropriate, and make sure that we generate + * required row-identity data. + */ + if (bms_is_member(parentRTindex, root->all_result_relids)) + { + /* OK, record the child as a result rel too. */ + root->all_result_relids = bms_add_member(root->all_result_relids, + childRTindex); + + /* Non-leaf partitions don't need any row identity info. */ + if (childrte->relkind != RELKIND_PARTITIONED_TABLE) + { + Var *rrvar; + + root->leaf_result_relids = bms_add_member(root->leaf_result_relids, + childRTindex); + + /* + * If we have any child target relations, assume they all need to + * generate a junk "tableoid" column. (If only one child survives + * pruning, we wouldn't really need this, but it's not worth + * thrashing about to avoid it.) + */ + rrvar = makeVar(childRTindex, + TableOidAttributeNumber, + OIDOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, rrvar, childRTindex, "tableoid"); + + /* Register any row-identity columns needed by this child. */ + add_row_identity_columns(root, childRTindex, + childrte, childrel); + } + } +} + +/* + * get_rel_all_updated_cols + * Returns the set of columns of a given "simple" relation that are + * updated by this query. + */ +Bitmapset * +get_rel_all_updated_cols(PlannerInfo *root, RelOptInfo *rel) +{ + Index relid; + RangeTblEntry *rte; + Bitmapset *updatedCols, + *extraUpdatedCols; + + Assert(root->parse->commandType == CMD_UPDATE); + Assert(IS_SIMPLE_REL(rel)); + + /* + * We obtain updatedCols for the query's result relation. Then, if + * necessary, we map it to the column numbers of the relation for which + * they were requested. 
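+ * (The mapping step is needed when "rel" is an inheritance or
+ * partition child of the result relation, whose columns may be
+ * numbered differently from the parent's.)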
+ */ + relid = root->parse->resultRelation; + rte = planner_rt_fetch(relid, root); + + updatedCols = rte->updatedCols; + + if (rel->relid != relid) + { + RelOptInfo *top_parent_rel = find_base_rel(root, relid); + + Assert(IS_OTHER_REL(rel)); + + updatedCols = translate_col_privs_multilevel(root, rel, top_parent_rel, + updatedCols); + } + + /* + * Now we must check to see if there are any generated columns that depend + * on the updatedCols, and add them to the result. + */ + extraUpdatedCols = get_dependent_generated_columns(root, rel->relid, + updatedCols); + + return bms_union(updatedCols, extraUpdatedCols); +} + +/* + * translate_col_privs + * Translate a bitmapset representing per-column privileges from the + * parent rel's attribute numbering to the child's. + * + * The only surprise here is that we don't translate a parent whole-row + * reference into a child whole-row reference. That would mean requiring + * permissions on all child columns, which is overly strict, since the + * query is really only going to reference the inherited columns. Instead + * we set the per-column bits for all inherited columns. + */ +static Bitmapset * +translate_col_privs(const Bitmapset *parent_privs, + List *translated_vars) +{ + Bitmapset *child_privs = NULL; + bool whole_row; + int attno; + ListCell *lc; + + /* System attributes have the same numbers in all tables */ + for (attno = FirstLowInvalidHeapAttributeNumber + 1; attno < 0; attno++) + { + if (bms_is_member(attno - FirstLowInvalidHeapAttributeNumber, + parent_privs)) + child_privs = bms_add_member(child_privs, + attno - FirstLowInvalidHeapAttributeNumber); + } + + /* Check if parent has whole-row reference */ + whole_row = bms_is_member(InvalidAttrNumber - FirstLowInvalidHeapAttributeNumber, + parent_privs); + + /* And now translate the regular user attributes, using the vars list */ + attno = InvalidAttrNumber; + foreach(lc, translated_vars) + { + Var *var = lfirst_node(Var, lc); + + attno++; + if (var == NULL) /* ignore dropped columns */ + continue; + if (whole_row || + bms_is_member(attno - FirstLowInvalidHeapAttributeNumber, + parent_privs)) + child_privs = bms_add_member(child_privs, + var->varattno - FirstLowInvalidHeapAttributeNumber); + } + + return child_privs; +} + +/* + * translate_col_privs_multilevel + * Recursively translates the column numbers contained in 'parent_cols' + * to the column numbers of a descendant relation given by 'rel' + * + * Note that because this is based on translate_col_privs, it will expand + * a whole-row reference into all inherited columns. This is not an issue + * for current usages, but beware. + */ +static Bitmapset * +translate_col_privs_multilevel(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *parent_rel, + Bitmapset *parent_cols) +{ + AppendRelInfo *appinfo; + + /* Fast path for easy case. */ + if (parent_cols == NULL) + return NULL; + + Assert(root->append_rel_array != NULL); + appinfo = root->append_rel_array[rel->relid]; + Assert(appinfo != NULL); + + /* Recurse if immediate parent is not the top parent. */ + if (appinfo->parent_relid != parent_rel->relid) + { + RelOptInfo *next_parent = find_base_rel(root, appinfo->parent_relid); + + parent_cols = translate_col_privs_multilevel(root, next_parent, + parent_rel, + parent_cols); + } + + /* Now translate for this child. 
*/ + return translate_col_privs(parent_cols, appinfo->translated_vars); +} + +/* + * expand_appendrel_subquery + * Add "other rel" RelOptInfos for the children of an appendrel baserel + * + * "rel" is a subquery relation that has the rte->inh flag set, meaning it + * is a UNION ALL subquery that's been flattened into an appendrel, with + * child subqueries listed in root->append_rel_list. We need to build + * a RelOptInfo for each child relation so that we can plan scans on them. + */ +static void +expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte, Index rti) +{ + ListCell *l; + + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + Index childRTindex = appinfo->child_relid; + RangeTblEntry *childrte; + RelOptInfo *childrel; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != rti) + continue; + + /* find the child RTE, which should already exist */ + Assert(childRTindex < root->simple_rel_array_size); + childrte = root->simple_rte_array[childRTindex]; + Assert(childrte != NULL); + + /* Build the child RelOptInfo. */ + childrel = build_simple_rel(root, childRTindex, rel); + + /* Child may itself be an inherited rel, either table or subquery. */ + if (childrte->inh) + expand_inherited_rtentry(root, childrel, childrte, childRTindex); + } +} + + +/* + * apply_child_basequals + * Populate childrel's base restriction quals from parent rel's quals, + * translating them using appinfo. + * + * If any of the resulting clauses evaluate to constant false or NULL, we + * return false and don't apply any quals. Caller should mark the relation as + * a dummy rel in this case, since it doesn't need to be scanned. + */ +bool +apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel, + RelOptInfo *childrel, RangeTblEntry *childRTE, + AppendRelInfo *appinfo) +{ + List *childquals; + Index cq_min_security; + ListCell *lc; + + /* + * The child rel's targetlist might contain non-Var expressions, which + * means that substitution into the quals could produce opportunities for + * const-simplification, and perhaps even pseudoconstant quals. Therefore, + * transform each RestrictInfo separately to see if it reduces to a + * constant or pseudoconstant. (We must process them separately to keep + * track of the security level of each qual.) 
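+ *
+ * For example, in an appendrel built from UNION ALL arms, a child's
+ * expression for a parent column may be a plain constant; substituting
+ * it into a parent qual such as "x > 0" can then reduce that qual to
+ * constant TRUE (so it can be dropped) or constant FALSE (so the child
+ * need not be scanned at all).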
+ */ + childquals = NIL; + cq_min_security = UINT_MAX; + foreach(lc, parentrel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *childqual; + ListCell *lc2; + + Assert(IsA(rinfo, RestrictInfo)); + childqual = adjust_appendrel_attrs(root, + (Node *) rinfo->clause, + 1, &appinfo); + childqual = eval_const_expressions(root, childqual); + /* check for flat-out constant */ + if (childqual && IsA(childqual, Const)) + { + if (((Const *) childqual)->constisnull || + !DatumGetBool(((Const *) childqual)->constvalue)) + { + /* Restriction reduces to constant FALSE or NULL */ + return false; + } + /* Restriction reduces to constant TRUE, so drop it */ + continue; + } + /* might have gotten an AND clause, if so flatten it */ + foreach(lc2, make_ands_implicit((Expr *) childqual)) + { + Node *onecq = (Node *) lfirst(lc2); + bool pseudoconstant; + + /* check for pseudoconstant (no Vars or volatile functions) */ + pseudoconstant = + !contain_vars_of_level(onecq, 0) && + !contain_volatile_functions(onecq); + if (pseudoconstant) + { + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; + } + /* reconstitute RestrictInfo with appropriate properties */ + childquals = lappend(childquals, + make_restrictinfo(root, + (Expr *) onecq, + rinfo->is_pushed_down, + rinfo->outerjoin_delayed, + pseudoconstant, + rinfo->security_level, + NULL, NULL, NULL)); + /* track minimum security level among child quals */ + cq_min_security = Min(cq_min_security, rinfo->security_level); + } + } + + /* + * In addition to the quals inherited from the parent, we might have + * securityQuals associated with this particular child node. (Currently + * this can only happen in appendrels originating from UNION ALL; + * inheritance child tables don't have their own securityQuals, see + * expand_single_inheritance_child().) Pull any such securityQuals up + * into the baserestrictinfo for the child. This is similar to + * process_security_barrier_quals() for the parent rel, except that we + * can't make any general deductions from such quals, since they don't + * hold for the whole appendrel. + */ + if (childRTE->securityQuals) + { + Index security_level = 0; + + foreach(lc, childRTE->securityQuals) + { + List *qualset = (List *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, qualset) + { + Expr *qual = (Expr *) lfirst(lc2); + + /* not likely that we'd see constants here, so no check */ + childquals = lappend(childquals, + make_restrictinfo(root, qual, + true, false, false, + security_level, + NULL, NULL, NULL)); + cq_min_security = Min(cq_min_security, security_level); + } + security_level++; + } + Assert(security_level <= root->qual_security_level); + } + + /* + * OK, we've got all the baserestrictinfo quals for this child. 
+ */ + childrel->baserestrictinfo = childquals; + childrel->baserestrict_min_security = cq_min_security; + + return true; +} diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c new file mode 100644 index 0000000..d4cffdb --- /dev/null +++ b/src/backend/optimizer/util/joininfo.c @@ -0,0 +1,140 @@ +/*------------------------------------------------------------------------- + * + * joininfo.c + * joininfo list manipulation routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/joininfo.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" + + +/* + * have_relevant_joinclause + * Detect whether there is a joinclause that involves + * the two given relations. + * + * Note: the joinclause does not have to be evaluable with only these two + * relations. This is intentional. For example consider + * SELECT * FROM a, b, c WHERE a.x = (b.y + c.z) + * If a is much larger than the other tables, it may be worthwhile to + * cross-join b and c and then use an inner indexscan on a.x. Therefore + * we should consider this joinclause as reason to join b to c, even though + * it can't be applied at that join step. + */ +bool +have_relevant_joinclause(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2) +{ + bool result = false; + List *joininfo; + Relids other_relids; + ListCell *l; + + /* + * We could scan either relation's joininfo list; may as well use the + * shorter one. + */ + if (list_length(rel1->joininfo) <= list_length(rel2->joininfo)) + { + joininfo = rel1->joininfo; + other_relids = rel2->relids; + } + else + { + joininfo = rel2->joininfo; + other_relids = rel1->relids; + } + + foreach(l, joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + if (bms_overlap(other_relids, rinfo->required_relids)) + { + result = true; + break; + } + } + + /* + * We also need to check the EquivalenceClass data structure, which might + * contain relationships not emitted into the joininfo lists. + */ + if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins) + result = have_relevant_eclass_joinclause(root, rel1, rel2); + + return result; +} + + +/* + * add_join_clause_to_rels + * Add 'restrictinfo' to the joininfo list of each relation it requires. + * + * Note that the same copy of the restrictinfo node is linked to by all the + * lists it is in. This allows us to exploit caching of information about + * the restriction clause (but we must be careful that the information does + * not depend on context). + * + * 'restrictinfo' describes the join clause + * 'join_relids' is the list of relations participating in the join clause + * (there must be more than one) + */ +void +add_join_clause_to_rels(PlannerInfo *root, + RestrictInfo *restrictinfo, + Relids join_relids) +{ + int cur_relid; + + cur_relid = -1; + while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, cur_relid); + + rel->joininfo = lappend(rel->joininfo, restrictinfo); + } +} + +/* + * remove_join_clause_from_rels + * Delete 'restrictinfo' from all the joininfo lists it is in + * + * This reverses the effect of add_join_clause_to_rels. It's used when we + * discover that a relation need not be joined at all. 
+ * + * 'restrictinfo' describes the join clause + * 'join_relids' is the list of relations participating in the join clause + * (there must be more than one) + */ +void +remove_join_clause_from_rels(PlannerInfo *root, + RestrictInfo *restrictinfo, + Relids join_relids) +{ + int cur_relid; + + cur_relid = -1; + while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, cur_relid); + + /* + * Remove the restrictinfo from the list. Pointer comparison is + * sufficient. + */ + Assert(list_member_ptr(rel->joininfo, restrictinfo)); + rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo); + } +} diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c new file mode 100644 index 0000000..b1363df --- /dev/null +++ b/src/backend/optimizer/util/orclauses.c @@ -0,0 +1,360 @@ +/*------------------------------------------------------------------------- + * + * orclauses.c + * Routines to extract restriction OR clauses from join OR clauses + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/orclauses.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/orclauses.h" +#include "optimizer/restrictinfo.h" + + +static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel); +static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel); +static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, + Expr *orclause, RestrictInfo *join_or_rinfo); + + +/* + * extract_restriction_or_clauses + * Examine join OR-of-AND clauses to see if any useful restriction OR + * clauses can be extracted. If so, add them to the query. + * + * Although a join clause must reference multiple relations overall, + * an OR of ANDs clause might contain sub-clauses that reference just one + * relation and can be used to build a restriction clause for that rel. + * For example consider + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); + * We can transform this into + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) + * AND (a.x = 42 OR a.x = 44) + * AND (b.y = 43 OR b.z = 45); + * which allows the latter clauses to be applied during the scans of a and b, + * perhaps as index qualifications, and in any case reducing the number of + * rows arriving at the join. In essence this is a partial transformation to + * CNF (AND of ORs format). It is not complete, however, because we do not + * unravel the original OR --- doing so would usually bloat the qualification + * expression to little gain. + * + * The added quals are partially redundant with the original OR, and therefore + * would cause the size of the joinrel to be underestimated when it is finally + * formed. (This would be true of a full transformation to CNF as well; the + * fault is not really in the transformation, but in clauselist_selectivity's + * inability to recognize redundant conditions.) We can compensate for this + * redundancy by changing the cached selectivity of the original OR clause, + * canceling out the (valid) reduction in the estimated sizes of the base + * relations so that the estimated joinrel size remains the same. 
This is + * a MAJOR HACK: it depends on the fact that clause selectivities are cached + * and on the fact that the same RestrictInfo node will appear in every + * joininfo list that might be used when the joinrel is formed. + * And it doesn't work in cases where the size estimation is nonlinear + * (i.e., outer and IN joins). But it beats not doing anything. + * + * We examine each base relation to see if join clauses associated with it + * contain extractable restriction conditions. If so, add those conditions + * to the rel's baserestrictinfo and update the cached selectivities of the + * join clauses. Note that the same join clause will be examined afresh + * from the point of view of each baserel that participates in it, so its + * cached selectivity may get updated multiple times. + */ +void +extract_restriction_or_clauses(PlannerInfo *root) +{ + Index rti; + + /* Examine each baserel for potential join OR clauses */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + ListCell *lc; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + /* + * Find potentially interesting OR joinclauses. We can use any + * joinclause that is considered safe to move to this rel by the + * parameterized-path machinery, even though what we are going to do + * with it is not exactly a parameterized path. + * + * However, it seems best to ignore clauses that have been marked + * redundant (by setting norm_selec > 1). That likely can't happen + * for OR clauses, but let's be safe. + */ + foreach(lc, rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (restriction_is_or_clause(rinfo) && + join_clause_is_movable_to(rinfo, rel) && + rinfo->norm_selec <= 1) + { + /* Try to extract a qual for this rel only */ + Expr *orclause = extract_or_clause(rinfo, rel); + + /* + * If successful, decide whether we want to use the clause, + * and insert it into the rel's restrictinfo list if so. + */ + if (orclause) + consider_new_or_clause(root, rel, orclause, rinfo); + } + } + } +} + +/* + * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? + */ +static bool +is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel) +{ + /* + * We want clauses that mention the rel, and only the rel. So in + * particular pseudoconstant clauses can be rejected quickly. Then check + * the clause's Var membership. + */ + if (rinfo->pseudoconstant) + return false; + if (!bms_equal(rinfo->clause_relids, rel->relids)) + return false; + + /* We don't want extra evaluations of any volatile functions */ + if (contain_volatile_functions((Node *) rinfo->clause)) + return false; + + return true; +} + +/* + * Try to extract a restriction clause mentioning only "rel" from the given + * join OR-clause. + * + * We must be able to extract at least one qual for this rel from each of + * the arms of the OR, else we can't use it. + * + * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL + * if no OR clause could be extracted. + */ +static Expr * +extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel) +{ + List *clauselist = NIL; + ListCell *lc; + + /* + * Scan each arm of the input OR clause. Notice we descend into + * or_rinfo->orclause, which has RestrictInfo nodes embedded below the + * toplevel OR/AND structure. 
This is useful because we can use the info + * in those nodes to make is_safe_restriction_clause_for()'s checks + * cheaper. We'll strip those nodes from the returned tree, though, + * meaning that fresh ones will be built if the clause is accepted as a + * restriction clause. This might seem wasteful --- couldn't we re-use + * the existing RestrictInfos? But that'd require assuming that + * selectivity and other cached data is computed exactly the same way for + * a restriction clause as for a join clause, which seems undesirable. + */ + Assert(is_orclause(or_rinfo->orclause)); + foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args) + { + Node *orarg = (Node *) lfirst(lc); + List *subclauses = NIL; + Node *subclause; + + /* OR arguments should be ANDs or sub-RestrictInfos */ + if (is_andclause(orarg)) + { + List *andargs = ((BoolExpr *) orarg)->args; + ListCell *lc2; + + foreach(lc2, andargs) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); + + if (restriction_is_or_clause(rinfo)) + { + /* + * Recurse to deal with nested OR. Note we *must* recurse + * here, this isn't just overly-tense optimization: we + * have to descend far enough to find and strip all + * RestrictInfos in the expression. + */ + Expr *suborclause; + + suborclause = extract_or_clause(rinfo, rel); + if (suborclause) + subclauses = lappend(subclauses, suborclause); + } + else if (is_safe_restriction_clause_for(rinfo, rel)) + subclauses = lappend(subclauses, rinfo->clause); + } + } + else + { + RestrictInfo *rinfo = castNode(RestrictInfo, orarg); + + Assert(!restriction_is_or_clause(rinfo)); + if (is_safe_restriction_clause_for(rinfo, rel)) + subclauses = lappend(subclauses, rinfo->clause); + } + + /* + * If nothing could be extracted from this arm, we can't do anything + * with this OR clause. + */ + if (subclauses == NIL) + return NULL; + + /* + * OK, add subclause(s) to the result OR. If we found more than one, + * we need an AND node. But if we found only one, and it is itself an + * OR node, add its subclauses to the result instead; this is needed + * to preserve AND/OR flatness (ie, no OR directly underneath OR). + */ + subclause = (Node *) make_ands_explicit(subclauses); + if (is_orclause(subclause)) + clauselist = list_concat(clauselist, + ((BoolExpr *) subclause)->args); + else + clauselist = lappend(clauselist, subclause); + } + + /* + * If we got a restriction clause from every arm, wrap them up in an OR + * node. (In theory the OR node might be unnecessary, if there was only + * one arm --- but then the input OR node was also redundant.) + */ + if (clauselist != NIL) + return make_orclause(clauselist); + return NULL; +} + +/* + * Consider whether a successfully-extracted restriction OR clause is + * actually worth using. If so, add it to the planner's data structures, + * and adjust the original join clause (join_or_rinfo) to compensate. + */ +static void +consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, + Expr *orclause, RestrictInfo *join_or_rinfo) +{ + RestrictInfo *or_rinfo; + Selectivity or_selec, + orig_selec; + + /* + * Build a RestrictInfo from the new OR clause. We can assume it's valid + * as a base restriction clause. + */ + or_rinfo = make_restrictinfo(root, + orclause, + true, + false, + false, + join_or_rinfo->security_level, + NULL, + NULL, + NULL); + + /* + * Estimate its selectivity. (We could have done this earlier, but doing + * it on the RestrictInfo representation allows the result to get cached, + * saving work later.) 
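+ *
+ * (Made-up illustration: if or_selec comes out as, say, 0.05, the clause
+ * passes the 0.9 threshold test below and gets attached to the rel, and
+ * the original join clause's cached norm_selec is then adjusted to
+ * compensate, as described further down.)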
+ */ + or_selec = clause_selectivity(root, (Node *) or_rinfo, + 0, JOIN_INNER, NULL); + + /* + * The clause is only worth adding to the query if it rejects a useful + * fraction of the base relation's rows; otherwise, it's just going to + * cause duplicate computation (since we will still have to check the + * original OR clause when the join is formed). Somewhat arbitrarily, we + * set the selectivity threshold at 0.9. + */ + if (or_selec > 0.9) + return; /* forget it */ + + /* + * OK, add it to the rel's restriction-clause list. + */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); + rel->baserestrict_min_security = Min(rel->baserestrict_min_security, + or_rinfo->security_level); + + /* + * Adjust the original join OR clause's cached selectivity to compensate + * for the selectivity of the added (but redundant) lower-level qual. This + * should result in the join rel getting approximately the same rows + * estimate as it would have gotten without all these shenanigans. + * + * XXX major hack alert: this depends on the assumption that the + * selectivity will stay cached. + * + * XXX another major hack: we adjust only norm_selec, the cached + * selectivity for JOIN_INNER semantics, even though the join clause + * might've been an outer-join clause. This is partly because we can't + * easily identify the relevant SpecialJoinInfo here, and partly because + * the linearity assumption we're making would fail anyway. (If it is an + * outer-join clause, "rel" must be on the nullable side, else we'd not + * have gotten here. So the computation of the join size is going to be + * quite nonlinear with respect to the size of "rel", so it's not clear + * how we ought to adjust outer_selec even if we could compute its + * original value correctly.) + */ + if (or_selec > 0) + { + SpecialJoinInfo sjinfo; + + /* + * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare + * approx_tuple_count() in costsize.c.) + */ + sjinfo.type = T_SpecialJoinInfo; + sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, + rel->relids); + sjinfo.min_righthand = rel->relids; + sjinfo.syn_lefthand = sjinfo.min_lefthand; + sjinfo.syn_righthand = sjinfo.min_righthand; + sjinfo.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo.lhs_strict = false; + sjinfo.delay_upper_joins = false; + sjinfo.semi_can_btree = false; + sjinfo.semi_can_hash = false; + sjinfo.semi_operators = NIL; + sjinfo.semi_rhs_exprs = NIL; + + /* Compute inner-join size */ + orig_selec = clause_selectivity(root, (Node *) join_or_rinfo, + 0, JOIN_INNER, &sjinfo); + + /* And hack cached selectivity so join size remains the same */ + join_or_rinfo->norm_selec = orig_selec / or_selec; + /* ensure result stays in sane range, in particular not "redundant" */ + if (join_or_rinfo->norm_selec > 1) + join_or_rinfo->norm_selec = 1; + /* as explained above, we don't touch outer_selec */ + } +} diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c new file mode 100644 index 0000000..12486cb --- /dev/null +++ b/src/backend/optimizer/util/paramassign.c @@ -0,0 +1,591 @@ +/*------------------------------------------------------------------------- + * + * paramassign.c + * Functions for assigning PARAM_EXEC slots during planning. + * + * This module is responsible for managing three planner data structures: + * + * root->glob->paramExecTypes: records actual assignments of PARAM_EXEC slots. 
+ * The i'th list element holds the data type OID of the i'th parameter slot. + * (Elements can be InvalidOid if they represent slots that are needed for + * chgParam signaling, but will never hold a value at runtime.) This list is + * global to the whole plan since the executor has only one PARAM_EXEC array. + * Assignments are permanent for the plan: we never remove entries once added. + * + * root->plan_params: a list of PlannerParamItem nodes, recording Vars and + * PlaceHolderVars that the root's query level needs to supply to lower-level + * subqueries, along with the PARAM_EXEC number to use for each such value. + * Elements are added to this list while planning a subquery, and the list + * is reset to empty after completion of each subquery. + * + * root->curOuterParams: a list of NestLoopParam nodes, recording Vars and + * PlaceHolderVars that some outer level of nestloop needs to pass down to + * a lower-level plan node in its righthand side. Elements are added to this + * list as createplan.c creates lower Plan nodes that need such Params, and + * are removed when it creates a NestLoop Plan node that will supply those + * values. + * + * The latter two data structures are used to prevent creating multiple + * PARAM_EXEC slots (each requiring work to fill) when the same upper + * SubPlan or NestLoop supplies a value that is referenced in more than + * one place in its child plan nodes. However, when the same Var has to + * be supplied to different subplan trees by different SubPlan or NestLoop + * parent nodes, we don't recognize any commonality; a fresh plan_params or + * curOuterParams entry will be made (since the old one has been removed + * when we finished processing the earlier SubPlan or NestLoop) and a fresh + * PARAM_EXEC number will be assigned. At one time we tried to avoid + * allocating duplicate PARAM_EXEC numbers in such cases, but it's harder + * than it seems to avoid bugs due to overlapping Param lifetimes, so we + * don't risk that anymore. Minimizing the number of PARAM_EXEC slots + * doesn't really save much executor work anyway. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/util/paramassign.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/nodeFuncs.h" +#include "nodes/plannodes.h" +#include "optimizer/paramassign.h" +#include "optimizer/placeholder.h" +#include "rewrite/rewriteManip.h" + + +/* + * Select a PARAM_EXEC number to identify the given Var as a parameter for + * the current subquery. (It might already have one.) + * Record the need for the Var in the proper upper-level root->plan_params. + */ +static int +assign_param_for_var(PlannerInfo *root, Var *var) +{ + ListCell *ppl; + PlannerParamItem *pitem; + Index levelsup; + + /* Find the query level the Var belongs to */ + for (levelsup = var->varlevelsup; levelsup > 0; levelsup--) + root = root->parent_root; + + /* If there's already a matching PlannerParamItem there, just use it */ + foreach(ppl, root->plan_params) + { + pitem = (PlannerParamItem *) lfirst(ppl); + if (IsA(pitem->item, Var)) + { + Var *pvar = (Var *) pitem->item; + + /* + * This comparison must match _equalVar(), except for ignoring + * varlevelsup. Note that _equalVar() ignores varnosyn, + * varattnosyn, and location, so this does too. 
+ */ + if (pvar->varno == var->varno && + pvar->varattno == var->varattno && + pvar->vartype == var->vartype && + pvar->vartypmod == var->vartypmod && + pvar->varcollid == var->varcollid) + return pitem->paramId; + } + } + + /* Nope, so make a new one */ + var = copyObject(var); + var->varlevelsup = 0; + + pitem = makeNode(PlannerParamItem); + pitem->item = (Node *) var; + pitem->paramId = list_length(root->glob->paramExecTypes); + root->glob->paramExecTypes = lappend_oid(root->glob->paramExecTypes, + var->vartype); + + root->plan_params = lappend(root->plan_params, pitem); + + return pitem->paramId; +} + +/* + * Generate a Param node to replace the given Var, + * which is expected to have varlevelsup > 0 (ie, it is not local). + * Record the need for the Var in the proper upper-level root->plan_params. + */ +Param * +replace_outer_var(PlannerInfo *root, Var *var) +{ + Param *retval; + int i; + + Assert(var->varlevelsup > 0 && var->varlevelsup < root->query_level); + + /* Find the Var in the appropriate plan_params, or add it if not present */ + i = assign_param_for_var(root, var); + + retval = makeNode(Param); + retval->paramkind = PARAM_EXEC; + retval->paramid = i; + retval->paramtype = var->vartype; + retval->paramtypmod = var->vartypmod; + retval->paramcollid = var->varcollid; + retval->location = var->location; + + return retval; +} + +/* + * Select a PARAM_EXEC number to identify the given PlaceHolderVar as a + * parameter for the current subquery. (It might already have one.) + * Record the need for the PHV in the proper upper-level root->plan_params. + * + * This is just like assign_param_for_var, except for PlaceHolderVars. + */ +static int +assign_param_for_placeholdervar(PlannerInfo *root, PlaceHolderVar *phv) +{ + ListCell *ppl; + PlannerParamItem *pitem; + Index levelsup; + + /* Find the query level the PHV belongs to */ + for (levelsup = phv->phlevelsup; levelsup > 0; levelsup--) + root = root->parent_root; + + /* If there's already a matching PlannerParamItem there, just use it */ + foreach(ppl, root->plan_params) + { + pitem = (PlannerParamItem *) lfirst(ppl); + if (IsA(pitem->item, PlaceHolderVar)) + { + PlaceHolderVar *pphv = (PlaceHolderVar *) pitem->item; + + /* We assume comparing the PHIDs is sufficient */ + if (pphv->phid == phv->phid) + return pitem->paramId; + } + } + + /* Nope, so make a new one */ + phv = copyObject(phv); + IncrementVarSublevelsUp((Node *) phv, -((int) phv->phlevelsup), 0); + Assert(phv->phlevelsup == 0); + + pitem = makeNode(PlannerParamItem); + pitem->item = (Node *) phv; + pitem->paramId = list_length(root->glob->paramExecTypes); + root->glob->paramExecTypes = lappend_oid(root->glob->paramExecTypes, + exprType((Node *) phv->phexpr)); + + root->plan_params = lappend(root->plan_params, pitem); + + return pitem->paramId; +} + +/* + * Generate a Param node to replace the given PlaceHolderVar, + * which is expected to have phlevelsup > 0 (ie, it is not local). + * Record the need for the PHV in the proper upper-level root->plan_params. + * + * This is just like replace_outer_var, except for PlaceHolderVars. 
+ */ +Param * +replace_outer_placeholdervar(PlannerInfo *root, PlaceHolderVar *phv) +{ + Param *retval; + int i; + + Assert(phv->phlevelsup > 0 && phv->phlevelsup < root->query_level); + + /* Find the PHV in the appropriate plan_params, or add it if not present */ + i = assign_param_for_placeholdervar(root, phv); + + retval = makeNode(Param); + retval->paramkind = PARAM_EXEC; + retval->paramid = i; + retval->paramtype = exprType((Node *) phv->phexpr); + retval->paramtypmod = exprTypmod((Node *) phv->phexpr); + retval->paramcollid = exprCollation((Node *) phv->phexpr); + retval->location = -1; + + return retval; +} + +/* + * Generate a Param node to replace the given Aggref + * which is expected to have agglevelsup > 0 (ie, it is not local). + * Record the need for the Aggref in the proper upper-level root->plan_params. + */ +Param * +replace_outer_agg(PlannerInfo *root, Aggref *agg) +{ + Param *retval; + PlannerParamItem *pitem; + Index levelsup; + + Assert(agg->agglevelsup > 0 && agg->agglevelsup < root->query_level); + + /* Find the query level the Aggref belongs to */ + for (levelsup = agg->agglevelsup; levelsup > 0; levelsup--) + root = root->parent_root; + + /* + * It does not seem worthwhile to try to de-duplicate references to outer + * aggs. Just make a new slot every time. + */ + agg = copyObject(agg); + IncrementVarSublevelsUp((Node *) agg, -((int) agg->agglevelsup), 0); + Assert(agg->agglevelsup == 0); + + pitem = makeNode(PlannerParamItem); + pitem->item = (Node *) agg; + pitem->paramId = list_length(root->glob->paramExecTypes); + root->glob->paramExecTypes = lappend_oid(root->glob->paramExecTypes, + agg->aggtype); + + root->plan_params = lappend(root->plan_params, pitem); + + retval = makeNode(Param); + retval->paramkind = PARAM_EXEC; + retval->paramid = pitem->paramId; + retval->paramtype = agg->aggtype; + retval->paramtypmod = -1; + retval->paramcollid = agg->aggcollid; + retval->location = agg->location; + + return retval; +} + +/* + * Generate a Param node to replace the given GroupingFunc expression which is + * expected to have agglevelsup > 0 (ie, it is not local). + * Record the need for the GroupingFunc in the proper upper-level + * root->plan_params. + */ +Param * +replace_outer_grouping(PlannerInfo *root, GroupingFunc *grp) +{ + Param *retval; + PlannerParamItem *pitem; + Index levelsup; + Oid ptype = exprType((Node *) grp); + + Assert(grp->agglevelsup > 0 && grp->agglevelsup < root->query_level); + + /* Find the query level the GroupingFunc belongs to */ + for (levelsup = grp->agglevelsup; levelsup > 0; levelsup--) + root = root->parent_root; + + /* + * It does not seem worthwhile to try to de-duplicate references to outer + * aggs. Just make a new slot every time. + */ + grp = copyObject(grp); + IncrementVarSublevelsUp((Node *) grp, -((int) grp->agglevelsup), 0); + Assert(grp->agglevelsup == 0); + + pitem = makeNode(PlannerParamItem); + pitem->item = (Node *) grp; + pitem->paramId = list_length(root->glob->paramExecTypes); + root->glob->paramExecTypes = lappend_oid(root->glob->paramExecTypes, + ptype); + + root->plan_params = lappend(root->plan_params, pitem); + + retval = makeNode(Param); + retval->paramkind = PARAM_EXEC; + retval->paramid = pitem->paramId; + retval->paramtype = ptype; + retval->paramtypmod = -1; + retval->paramcollid = InvalidOid; + retval->location = grp->location; + + return retval; +} + +/* + * Generate a Param node to replace the given Var, + * which is expected to come from some upper NestLoop plan node. 
+ * Record the need for the Var in root->curOuterParams. + */ +Param * +replace_nestloop_param_var(PlannerInfo *root, Var *var) +{ + Param *param; + NestLoopParam *nlp; + ListCell *lc; + + /* Is this Var already listed in root->curOuterParams? */ + foreach(lc, root->curOuterParams) + { + nlp = (NestLoopParam *) lfirst(lc); + if (equal(var, nlp->paramval)) + { + /* Yes, so just make a Param referencing this NLP's slot */ + param = makeNode(Param); + param->paramkind = PARAM_EXEC; + param->paramid = nlp->paramno; + param->paramtype = var->vartype; + param->paramtypmod = var->vartypmod; + param->paramcollid = var->varcollid; + param->location = var->location; + return param; + } + } + + /* No, so assign a PARAM_EXEC slot for a new NLP */ + param = generate_new_exec_param(root, + var->vartype, + var->vartypmod, + var->varcollid); + param->location = var->location; + + /* Add it to the list of required NLPs */ + nlp = makeNode(NestLoopParam); + nlp->paramno = param->paramid; + nlp->paramval = copyObject(var); + root->curOuterParams = lappend(root->curOuterParams, nlp); + + /* And return the replacement Param */ + return param; +} + +/* + * Generate a Param node to replace the given PlaceHolderVar, + * which is expected to come from some upper NestLoop plan node. + * Record the need for the PHV in root->curOuterParams. + * + * This is just like replace_nestloop_param_var, except for PlaceHolderVars. + */ +Param * +replace_nestloop_param_placeholdervar(PlannerInfo *root, PlaceHolderVar *phv) +{ + Param *param; + NestLoopParam *nlp; + ListCell *lc; + + /* Is this PHV already listed in root->curOuterParams? */ + foreach(lc, root->curOuterParams) + { + nlp = (NestLoopParam *) lfirst(lc); + if (equal(phv, nlp->paramval)) + { + /* Yes, so just make a Param referencing this NLP's slot */ + param = makeNode(Param); + param->paramkind = PARAM_EXEC; + param->paramid = nlp->paramno; + param->paramtype = exprType((Node *) phv->phexpr); + param->paramtypmod = exprTypmod((Node *) phv->phexpr); + param->paramcollid = exprCollation((Node *) phv->phexpr); + param->location = -1; + return param; + } + } + + /* No, so assign a PARAM_EXEC slot for a new NLP */ + param = generate_new_exec_param(root, + exprType((Node *) phv->phexpr), + exprTypmod((Node *) phv->phexpr), + exprCollation((Node *) phv->phexpr)); + + /* Add it to the list of required NLPs */ + nlp = makeNode(NestLoopParam); + nlp->paramno = param->paramid; + nlp->paramval = (Var *) copyObject(phv); + root->curOuterParams = lappend(root->curOuterParams, nlp); + + /* And return the replacement Param */ + return param; +} + +/* + * process_subquery_nestloop_params + * Handle params of a parameterized subquery that need to be fed + * from an outer nestloop. + * + * Currently, that would be *all* params that a subquery in FROM has demanded + * from the current query level, since they must be LATERAL references. + * + * subplan_params is a list of PlannerParamItems that we intend to pass to + * a subquery-in-FROM. (This was constructed in root->plan_params while + * planning the subquery, but isn't there anymore when this is called.) + * + * The subplan's references to the outer variables are already represented + * as PARAM_EXEC Params, since that conversion was done by the routines above + * while planning the subquery. So we need not modify the subplan or the + * PlannerParamItems here. What we do need to do is add entries to + * root->curOuterParams to signal the parent nestloop plan node that it must + * provide these values. 
This differs from replace_nestloop_param_var in + * that the PARAM_EXEC slots to use have already been determined. + * + * Note that we also use root->curOuterRels as an implicit parameter for + * sanity checks. + */ +void +process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params) +{ + ListCell *lc; + + foreach(lc, subplan_params) + { + PlannerParamItem *pitem = lfirst_node(PlannerParamItem, lc); + + if (IsA(pitem->item, Var)) + { + Var *var = (Var *) pitem->item; + NestLoopParam *nlp; + ListCell *lc; + + /* If not from a nestloop outer rel, complain */ + if (!bms_is_member(var->varno, root->curOuterRels)) + elog(ERROR, "non-LATERAL parameter required by subquery"); + + /* Is this param already listed in root->curOuterParams? */ + foreach(lc, root->curOuterParams) + { + nlp = (NestLoopParam *) lfirst(lc); + if (nlp->paramno == pitem->paramId) + { + Assert(equal(var, nlp->paramval)); + /* Present, so nothing to do */ + break; + } + } + if (lc == NULL) + { + /* No, so add it */ + nlp = makeNode(NestLoopParam); + nlp->paramno = pitem->paramId; + nlp->paramval = copyObject(var); + root->curOuterParams = lappend(root->curOuterParams, nlp); + } + } + else if (IsA(pitem->item, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) pitem->item; + NestLoopParam *nlp; + ListCell *lc; + + /* If not from a nestloop outer rel, complain */ + if (!bms_is_subset(find_placeholder_info(root, phv, false)->ph_eval_at, + root->curOuterRels)) + elog(ERROR, "non-LATERAL parameter required by subquery"); + + /* Is this param already listed in root->curOuterParams? */ + foreach(lc, root->curOuterParams) + { + nlp = (NestLoopParam *) lfirst(lc); + if (nlp->paramno == pitem->paramId) + { + Assert(equal(phv, nlp->paramval)); + /* Present, so nothing to do */ + break; + } + } + if (lc == NULL) + { + /* No, so add it */ + nlp = makeNode(NestLoopParam); + nlp->paramno = pitem->paramId; + nlp->paramval = (Var *) copyObject(phv); + root->curOuterParams = lappend(root->curOuterParams, nlp); + } + } + else + elog(ERROR, "unexpected type of subquery parameter"); + } +} + +/* + * Identify any NestLoopParams that should be supplied by a NestLoop plan + * node with the specified lefthand rels. Remove them from the active + * root->curOuterParams list and return them as the result list. + */ +List * +identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids) +{ + List *result; + ListCell *cell; + + result = NIL; + foreach(cell, root->curOuterParams) + { + NestLoopParam *nlp = (NestLoopParam *) lfirst(cell); + + /* + * We are looking for Vars and PHVs that can be supplied by the + * lefthand rels. The "bms_overlap" test is just an optimization to + * allow skipping find_placeholder_info() if the PHV couldn't match. + */ + if (IsA(nlp->paramval, Var) && + bms_is_member(nlp->paramval->varno, leftrelids)) + { + root->curOuterParams = foreach_delete_current(root->curOuterParams, + cell); + result = lappend(result, nlp); + } + else if (IsA(nlp->paramval, PlaceHolderVar) && + bms_overlap(((PlaceHolderVar *) nlp->paramval)->phrels, + leftrelids) && + bms_is_subset(find_placeholder_info(root, + (PlaceHolderVar *) nlp->paramval, + false)->ph_eval_at, + leftrelids)) + { + root->curOuterParams = foreach_delete_current(root->curOuterParams, + cell); + result = lappend(result, nlp); + } + } + return result; +} + +/* + * Generate a new Param node that will not conflict with any other. + * + * This is used to create Params representing subplan outputs or + * NestLoop parameters. 
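+ *
+ * A minimal sketch of a call (hypothetical caller and type, not taken from
+ * this file): a caller needing a fresh integer-valued slot might do
+ *
+ *		param = generate_new_exec_param(root, INT4OID, -1, InvalidOid);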
+ * + * We don't need to build a PlannerParamItem for such a Param, but we do + * need to make sure we record the type in paramExecTypes (otherwise, + * there won't be a slot allocated for it). + */ +Param * +generate_new_exec_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod, + Oid paramcollation) +{ + Param *retval; + + retval = makeNode(Param); + retval->paramkind = PARAM_EXEC; + retval->paramid = list_length(root->glob->paramExecTypes); + root->glob->paramExecTypes = lappend_oid(root->glob->paramExecTypes, + paramtype); + retval->paramtype = paramtype; + retval->paramtypmod = paramtypmod; + retval->paramcollid = paramcollation; + retval->location = -1; + + return retval; +} + +/* + * Assign a (nonnegative) PARAM_EXEC ID for a special parameter (one that + * is not actually used to carry a value at runtime). Such parameters are + * used for special runtime signaling purposes, such as connecting a + * recursive union node to its worktable scan node or forcing plan + * re-evaluation within the EvalPlanQual mechanism. No actual Param node + * exists with this ID, however. + */ +int +assign_special_exec_param(PlannerInfo *root) +{ + int paramId = list_length(root->glob->paramExecTypes); + + root->glob->paramExecTypes = lappend_oid(root->glob->paramExecTypes, + InvalidOid); + return paramId; +} diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c new file mode 100644 index 0000000..33affaf --- /dev/null +++ b/src/backend/optimizer/util/pathnode.c @@ -0,0 +1,4298 @@ +/*------------------------------------------------------------------------- + * + * pathnode.c + * Routines to manipulate pathlists and create path nodes + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/pathnode.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/extensible.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/tlist.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/selfuncs.h" + +typedef enum +{ + COSTS_EQUAL, /* path costs are fuzzily equal */ + COSTS_BETTER1, /* first path is cheaper than second */ + COSTS_BETTER2, /* second path is cheaper than first */ + COSTS_DIFFERENT /* neither path dominates the other on cost */ +} PathCostComparison; + +/* + * STD_FUZZ_FACTOR is the normal fuzz factor for compare_path_costs_fuzzily. + * XXX is it worth making this user-controllable? It provides a tradeoff + * between planner runtime and the accuracy of path cost comparisons. + */ +#define STD_FUZZ_FACTOR 1.01 + +static List *translate_sub_tlist(List *tlist, int relid); +static int append_total_cost_compare(const ListCell *a, const ListCell *b); +static int append_startup_cost_compare(const ListCell *a, const ListCell *b); +static List *reparameterize_pathlist_by_child(PlannerInfo *root, + List *pathlist, + RelOptInfo *child_rel); + + +/***************************************************************************** + * MISC. 
PATH UTILITIES + *****************************************************************************/ + +/* + * compare_path_costs + * Return -1, 0, or +1 according as path1 is cheaper, the same cost, + * or more expensive than path2 for the specified criterion. + */ +int +compare_path_costs(Path *path1, Path *path2, CostSelector criterion) +{ + if (criterion == STARTUP_COST) + { + if (path1->startup_cost < path2->startup_cost) + return -1; + if (path1->startup_cost > path2->startup_cost) + return +1; + + /* + * If paths have the same startup cost (not at all unlikely), order + * them by total cost. + */ + if (path1->total_cost < path2->total_cost) + return -1; + if (path1->total_cost > path2->total_cost) + return +1; + } + else + { + if (path1->total_cost < path2->total_cost) + return -1; + if (path1->total_cost > path2->total_cost) + return +1; + + /* + * If paths have the same total cost, order them by startup cost. + */ + if (path1->startup_cost < path2->startup_cost) + return -1; + if (path1->startup_cost > path2->startup_cost) + return +1; + } + return 0; +} + +/* + * compare_fractional_path_costs + * Return -1, 0, or +1 according as path1 is cheaper, the same cost, + * or more expensive than path2 for fetching the specified fraction + * of the total tuples. + * + * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the + * path with the cheaper total_cost. + */ +int +compare_fractional_path_costs(Path *path1, Path *path2, + double fraction) +{ + Cost cost1, + cost2; + + if (fraction <= 0.0 || fraction >= 1.0) + return compare_path_costs(path1, path2, TOTAL_COST); + cost1 = path1->startup_cost + + fraction * (path1->total_cost - path1->startup_cost); + cost2 = path2->startup_cost + + fraction * (path2->total_cost - path2->startup_cost); + if (cost1 < cost2) + return -1; + if (cost1 > cost2) + return +1; + return 0; +} + +/* + * compare_path_costs_fuzzily + * Compare the costs of two paths to see if either can be said to + * dominate the other. + * + * We use fuzzy comparisons so that add_path() can avoid keeping both of + * a pair of paths that really have insignificantly different cost. + * + * The fuzz_factor argument must be 1.0 plus delta, where delta is the + * fraction of the smaller cost that is considered to be a significant + * difference. For example, fuzz_factor = 1.01 makes the fuzziness limit + * be 1% of the smaller cost. + * + * The two paths are said to have "equal" costs if both startup and total + * costs are fuzzily the same. Path1 is said to be better than path2 if + * it has fuzzily better startup cost and fuzzily no worse total cost, + * or if it has fuzzily better total cost and fuzzily no worse startup cost. + * Path2 is better than path1 if the reverse holds. Finally, if one path + * is fuzzily better than the other on startup cost and fuzzily worse on + * total cost, we just say that their costs are "different", since neither + * dominates the other across the whole performance spectrum. + * + * This function also enforces a policy rule that paths for which the relevant + * one of parent->consider_startup and parent->consider_param_startup is false + * cannot survive comparisons solely on the grounds of good startup cost, so + * we never return COSTS_DIFFERENT when that is true for the total-cost loser. + * (But if total costs are fuzzily equal, we compare startup costs anyway, + * in hopes of eliminating one path or the other.) 
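+ *
+ * Worked example with made-up numbers and fuzz_factor = 1.01: given
+ * path1 (startup 10, total 1000) and path2 (startup 500, total 900),
+ * path1 is fuzzily worse on total cost (1000 > 900 * 1.01) while path2 is
+ * fuzzily worse on startup cost (500 > 10 * 1.01).  If path1's relevant
+ * consider_startup flag is set, the result is COSTS_DIFFERENT and both
+ * paths survive; if it is not set, startup cost cannot save path1 and the
+ * result is COSTS_BETTER2.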
+ */ +static PathCostComparison +compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) +{ +#define CONSIDER_PATH_STARTUP_COST(p) \ + ((p)->param_info == NULL ? (p)->parent->consider_startup : (p)->parent->consider_param_startup) + + /* + * Check total cost first since it's more likely to be different; many + * paths have zero startup cost. + */ + if (path1->total_cost > path2->total_cost * fuzz_factor) + { + /* path1 fuzzily worse on total cost */ + if (CONSIDER_PATH_STARTUP_COST(path1) && + path2->startup_cost > path1->startup_cost * fuzz_factor) + { + /* ... but path2 fuzzily worse on startup, so DIFFERENT */ + return COSTS_DIFFERENT; + } + /* else path2 dominates */ + return COSTS_BETTER2; + } + if (path2->total_cost > path1->total_cost * fuzz_factor) + { + /* path2 fuzzily worse on total cost */ + if (CONSIDER_PATH_STARTUP_COST(path2) && + path1->startup_cost > path2->startup_cost * fuzz_factor) + { + /* ... but path1 fuzzily worse on startup, so DIFFERENT */ + return COSTS_DIFFERENT; + } + /* else path1 dominates */ + return COSTS_BETTER1; + } + /* fuzzily the same on total cost ... */ + if (path1->startup_cost > path2->startup_cost * fuzz_factor) + { + /* ... but path1 fuzzily worse on startup, so path2 wins */ + return COSTS_BETTER2; + } + if (path2->startup_cost > path1->startup_cost * fuzz_factor) + { + /* ... but path2 fuzzily worse on startup, so path1 wins */ + return COSTS_BETTER1; + } + /* fuzzily the same on both costs */ + return COSTS_EQUAL; + +#undef CONSIDER_PATH_STARTUP_COST +} + +/* + * set_cheapest + * Find the minimum-cost paths from among a relation's paths, + * and save them in the rel's cheapest-path fields. + * + * cheapest_total_path is normally the cheapest-total-cost unparameterized + * path; but if there are no unparameterized paths, we assign it to be the + * best (cheapest least-parameterized) parameterized path. However, only + * unparameterized paths are considered candidates for cheapest_startup_path, + * so that will be NULL if there are no unparameterized paths. + * + * The cheapest_parameterized_paths list collects all parameterized paths + * that have survived the add_path() tournament for this relation. (Since + * add_path ignores pathkeys for a parameterized path, these will be paths + * that have best cost or best row count for their parameterization. We + * may also have both a parallel-safe and a non-parallel-safe path in some + * cases for the same parameterization in some cases, but this should be + * relatively rare since, most typically, all paths for the same relation + * will be parallel-safe or none of them will.) + * + * cheapest_parameterized_paths always includes the cheapest-total + * unparameterized path, too, if there is one; the users of that list find + * it more convenient if that's included. + * + * This is normally called only after we've finished constructing the path + * list for the rel node. 
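+ *
+ * A minimal usage sketch (hypothetical caller, not code from this file):
+ *
+ *		add_path(rel, create_seqscan_path(root, rel, NULL, 0));
+ *		... add any other candidate paths for rel ...
+ *		set_cheapest(rel);
+ *		cheapest = rel->cheapest_total_path;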
+ */ +void +set_cheapest(RelOptInfo *parent_rel) +{ + Path *cheapest_startup_path; + Path *cheapest_total_path; + Path *best_param_path; + List *parameterized_paths; + ListCell *p; + + Assert(IsA(parent_rel, RelOptInfo)); + + if (parent_rel->pathlist == NIL) + elog(ERROR, "could not devise a query plan for the given query"); + + cheapest_startup_path = cheapest_total_path = best_param_path = NULL; + parameterized_paths = NIL; + + foreach(p, parent_rel->pathlist) + { + Path *path = (Path *) lfirst(p); + int cmp; + + if (path->param_info) + { + /* Parameterized path, so add it to parameterized_paths */ + parameterized_paths = lappend(parameterized_paths, path); + + /* + * If we have an unparameterized cheapest-total, we no longer care + * about finding the best parameterized path, so move on. + */ + if (cheapest_total_path) + continue; + + /* + * Otherwise, track the best parameterized path, which is the one + * with least total cost among those of the minimum + * parameterization. + */ + if (best_param_path == NULL) + best_param_path = path; + else + { + switch (bms_subset_compare(PATH_REQ_OUTER(path), + PATH_REQ_OUTER(best_param_path))) + { + case BMS_EQUAL: + /* keep the cheaper one */ + if (compare_path_costs(path, best_param_path, + TOTAL_COST) < 0) + best_param_path = path; + break; + case BMS_SUBSET1: + /* new path is less-parameterized */ + best_param_path = path; + break; + case BMS_SUBSET2: + /* old path is less-parameterized, keep it */ + break; + case BMS_DIFFERENT: + + /* + * This means that neither path has the least possible + * parameterization for the rel. We'll sit on the old + * path until something better comes along. + */ + break; + } + } + } + else + { + /* Unparameterized path, so consider it for cheapest slots */ + if (cheapest_total_path == NULL) + { + cheapest_startup_path = cheapest_total_path = path; + continue; + } + + /* + * If we find two paths of identical costs, try to keep the + * better-sorted one. The paths might have unrelated sort + * orderings, in which case we can only guess which might be + * better to keep, but if one is superior then we definitely + * should keep that one. + */ + cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(cheapest_startup_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + cheapest_startup_path = path; + + cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(cheapest_total_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + cheapest_total_path = path; + } + } + + /* Add cheapest unparameterized path, if any, to parameterized_paths */ + if (cheapest_total_path) + parameterized_paths = lcons(cheapest_total_path, parameterized_paths); + + /* + * If there is no unparameterized path, use the best parameterized path as + * cheapest_total_path (but not as cheapest_startup_path). + */ + if (cheapest_total_path == NULL) + cheapest_total_path = best_param_path; + Assert(cheapest_total_path != NULL); + + parent_rel->cheapest_startup_path = cheapest_startup_path; + parent_rel->cheapest_total_path = cheapest_total_path; + parent_rel->cheapest_unique_path = NULL; /* computed only if needed */ + parent_rel->cheapest_parameterized_paths = parameterized_paths; +} + +/* + * add_path + * Consider a potential implementation path for the specified parent rel, + * and add it to the rel's pathlist if it is worthy of consideration. 
+ * A path is worthy if it has a better sort order (better pathkeys) or + * cheaper cost (on either dimension), or generates fewer rows, than any + * existing path that has the same or superset parameterization rels. + * We also consider parallel-safe paths more worthy than others. + * + * We also remove from the rel's pathlist any old paths that are dominated + * by new_path --- that is, new_path is cheaper, at least as well ordered, + * generates no more rows, requires no outer rels not required by the old + * path, and is no less parallel-safe. + * + * In most cases, a path with a superset parameterization will generate + * fewer rows (since it has more join clauses to apply), so that those two + * figures of merit move in opposite directions; this means that a path of + * one parameterization can seldom dominate a path of another. But such + * cases do arise, so we make the full set of checks anyway. + * + * There are two policy decisions embedded in this function, along with + * its sibling add_path_precheck. First, we treat all parameterized paths + * as having NIL pathkeys, so that they cannot win comparisons on the + * basis of sort order. This is to reduce the number of parameterized + * paths that are kept; see discussion in src/backend/optimizer/README. + * + * Second, we only consider cheap startup cost to be interesting if + * parent_rel->consider_startup is true for an unparameterized path, or + * parent_rel->consider_param_startup is true for a parameterized one. + * Again, this allows discarding useless paths sooner. + * + * The pathlist is kept sorted by total_cost, with cheaper paths + * at the front. Within this routine, that's simply a speed hack: + * doing it that way makes it more likely that we will reject an inferior + * path after a few comparisons, rather than many comparisons. + * However, add_path_precheck relies on this ordering to exit early + * when possible. + * + * NOTE: discarded Path objects are immediately pfree'd to reduce planner + * memory consumption. We dare not try to free the substructure of a Path, + * since much of it may be shared with other Paths or the query tree itself; + * but just recycling discarded Path nodes is a very useful savings in + * a large join tree. We can recycle the List nodes of pathlist, too. + * + * As noted in optimizer/README, deleting a previously-accepted Path is + * safe because we know that Paths of this rel cannot yet be referenced + * from any other rel, such as a higher-level join. However, in some cases + * it is possible that a Path is referenced by another Path for its own + * rel; we must not delete such a Path, even if it is dominated by the new + * Path. Currently this occurs only for IndexPath objects, which may be + * referenced as children of BitmapHeapPaths as well as being paths in + * their own right. Hence, we don't pfree IndexPaths when rejecting them. + * + * 'parent_rel' is the relation entry to which the path corresponds. + * 'new_path' is a potential path for parent_rel. + * + * Returns nothing, but modifies parent_rel->pathlist. + */ +void +add_path(RelOptInfo *parent_rel, Path *new_path) +{ + bool accept_new = true; /* unless we find a superior old path */ + int insert_at = 0; /* where to insert new item */ + List *new_path_pathkeys; + ListCell *p1; + + /* + * This is a convenient place to check for query cancel --- no part of the + * planner goes very long without calling add_path(). 
+ */ + CHECK_FOR_INTERRUPTS(); + + /* Pretend parameterized paths have no pathkeys, per comment above */ + new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys; + + /* + * Loop to check proposed new path against old paths. Note it is possible + * for more than one old path to be tossed out because new_path dominates + * it. + */ + foreach(p1, parent_rel->pathlist) + { + Path *old_path = (Path *) lfirst(p1); + bool remove_old = false; /* unless new proves superior */ + PathCostComparison costcmp; + PathKeysComparison keyscmp; + BMS_Comparison outercmp; + + /* + * Do a fuzzy cost comparison with standard fuzziness limit. + */ + costcmp = compare_path_costs_fuzzily(new_path, old_path, + STD_FUZZ_FACTOR); + + /* + * If the two paths compare differently for startup and total cost, + * then we want to keep both, and we can skip comparing pathkeys and + * required_outer rels. If they compare the same, proceed with the + * other comparisons. Row count is checked last. (We make the tests + * in this order because the cost comparison is most likely to turn + * out "different", and the pathkeys comparison next most likely. As + * explained above, row count very seldom makes a difference, so even + * though it's cheap to compare there's not much point in checking it + * earlier.) + */ + if (costcmp != COSTS_DIFFERENT) + { + /* Similarly check to see if either dominates on pathkeys */ + List *old_path_pathkeys; + + old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys; + keyscmp = compare_pathkeys(new_path_pathkeys, + old_path_pathkeys); + if (keyscmp != PATHKEYS_DIFFERENT) + { + switch (costcmp) + { + case COSTS_EQUAL: + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), + PATH_REQ_OUTER(old_path)); + if (keyscmp == PATHKEYS_BETTER1) + { + if ((outercmp == BMS_EQUAL || + outercmp == BMS_SUBSET1) && + new_path->rows <= old_path->rows && + new_path->parallel_safe >= old_path->parallel_safe) + remove_old = true; /* new dominates old */ + } + else if (keyscmp == PATHKEYS_BETTER2) + { + if ((outercmp == BMS_EQUAL || + outercmp == BMS_SUBSET2) && + new_path->rows >= old_path->rows && + new_path->parallel_safe <= old_path->parallel_safe) + accept_new = false; /* old dominates new */ + } + else /* keyscmp == PATHKEYS_EQUAL */ + { + if (outercmp == BMS_EQUAL) + { + /* + * Same pathkeys and outer rels, and fuzzily + * the same cost, so keep just one; to decide + * which, first check parallel-safety, then + * rows, then do a fuzzy cost comparison with + * very small fuzz limit. (We used to do an + * exact cost comparison, but that results in + * annoying platform-specific plan variations + * due to roundoff in the cost estimates.) If + * things are still tied, arbitrarily keep + * only the old path. Notice that we will + * keep only the old path even if the + * less-fuzzy comparison decides the startup + * and total costs compare differently. 
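+ *
+ * (Made-up illustration: two otherwise identical paths with
+ * total costs 100.0 and 100.000000005 fall within the
+ * 1.0000000001 fuzz limit, so they count as equal here and
+ * the old path is the one retained.)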
+ */ + if (new_path->parallel_safe > + old_path->parallel_safe) + remove_old = true; /* new dominates old */ + else if (new_path->parallel_safe < + old_path->parallel_safe) + accept_new = false; /* old dominates new */ + else if (new_path->rows < old_path->rows) + remove_old = true; /* new dominates old */ + else if (new_path->rows > old_path->rows) + accept_new = false; /* old dominates new */ + else if (compare_path_costs_fuzzily(new_path, + old_path, + 1.0000000001) == COSTS_BETTER1) + remove_old = true; /* new dominates old */ + else + accept_new = false; /* old equals or + * dominates new */ + } + else if (outercmp == BMS_SUBSET1 && + new_path->rows <= old_path->rows && + new_path->parallel_safe >= old_path->parallel_safe) + remove_old = true; /* new dominates old */ + else if (outercmp == BMS_SUBSET2 && + new_path->rows >= old_path->rows && + new_path->parallel_safe <= old_path->parallel_safe) + accept_new = false; /* old dominates new */ + /* else different parameterizations, keep both */ + } + break; + case COSTS_BETTER1: + if (keyscmp != PATHKEYS_BETTER2) + { + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), + PATH_REQ_OUTER(old_path)); + if ((outercmp == BMS_EQUAL || + outercmp == BMS_SUBSET1) && + new_path->rows <= old_path->rows && + new_path->parallel_safe >= old_path->parallel_safe) + remove_old = true; /* new dominates old */ + } + break; + case COSTS_BETTER2: + if (keyscmp != PATHKEYS_BETTER1) + { + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), + PATH_REQ_OUTER(old_path)); + if ((outercmp == BMS_EQUAL || + outercmp == BMS_SUBSET2) && + new_path->rows >= old_path->rows && + new_path->parallel_safe <= old_path->parallel_safe) + accept_new = false; /* old dominates new */ + } + break; + case COSTS_DIFFERENT: + + /* + * can't get here, but keep this case to keep compiler + * quiet + */ + break; + } + } + } + + /* + * Remove current element from pathlist if dominated by new. + */ + if (remove_old) + { + parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist, + p1); + + /* + * Delete the data pointed-to by the deleted cell, if possible + */ + if (!IsA(old_path, IndexPath)) + pfree(old_path); + } + else + { + /* new belongs after this old path if it has cost >= old's */ + if (new_path->total_cost >= old_path->total_cost) + insert_at = foreach_current_index(p1) + 1; + } + + /* + * If we found an old path that dominates new_path, we can quit + * scanning the pathlist; we will not add new_path, and we assume + * new_path cannot dominate any other elements of the pathlist. + */ + if (!accept_new) + break; + } + + if (accept_new) + { + /* Accept the new path: insert it at proper place in pathlist */ + parent_rel->pathlist = + list_insert_nth(parent_rel->pathlist, insert_at, new_path); + } + else + { + /* Reject and recycle the new path */ + if (!IsA(new_path, IndexPath)) + pfree(new_path); + } +} + +/* + * add_path_precheck + * Check whether a proposed new path could possibly get accepted. + * We assume we know the path's pathkeys and parameterization accurately, + * and have lower bounds for its costs. + * + * Note that we do not know the path's rowcount, since getting an estimate for + * that is too expensive to do before prechecking. We assume here that paths + * of a superset parameterization will generate fewer rows; if that holds, + * then paths with different parameterizations cannot dominate each other + * and so we can simply ignore existing paths of another parameterization. 
+ * (In the infrequent cases where that rule of thumb fails, add_path will + * get rid of the inferior path.) + * + * At the time this is called, we haven't actually built a Path structure, + * so the required information has to be passed piecemeal. + */ +bool +add_path_precheck(RelOptInfo *parent_rel, + Cost startup_cost, Cost total_cost, + List *pathkeys, Relids required_outer) +{ + List *new_path_pathkeys; + bool consider_startup; + ListCell *p1; + + /* Pretend parameterized paths have no pathkeys, per add_path policy */ + new_path_pathkeys = required_outer ? NIL : pathkeys; + + /* Decide whether new path's startup cost is interesting */ + consider_startup = required_outer ? parent_rel->consider_param_startup : parent_rel->consider_startup; + + foreach(p1, parent_rel->pathlist) + { + Path *old_path = (Path *) lfirst(p1); + PathKeysComparison keyscmp; + + /* + * We are looking for an old_path with the same parameterization (and + * by assumption the same rowcount) that dominates the new path on + * pathkeys as well as both cost metrics. If we find one, we can + * reject the new path. + * + * Cost comparisons here should match compare_path_costs_fuzzily. + */ + if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR) + { + /* new path can win on startup cost only if consider_startup */ + if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR || + !consider_startup) + { + /* new path loses on cost, so check pathkeys... */ + List *old_path_pathkeys; + + old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys; + keyscmp = compare_pathkeys(new_path_pathkeys, + old_path_pathkeys); + if (keyscmp == PATHKEYS_EQUAL || + keyscmp == PATHKEYS_BETTER2) + { + /* new path does not win on pathkeys... */ + if (bms_equal(required_outer, PATH_REQ_OUTER(old_path))) + { + /* Found an old path that dominates the new one */ + return false; + } + } + } + } + else + { + /* + * Since the pathlist is sorted by total_cost, we can stop looking + * once we reach a path with a total_cost larger than the new + * path's. + */ + break; + } + } + + return true; +} + +/* + * add_partial_path + * Like add_path, our goal here is to consider whether a path is worthy + * of being kept around, but the considerations here are a bit different. + * A partial path is one which can be executed in any number of workers in + * parallel such that each worker will generate a subset of the path's + * overall result. + * + * As in add_path, the partial_pathlist is kept sorted with the cheapest + * total path in front. This is depended on by multiple places, which + * just take the front entry as the cheapest path without searching. + * + * We don't generate parameterized partial paths for several reasons. Most + * importantly, they're not safe to execute, because there's nothing to + * make sure that a parallel scan within the parameterized portion of the + * plan is running with the same value in every worker at the same time. + * Fortunately, it seems unlikely to be worthwhile anyway, because having + * each worker scan the entire outer relation and a subset of the inner + * relation will generally be a terrible plan. The inner (parameterized) + * side of the plan will be small anyway. There could be rare cases where + * this wins big - e.g. if join order constraints put a 1-row relation on + * the outer side of the topmost join with a parameterized plan on the inner + * side - but we'll have to be content not to handle such cases until + * somebody builds an executor infrastructure that can cope with them. 
+ * + * Because we don't consider parameterized paths here, we also don't + * need to consider the row counts as a measure of quality: every path will + * produce the same number of rows. Neither do we need to consider startup + * costs: parallelism is only used for plans that will be run to completion. + * Therefore, this routine is much simpler than add_path: it needs to + * consider only pathkeys and total cost. + * + * As with add_path, we pfree paths that are found to be dominated by + * another partial path; this requires that there be no other references to + * such paths yet. Hence, GatherPaths must not be created for a rel until + * we're done creating all partial paths for it. Unlike add_path, we don't + * take an exception for IndexPaths as partial index paths won't be + * referenced by partial BitmapHeapPaths. + */ +void +add_partial_path(RelOptInfo *parent_rel, Path *new_path) +{ + bool accept_new = true; /* unless we find a superior old path */ + int insert_at = 0; /* where to insert new item */ + ListCell *p1; + + /* Check for query cancel. */ + CHECK_FOR_INTERRUPTS(); + + /* Path to be added must be parallel safe. */ + Assert(new_path->parallel_safe); + + /* Relation should be OK for parallelism, too. */ + Assert(parent_rel->consider_parallel); + + /* + * As in add_path, throw out any paths which are dominated by the new + * path, but throw out the new path if some existing path dominates it. + */ + foreach(p1, parent_rel->partial_pathlist) + { + Path *old_path = (Path *) lfirst(p1); + bool remove_old = false; /* unless new proves superior */ + PathKeysComparison keyscmp; + + /* Compare pathkeys. */ + keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys); + + /* Unless pathkeys are incompatible, keep just one of the two paths. */ + if (keyscmp != PATHKEYS_DIFFERENT) + { + if (new_path->total_cost > old_path->total_cost * STD_FUZZ_FACTOR) + { + /* New path costs more; keep it only if pathkeys are better. */ + if (keyscmp != PATHKEYS_BETTER1) + accept_new = false; + } + else if (old_path->total_cost > new_path->total_cost + * STD_FUZZ_FACTOR) + { + /* Old path costs more; keep it only if pathkeys are better. */ + if (keyscmp != PATHKEYS_BETTER2) + remove_old = true; + } + else if (keyscmp == PATHKEYS_BETTER1) + { + /* Costs are about the same, new path has better pathkeys. */ + remove_old = true; + } + else if (keyscmp == PATHKEYS_BETTER2) + { + /* Costs are about the same, old path has better pathkeys. */ + accept_new = false; + } + else if (old_path->total_cost > new_path->total_cost * 1.0000000001) + { + /* Pathkeys are the same, and the old path costs more. */ + remove_old = true; + } + else + { + /* + * Pathkeys are the same, and new path isn't materially + * cheaper. + */ + accept_new = false; + } + } + + /* + * Remove current element from partial_pathlist if dominated by new. + */ + if (remove_old) + { + parent_rel->partial_pathlist = + foreach_delete_current(parent_rel->partial_pathlist, p1); + pfree(old_path); + } + else + { + /* new belongs after this old path if it has cost >= old's */ + if (new_path->total_cost >= old_path->total_cost) + insert_at = foreach_current_index(p1) + 1; + } + + /* + * If we found an old path that dominates new_path, we can quit + * scanning the partial_pathlist; we will not add new_path, and we + * assume new_path cannot dominate any later path. 
+ */ + if (!accept_new) + break; + } + + if (accept_new) + { + /* Accept the new path: insert it at proper place */ + parent_rel->partial_pathlist = + list_insert_nth(parent_rel->partial_pathlist, insert_at, new_path); + } + else + { + /* Reject and recycle the new path */ + pfree(new_path); + } +} + +/* + * add_partial_path_precheck + * Check whether a proposed new partial path could possibly get accepted. + * + * Unlike add_path_precheck, we can ignore startup cost and parameterization, + * since they don't matter for partial paths (see add_partial_path). But + * we do want to make sure we don't add a partial path if there's already + * a complete path that dominates it, since in that case the proposed path + * is surely a loser. + */ +bool +add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost, + List *pathkeys) +{ + ListCell *p1; + + /* + * Our goal here is twofold. First, we want to find out whether this path + * is clearly inferior to some existing partial path. If so, we want to + * reject it immediately. Second, we want to find out whether this path + * is clearly superior to some existing partial path -- at least, modulo + * final cost computations. If so, we definitely want to consider it. + * + * Unlike add_path(), we always compare pathkeys here. This is because we + * expect partial_pathlist to be very short, and getting a definitive + * answer at this stage avoids the need to call add_path_precheck. + */ + foreach(p1, parent_rel->partial_pathlist) + { + Path *old_path = (Path *) lfirst(p1); + PathKeysComparison keyscmp; + + keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys); + if (keyscmp != PATHKEYS_DIFFERENT) + { + if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR && + keyscmp != PATHKEYS_BETTER1) + return false; + if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR && + keyscmp != PATHKEYS_BETTER2) + return true; + } + } + + /* + * This path is neither clearly inferior to an existing partial path nor + * clearly good enough that it might replace one. Compare it to + * non-parallel plans. If it loses even before accounting for the cost of + * the Gather node, we should definitely reject it. + * + * Note that we pass the total_cost to add_path_precheck twice. This is + * because it's never advantageous to consider the startup cost of a + * partial path; the resulting plans, if run in parallel, will be run to + * completion. + */ + if (!add_path_precheck(parent_rel, total_cost, total_cost, pathkeys, + NULL)) + return false; + + return true; +} + + +/***************************************************************************** + * PATH NODE CREATION ROUTINES + *****************************************************************************/ + +/* + * create_seqscan_path + * Creates a path corresponding to a sequential scan, returning the + * pathnode. 
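+ *
+ * A positive parallel_workers produces a partial (parallel-aware) scan
+ * path; zero produces an ordinary serial seqscan path.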
+ */ +Path * +create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer, int parallel_workers) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_SeqScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = (parallel_workers > 0); + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = parallel_workers; + pathnode->pathkeys = NIL; /* seqscan has unordered result */ + + cost_seqscan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_samplescan_path + * Creates a path node for a sampled table scan. + */ +Path * +create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_SampleScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* samplescan has unordered result */ + + cost_samplescan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_index_path + * Creates a path node for an index scan. + * + * 'index' is a usable index. + * 'indexclauses' is a list of IndexClause nodes representing clauses + * to be enforced as qual conditions in the scan. + * 'indexorderbys' is a list of bare expressions (no RestrictInfos) + * to be used as index ordering operators in the scan. + * 'indexorderbycols' is an integer list of index column numbers (zero based) + * the ordering operators can be used with. + * 'pathkeys' describes the ordering of the path. + * 'indexscandir' is ForwardScanDirection or BackwardScanDirection + * for an ordered index, or NoMovementScanDirection for + * an unordered index. + * 'indexonly' is true if an index-only scan is wanted. + * 'required_outer' is the set of outer relids for a parameterized path. + * 'loop_count' is the number of repetitions of the indexscan to factor into + * estimates of caching behavior. + * 'partial_path' is true if constructing a parallel index scan path. + * + * Returns the new path node. + */ +IndexPath * +create_index_path(PlannerInfo *root, + IndexOptInfo *index, + List *indexclauses, + List *indexorderbys, + List *indexorderbycols, + List *pathkeys, + ScanDirection indexscandir, + bool indexonly, + Relids required_outer, + double loop_count, + bool partial_path) +{ + IndexPath *pathnode = makeNode(IndexPath); + RelOptInfo *rel = index->rel; + + pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = pathkeys; + + pathnode->indexinfo = index; + pathnode->indexclauses = indexclauses; + pathnode->indexorderbys = indexorderbys; + pathnode->indexorderbycols = indexorderbycols; + pathnode->indexscandir = indexscandir; + + cost_index(pathnode, root, loop_count, partial_path); + + return pathnode; +} + +/* + * create_bitmap_heap_path + * Creates a path node for a bitmap scan. 
+ * + * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes. + * 'required_outer' is the set of outer relids for a parameterized path. + * 'loop_count' is the number of repetitions of the indexscan to factor into + * estimates of caching behavior. + * + * loop_count should match the value used when creating the component + * IndexPaths. + */ +BitmapHeapPath * +create_bitmap_heap_path(PlannerInfo *root, + RelOptInfo *rel, + Path *bitmapqual, + Relids required_outer, + double loop_count, + int parallel_degree) +{ + BitmapHeapPath *pathnode = makeNode(BitmapHeapPath); + + pathnode->path.pathtype = T_BitmapHeapScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = (parallel_degree > 0); + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = parallel_degree; + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->bitmapqual = bitmapqual; + + cost_bitmap_heap_scan(&pathnode->path, root, rel, + pathnode->path.param_info, + bitmapqual, loop_count); + + return pathnode; +} + +/* + * create_bitmap_and_path + * Creates a path node representing a BitmapAnd. + */ +BitmapAndPath * +create_bitmap_and_path(PlannerInfo *root, + RelOptInfo *rel, + List *bitmapquals) +{ + BitmapAndPath *pathnode = makeNode(BitmapAndPath); + Relids required_outer = NULL; + ListCell *lc; + + pathnode->path.pathtype = T_BitmapAnd; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + + /* + * Identify the required outer rels as the union of what the child paths + * depend on. (Alternatively, we could insist that the caller pass this + * in, but it's more convenient and reliable to compute it here.) + */ + foreach(lc, bitmapquals) + { + Path *bitmapqual = (Path *) lfirst(lc); + + required_outer = bms_add_members(required_outer, + PATH_REQ_OUTER(bitmapqual)); + } + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + + /* + * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be + * parallel-safe if and only if rel->consider_parallel is set. So, we can + * set the flag for this path based only on the relation-level flag, + * without actually iterating over the list of children. + */ + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->bitmapquals = bitmapquals; + + /* this sets bitmapselectivity as well as the regular cost fields: */ + cost_bitmap_and_node(pathnode, root); + + return pathnode; +} + +/* + * create_bitmap_or_path + * Creates a path node representing a BitmapOr. + */ +BitmapOrPath * +create_bitmap_or_path(PlannerInfo *root, + RelOptInfo *rel, + List *bitmapquals) +{ + BitmapOrPath *pathnode = makeNode(BitmapOrPath); + Relids required_outer = NULL; + ListCell *lc; + + pathnode->path.pathtype = T_BitmapOr; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + + /* + * Identify the required outer rels as the union of what the child paths + * depend on. (Alternatively, we could insist that the caller pass this + * in, but it's more convenient and reliable to compute it here.) 
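+	 *
+	 * For example, if one child bitmap path is parameterized by rel A and
+	 * another by rel B, the BitmapOr as a whole requires {A, B}.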
+ */ + foreach(lc, bitmapquals) + { + Path *bitmapqual = (Path *) lfirst(lc); + + required_outer = bms_add_members(required_outer, + PATH_REQ_OUTER(bitmapqual)); + } + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + + /* + * Currently, a BitmapHeapPath, BitmapAndPath, or BitmapOrPath will be + * parallel-safe if and only if rel->consider_parallel is set. So, we can + * set the flag for this path based only on the relation-level flag, + * without actually iterating over the list of children. + */ + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->bitmapquals = bitmapquals; + + /* this sets bitmapselectivity as well as the regular cost fields: */ + cost_bitmap_or_node(pathnode, root); + + return pathnode; +} + +/* + * create_tidscan_path + * Creates a path corresponding to a scan by TID, returning the pathnode. + */ +TidPath * +create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, + Relids required_outer) +{ + TidPath *pathnode = makeNode(TidPath); + + pathnode->path.pathtype = T_TidScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->tidquals = tidquals; + + cost_tidscan(&pathnode->path, root, rel, tidquals, + pathnode->path.param_info); + + return pathnode; +} + +/* + * create_tidrangescan_path + * Creates a path corresponding to a scan by a range of TIDs, returning + * the pathnode. + */ +TidRangePath * +create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, + List *tidrangequals, Relids required_outer) +{ + TidRangePath *pathnode = makeNode(TidRangePath); + + pathnode->path.pathtype = T_TidRangeScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->tidrangequals = tidrangequals; + + cost_tidrangescan(&pathnode->path, root, rel, tidrangequals, + pathnode->path.param_info); + + return pathnode; +} + +/* + * create_append_path + * Creates a path corresponding to an Append plan, returning the + * pathnode. + * + * Note that we must handle subpaths = NIL, representing a dummy access path. + * Also, there are callers that pass root = NULL. + */ +AppendPath * +create_append_path(PlannerInfo *root, + RelOptInfo *rel, + List *subpaths, List *partial_subpaths, + List *pathkeys, Relids required_outer, + int parallel_workers, bool parallel_aware, + double rows) +{ + AppendPath *pathnode = makeNode(AppendPath); + ListCell *l; + + Assert(!parallel_aware || parallel_workers > 0); + + pathnode->path.pathtype = T_Append; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + + /* + * When generating an Append path for a partitioned table, there may be + * parameterized quals that are useful for run-time pruning. 
Hence, + * compute path.param_info the same way as for any other baserel, so that + * such quals will be available for make_partition_pruneinfo(). (This + * would not work right for a non-baserel, ie a scan on a non-leaf child + * partition, and it's not necessary anyway in that case. Must skip it if + * we don't have "root", too.) + */ + if (root && rel->reloptkind == RELOPT_BASEREL && IS_PARTITIONED_REL(rel)) + pathnode->path.param_info = get_baserel_parampathinfo(root, + rel, + required_outer); + else + pathnode->path.param_info = get_appendrel_parampathinfo(rel, + required_outer); + + pathnode->path.parallel_aware = parallel_aware; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = parallel_workers; + pathnode->path.pathkeys = pathkeys; + + /* + * For parallel append, non-partial paths are sorted by descending total + * costs. That way, the total time to finish all non-partial paths is + * minimized. Also, the partial paths are sorted by descending startup + * costs. There may be some paths that require to do startup work by a + * single worker. In such case, it's better for workers to choose the + * expensive ones first, whereas the leader should choose the cheapest + * startup plan. + */ + if (pathnode->path.parallel_aware) + { + /* + * We mustn't fiddle with the order of subpaths when the Append has + * pathkeys. The order they're listed in is critical to keeping the + * pathkeys valid. + */ + Assert(pathkeys == NIL); + + list_sort(subpaths, append_total_cost_compare); + list_sort(partial_subpaths, append_startup_cost_compare); + } + pathnode->first_partial_path = list_length(subpaths); + pathnode->subpaths = list_concat(subpaths, partial_subpaths); + + /* + * Apply query-wide LIMIT if known and path is for sole base relation. + * (Handling this at this low level is a bit klugy.) + */ + if (root != NULL && bms_equal(rel->relids, root->all_baserels)) + pathnode->limit_tuples = root->limit_tuples; + else + pathnode->limit_tuples = -1.0; + + foreach(l, pathnode->subpaths) + { + Path *subpath = (Path *) lfirst(l); + + pathnode->path.parallel_safe = pathnode->path.parallel_safe && + subpath->parallel_safe; + + /* All child paths must have same parameterization */ + Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer)); + } + + Assert(!parallel_aware || pathnode->path.parallel_safe); + + /* + * If there's exactly one child path, the Append is a no-op and will be + * discarded later (in setrefs.c); therefore, we can inherit the child's + * size and cost, as well as its pathkeys if any (overriding whatever the + * caller might've said). Otherwise, we must do the normal costsize + * calculation. + */ + if (list_length(pathnode->subpaths) == 1) + { + Path *child = (Path *) linitial(pathnode->subpaths); + + pathnode->path.rows = child->rows; + pathnode->path.startup_cost = child->startup_cost; + pathnode->path.total_cost = child->total_cost; + pathnode->path.pathkeys = child->pathkeys; + } + else + cost_append(pathnode); + + /* If the caller provided a row estimate, override the computed value. */ + if (rows >= 0) + pathnode->path.rows = rows; + + return pathnode; +} + +/* + * append_total_cost_compare + * list_sort comparator for sorting append child paths + * by total_cost descending + * + * For equal total costs, we fall back to comparing startup costs; if those + * are equal too, break ties using bms_compare on the paths' relids. + * (This is to avoid getting unpredictable results from list_sort.) 
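+ *
+ * For example, children with total costs 100, 10 and 50 end up ordered
+ * 100, 50, 10, since the comparator negates compare_path_costs' result.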
+ */ +static int +append_total_cost_compare(const ListCell *a, const ListCell *b) +{ + Path *path1 = (Path *) lfirst(a); + Path *path2 = (Path *) lfirst(b); + int cmp; + + cmp = compare_path_costs(path1, path2, TOTAL_COST); + if (cmp != 0) + return -cmp; + return bms_compare(path1->parent->relids, path2->parent->relids); +} + +/* + * append_startup_cost_compare + * list_sort comparator for sorting append child paths + * by startup_cost descending + * + * For equal startup costs, we fall back to comparing total costs; if those + * are equal too, break ties using bms_compare on the paths' relids. + * (This is to avoid getting unpredictable results from list_sort.) + */ +static int +append_startup_cost_compare(const ListCell *a, const ListCell *b) +{ + Path *path1 = (Path *) lfirst(a); + Path *path2 = (Path *) lfirst(b); + int cmp; + + cmp = compare_path_costs(path1, path2, STARTUP_COST); + if (cmp != 0) + return -cmp; + return bms_compare(path1->parent->relids, path2->parent->relids); +} + +/* + * create_merge_append_path + * Creates a path corresponding to a MergeAppend plan, returning the + * pathnode. + */ +MergeAppendPath * +create_merge_append_path(PlannerInfo *root, + RelOptInfo *rel, + List *subpaths, + List *pathkeys, + Relids required_outer) +{ + MergeAppendPath *pathnode = makeNode(MergeAppendPath); + Cost input_startup_cost; + Cost input_total_cost; + ListCell *l; + + pathnode->path.pathtype = T_MergeAppend; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_appendrel_parampathinfo(rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = pathkeys; + pathnode->subpaths = subpaths; + + /* + * Apply query-wide LIMIT if known and path is for sole base relation. + * (Handling this at this low level is a bit klugy.) + */ + if (bms_equal(rel->relids, root->all_baserels)) + pathnode->limit_tuples = root->limit_tuples; + else + pathnode->limit_tuples = -1.0; + + /* + * Add up the sizes and costs of the input paths. + */ + pathnode->path.rows = 0; + input_startup_cost = 0; + input_total_cost = 0; + foreach(l, subpaths) + { + Path *subpath = (Path *) lfirst(l); + + pathnode->path.rows += subpath->rows; + pathnode->path.parallel_safe = pathnode->path.parallel_safe && + subpath->parallel_safe; + + if (pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* Subpath is adequately ordered, we won't need to sort it */ + input_startup_cost += subpath->startup_cost; + input_total_cost += subpath->total_cost; + } + else + { + /* We'll need to insert a Sort node, so include cost for that */ + Path sort_path; /* dummy for result of cost_sort */ + + cost_sort(&sort_path, + root, + pathkeys, + subpath->total_cost, + subpath->parent->tuples, + subpath->pathtarget->width, + 0.0, + work_mem, + pathnode->limit_tuples); + input_startup_cost += sort_path.startup_cost; + input_total_cost += sort_path.total_cost; + } + + /* All child paths must have same parameterization */ + Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer)); + } + + /* + * Now we can compute total costs of the MergeAppend. If there's exactly + * one child path, the MergeAppend is a no-op and will be discarded later + * (in setrefs.c); otherwise we do the normal cost calculation. 
+ */ + if (list_length(subpaths) == 1) + { + pathnode->path.startup_cost = input_startup_cost; + pathnode->path.total_cost = input_total_cost; + } + else + cost_merge_append(&pathnode->path, root, + pathkeys, list_length(subpaths), + input_startup_cost, input_total_cost, + pathnode->path.rows); + + return pathnode; +} + +/* + * create_group_result_path + * Creates a path representing a Result-and-nothing-else plan. + * + * This is only used for degenerate grouping cases, in which we know we + * need to produce one result row, possibly filtered by a HAVING qual. + */ +GroupResultPath * +create_group_result_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, List *havingqual) +{ + GroupResultPath *pathnode = makeNode(GroupResultPath); + + pathnode->path.pathtype = T_Result; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + pathnode->path.param_info = NULL; /* there are no other rels... */ + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; + pathnode->quals = havingqual; + + /* + * We can't quite use cost_resultscan() because the quals we want to + * account for are not baserestrict quals of the rel. Might as well just + * hack it here. + */ + pathnode->path.rows = 1; + pathnode->path.startup_cost = target->cost.startup; + pathnode->path.total_cost = target->cost.startup + + cpu_tuple_cost + target->cost.per_tuple; + + /* + * Add cost of qual, if any --- but we ignore its selectivity, since our + * rowcount estimate should be 1 no matter what the qual is. + */ + if (havingqual) + { + QualCost qual_cost; + + cost_qual_eval(&qual_cost, havingqual, root); + /* havingqual is evaluated once at startup */ + pathnode->path.startup_cost += qual_cost.startup + qual_cost.per_tuple; + pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple; + } + + return pathnode; +} + +/* + * create_material_path + * Creates a path corresponding to a Material plan, returning the + * pathnode. + */ +MaterialPath * +create_material_path(RelOptInfo *rel, Path *subpath) +{ + MaterialPath *pathnode = makeNode(MaterialPath); + + Assert(subpath->parent == rel); + + pathnode->path.pathtype = T_Material; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = subpath->param_info; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + cost_material(&pathnode->path, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width); + + return pathnode; +} + +/* + * create_memoize_path + * Creates a path corresponding to a Memoize plan, returning the pathnode. 
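+ *
+ * A Memoize node is normally placed above the inner side of a
+ * parameterized nestloop; it caches the inner rows for each distinct set
+ * of param_exprs values, so repeated rescans with the same parameter
+ * values can skip re-executing the subpath.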
+ */ +MemoizePath * +create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + List *param_exprs, List *hash_operators, + bool singlerow, bool binary_mode, double calls) +{ + MemoizePath *pathnode = makeNode(MemoizePath); + + Assert(subpath->parent == rel); + + pathnode->path.pathtype = T_Memoize; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = subpath->param_info; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + pathnode->hash_operators = hash_operators; + pathnode->param_exprs = param_exprs; + pathnode->singlerow = singlerow; + pathnode->binary_mode = binary_mode; + pathnode->calls = calls; + + /* + * For now we set est_entries to 0. cost_memoize_rescan() does all the + * hard work to determine how many cache entries there are likely to be, + * so it seems best to leave it up to that function to fill this field in. + * If left at 0, the executor will make a guess at a good value. + */ + pathnode->est_entries = 0; + + /* + * Add a small additional charge for caching the first entry. All the + * harder calculations for rescans are performed in cost_memoize_rescan(). + */ + pathnode->path.startup_cost = subpath->startup_cost + cpu_tuple_cost; + pathnode->path.total_cost = subpath->total_cost + cpu_tuple_cost; + pathnode->path.rows = subpath->rows; + + return pathnode; +} + +/* + * create_unique_path + * Creates a path representing elimination of distinct rows from the + * input data. Distinct-ness is defined according to the needs of the + * semijoin represented by sjinfo. If it is not possible to identify + * how to make the data unique, NULL is returned. + * + * If used at all, this is likely to be called repeatedly on the same rel; + * and the input subpath should always be the same (the cheapest_total path + * for the rel). So we cache the result. + */ +UniquePath * +create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + SpecialJoinInfo *sjinfo) +{ + UniquePath *pathnode; + Path sort_path; /* dummy for result of cost_sort */ + Path agg_path; /* dummy for result of cost_agg */ + MemoryContext oldcontext; + int numCols; + + /* Caller made a mistake if subpath isn't cheapest_total ... */ + Assert(subpath == rel->cheapest_total_path); + Assert(subpath->parent == rel); + /* ... or if SpecialJoinInfo is the wrong one */ + Assert(sjinfo->jointype == JOIN_SEMI); + Assert(bms_equal(rel->relids, sjinfo->syn_righthand)); + + /* If result already cached, return it */ + if (rel->cheapest_unique_path) + return (UniquePath *) rel->cheapest_unique_path; + + /* If it's not possible to unique-ify, return NULL */ + if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash)) + return NULL; + + /* + * When called during GEQO join planning, we are in a short-lived memory + * context. We must make sure that the path and any subsidiary data + * structures created for a baserel survive the GEQO cycle, else the + * baserel is trashed for future GEQO cycles. On the other hand, when we + * are creating those for a joinrel during GEQO, we don't want them to + * clutter the main planning context. Upshot is that the best solution is + * to explicitly allocate memory in the same context the given RelOptInfo + * is in. 
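+	 *
+	 * (GetMemoryChunkContext(rel) returns the context the RelOptInfo itself
+	 * was allocated in, so the switch below ties the new path's lifetime to
+	 * the rel's.)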
+ */ + oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + + pathnode = makeNode(UniquePath); + + pathnode->path.pathtype = T_Unique; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = subpath->param_info; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + + /* + * Assume the output is unsorted, since we don't necessarily have pathkeys + * to represent it. (This might get overridden below.) + */ + pathnode->path.pathkeys = NIL; + + pathnode->subpath = subpath; + pathnode->in_operators = sjinfo->semi_operators; + pathnode->uniq_exprs = sjinfo->semi_rhs_exprs; + + /* + * If the input is a relation and it has a unique index that proves the + * semi_rhs_exprs are unique, then we don't need to do anything. Note + * that relation_has_unique_index_for automatically considers restriction + * clauses for the rel, as well. + */ + if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree && + relation_has_unique_index_for(root, rel, NIL, + sjinfo->semi_rhs_exprs, + sjinfo->semi_operators)) + { + pathnode->umethod = UNIQUE_PATH_NOOP; + pathnode->path.rows = rel->rows; + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + pathnode->path.pathkeys = subpath->pathkeys; + + rel->cheapest_unique_path = (Path *) pathnode; + + MemoryContextSwitchTo(oldcontext); + + return pathnode; + } + + /* + * If the input is a subquery whose output must be unique already, then we + * don't need to do anything. The test for uniqueness has to consider + * exactly which columns we are extracting; for example "SELECT DISTINCT + * x,y" doesn't guarantee that x alone is distinct. So we cannot check for + * this optimization unless semi_rhs_exprs consists only of simple Vars + * referencing subquery outputs. (Possibly we could do something with + * expressions in the subquery outputs, too, but for now keep it simple.) + */ + if (rel->rtekind == RTE_SUBQUERY) + { + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); + + if (query_supports_distinctness(rte->subquery)) + { + List *sub_tlist_colnos; + + sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs, + rel->relid); + + if (sub_tlist_colnos && + query_is_distinct_for(rte->subquery, + sub_tlist_colnos, + sjinfo->semi_operators)) + { + pathnode->umethod = UNIQUE_PATH_NOOP; + pathnode->path.rows = rel->rows; + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + pathnode->path.pathkeys = subpath->pathkeys; + + rel->cheapest_unique_path = (Path *) pathnode; + + MemoryContextSwitchTo(oldcontext); + + return pathnode; + } + } + } + + /* Estimate number of output rows */ + pathnode->path.rows = estimate_num_groups(root, + sjinfo->semi_rhs_exprs, + rel->rows, + NULL, + NULL); + numCols = list_length(sjinfo->semi_rhs_exprs); + + if (sjinfo->semi_can_btree) + { + /* + * Estimate cost for sort+unique implementation + */ + cost_sort(&sort_path, root, NIL, + subpath->total_cost, + rel->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1.0); + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We + * assume all columns get compared at most of the tuples. (XXX + * probably this is an overestimate.) This should agree with + * create_upper_unique_path. 
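+		 *
+		 * For instance, with the default cpu_operator_cost of 0.0025, 10000
+		 * input rows and two unique-ified columns add
+		 * 0.0025 * 10000 * 2 = 50.0 to the sort path's total cost.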
+ */ + sort_path.total_cost += cpu_operator_cost * rel->rows * numCols; + } + + if (sjinfo->semi_can_hash) + { + /* + * Estimate the overhead per hashtable entry at 64 bytes (same as in + * planner.c). + */ + int hashentrysize = subpath->pathtarget->width + 64; + + if (hashentrysize * pathnode->path.rows > get_hash_memory_limit()) + { + /* + * We should not try to hash. Hack the SpecialJoinInfo to + * remember this, in case we come through here again. + */ + sjinfo->semi_can_hash = false; + } + else + cost_agg(&agg_path, root, + AGG_HASHED, NULL, + numCols, pathnode->path.rows, + NIL, + subpath->startup_cost, + subpath->total_cost, + rel->rows, + subpath->pathtarget->width); + } + + if (sjinfo->semi_can_btree && sjinfo->semi_can_hash) + { + if (agg_path.total_cost < sort_path.total_cost) + pathnode->umethod = UNIQUE_PATH_HASH; + else + pathnode->umethod = UNIQUE_PATH_SORT; + } + else if (sjinfo->semi_can_btree) + pathnode->umethod = UNIQUE_PATH_SORT; + else if (sjinfo->semi_can_hash) + pathnode->umethod = UNIQUE_PATH_HASH; + else + { + /* we can get here only if we abandoned hashing above */ + MemoryContextSwitchTo(oldcontext); + return NULL; + } + + if (pathnode->umethod == UNIQUE_PATH_HASH) + { + pathnode->path.startup_cost = agg_path.startup_cost; + pathnode->path.total_cost = agg_path.total_cost; + } + else + { + pathnode->path.startup_cost = sort_path.startup_cost; + pathnode->path.total_cost = sort_path.total_cost; + } + + rel->cheapest_unique_path = (Path *) pathnode; + + MemoryContextSwitchTo(oldcontext); + + return pathnode; +} + +/* + * create_gather_merge_path + * + * Creates a path corresponding to a gather merge scan, returning + * the pathnode. + */ +GatherMergePath * +create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + PathTarget *target, List *pathkeys, + Relids required_outer, double *rows) +{ + GatherMergePath *pathnode = makeNode(GatherMergePath); + Cost input_startup_cost = 0; + Cost input_total_cost = 0; + + Assert(subpath->parallel_safe); + Assert(pathkeys); + + pathnode->path.pathtype = T_GatherMerge; + pathnode->path.parent = rel; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + + pathnode->subpath = subpath; + pathnode->num_workers = subpath->parallel_workers; + pathnode->path.pathkeys = pathkeys; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.rows += subpath->rows; + + if (pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* Subpath is adequately ordered, we won't need to sort it */ + input_startup_cost += subpath->startup_cost; + input_total_cost += subpath->total_cost; + } + else + { + /* We'll need to insert a Sort node, so include cost for that */ + Path sort_path; /* dummy for result of cost_sort */ + + cost_sort(&sort_path, + root, + pathkeys, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1); + input_startup_cost += sort_path.startup_cost; + input_total_cost += sort_path.total_cost; + } + + cost_gather_merge(pathnode, root, rel, pathnode->path.param_info, + input_startup_cost, input_total_cost, rows); + + return pathnode; +} + +/* + * translate_sub_tlist - get subquery column numbers represented by tlist + * + * The given targetlist usually contains only Vars referencing the given relid. + * Extract their varattnos (ie, the column numbers of the subquery) and return + * as an integer List. 
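+ *
+ * For example, a tlist consisting of Vars for subquery output columns 2
+ * and 5 yields the integer list (2 5).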
+ * + * If any of the tlist items is not a simple Var, we cannot determine whether + * the subquery's uniqueness condition (if any) matches ours, so punt and + * return NIL. + */ +static List * +translate_sub_tlist(List *tlist, int relid) +{ + List *result = NIL; + ListCell *l; + + foreach(l, tlist) + { + Var *var = (Var *) lfirst(l); + + if (!var || !IsA(var, Var) || + var->varno != relid) + return NIL; /* punt */ + + result = lappend_int(result, var->varattno); + } + return result; +} + +/* + * create_gather_path + * Creates a path corresponding to a gather scan, returning the + * pathnode. + * + * 'rows' may optionally be set to override row estimates from other sources. + */ +GatherPath * +create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + PathTarget *target, Relids required_outer, double *rows) +{ + GatherPath *pathnode = makeNode(GatherPath); + + Assert(subpath->parallel_safe); + + pathnode->path.pathtype = T_Gather; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = false; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; /* Gather has unordered result */ + + pathnode->subpath = subpath; + pathnode->num_workers = subpath->parallel_workers; + pathnode->single_copy = false; + + if (pathnode->num_workers == 0) + { + pathnode->path.pathkeys = subpath->pathkeys; + pathnode->num_workers = 1; + pathnode->single_copy = true; + } + + cost_gather(pathnode, root, rel, pathnode->path.param_info, rows); + + return pathnode; +} + +/* + * create_subqueryscan_path + * Creates a path corresponding to a scan of a subquery, + * returning the pathnode. + */ +SubqueryScanPath * +create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + List *pathkeys, Relids required_outer) +{ + SubqueryScanPath *pathnode = makeNode(SubqueryScanPath); + + pathnode->path.pathtype = T_SubqueryScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.pathkeys = pathkeys; + pathnode->subpath = subpath; + + cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info); + + return pathnode; +} + +/* + * create_functionscan_path + * Creates a path corresponding to a sequential scan of a function, + * returning the pathnode. + */ +Path * +create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, + List *pathkeys, Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_FunctionScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = pathkeys; + + cost_functionscan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_tablefuncscan_path + * Creates a path corresponding to a sequential scan of a table function, + * returning the pathnode. 
+ */ +Path * +create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_TableFuncScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + cost_tablefuncscan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_valuesscan_path + * Creates a path corresponding to a scan of a VALUES list, + * returning the pathnode. + */ +Path * +create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_ValuesScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + cost_valuesscan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_ctescan_path + * Creates a path corresponding to a scan of a non-self-reference CTE, + * returning the pathnode. + */ +Path * +create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_CteScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */ + + cost_ctescan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_namedtuplestorescan_path + * Creates a path corresponding to a scan of a named tuplestore, returning + * the pathnode. + */ +Path * +create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_NamedTuplestoreScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_resultscan_path + * Creates a path corresponding to a scan of an RTE_RESULT relation, + * returning the pathnode. 
+ */ +Path * +create_resultscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_Result; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + cost_resultscan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_worktablescan_path + * Creates a path corresponding to a scan of a self-reference CTE, + * returning the pathnode. + */ +Path * +create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_WorkTableScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + /* Cost is the same as for a regular CTE scan */ + cost_ctescan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* + * create_foreignscan_path + * Creates a path corresponding to a scan of a foreign base table, + * returning the pathnode. + * + * This function is never called from core Postgres; rather, it's expected + * to be called by the GetForeignPaths function of a foreign data wrapper. + * We make the FDW supply all fields of the path, since we do not have any way + * to calculate them in core. However, there is a usually-sane default for + * the pathtarget (rel->reltarget), so we let a NULL for "target" select that. + */ +ForeignPath * +create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Relids required_outer, + Path *fdw_outerpath, + List *fdw_private) +{ + ForeignPath *pathnode = makeNode(ForeignPath); + + /* Historically some FDWs were confused about when to use this */ + Assert(IS_SIMPLE_REL(rel)); + + pathnode->path.pathtype = T_ForeignScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.rows = rows; + pathnode->path.startup_cost = startup_cost; + pathnode->path.total_cost = total_cost; + pathnode->path.pathkeys = pathkeys; + + pathnode->fdw_outerpath = fdw_outerpath; + pathnode->fdw_private = fdw_private; + + return pathnode; +} + +/* + * create_foreign_join_path + * Creates a path corresponding to a scan of a foreign join, + * returning the pathnode. + * + * This function is never called from core Postgres; rather, it's expected + * to be called by the GetForeignJoinPaths function of a foreign data wrapper. + * We make the FDW supply all fields of the path, since we do not have any way + * to calculate them in core. However, there is a usually-sane default for + * the pathtarget (rel->reltarget), so we let a NULL for "target" select that. 
+ */ +ForeignPath * +create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Relids required_outer, + Path *fdw_outerpath, + List *fdw_private) +{ + ForeignPath *pathnode = makeNode(ForeignPath); + + /* + * We should use get_joinrel_parampathinfo to handle parameterized paths, + * but the API of this function doesn't support it, and existing + * extensions aren't yet trying to build such paths anyway. For the + * moment just throw an error if someone tries it; eventually we should + * revisit this. + */ + if (!bms_is_empty(required_outer) || !bms_is_empty(rel->lateral_relids)) + elog(ERROR, "parameterized foreign joins are not supported yet"); + + pathnode->path.pathtype = T_ForeignScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.param_info = NULL; /* XXX see above */ + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.rows = rows; + pathnode->path.startup_cost = startup_cost; + pathnode->path.total_cost = total_cost; + pathnode->path.pathkeys = pathkeys; + + pathnode->fdw_outerpath = fdw_outerpath; + pathnode->fdw_private = fdw_private; + + return pathnode; +} + +/* + * create_foreign_upper_path + * Creates a path corresponding to an upper relation that's computed + * directly by an FDW, returning the pathnode. + * + * This function is never called from core Postgres; rather, it's expected to + * be called by the GetForeignUpperPaths function of a foreign data wrapper. + * We make the FDW supply all fields of the path, since we do not have any way + * to calculate them in core. However, there is a usually-sane default for + * the pathtarget (rel->reltarget), so we let a NULL for "target" select that. + */ +ForeignPath * +create_foreign_upper_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Path *fdw_outerpath, + List *fdw_private) +{ + ForeignPath *pathnode = makeNode(ForeignPath); + + /* + * Upper relations should never have any lateral references, since joining + * is complete. + */ + Assert(bms_is_empty(rel->lateral_relids)); + + pathnode->path.pathtype = T_ForeignScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.rows = rows; + pathnode->path.startup_cost = startup_cost; + pathnode->path.total_cost = total_cost; + pathnode->path.pathkeys = pathkeys; + + pathnode->fdw_outerpath = fdw_outerpath; + pathnode->fdw_private = fdw_private; + + return pathnode; +} + +/* + * calc_nestloop_required_outer + * Compute the required_outer set for a nestloop join path + * + * Note: result must not share storage with either input + */ +Relids +calc_nestloop_required_outer(Relids outerrelids, + Relids outer_paramrels, + Relids innerrelids, + Relids inner_paramrels) +{ + Relids required_outer; + + /* inner_path can require rels from outer path, but not vice versa */ + Assert(!bms_overlap(outer_paramrels, innerrelids)); + /* easy case if inner path is not parameterized */ + if (!inner_paramrels) + return bms_copy(outer_paramrels); + /* else, form the union ... 
*/ + required_outer = bms_union(outer_paramrels, inner_paramrels); + /* ... and remove any mention of now-satisfied outer rels */ + required_outer = bms_del_members(required_outer, + outerrelids); + /* maintain invariant that required_outer is exactly NULL if empty */ + if (bms_is_empty(required_outer)) + { + bms_free(required_outer); + required_outer = NULL; + } + return required_outer; +} + +/* + * calc_non_nestloop_required_outer + * Compute the required_outer set for a merge or hash join path + * + * Note: result must not share storage with either input + */ +Relids +calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path) +{ + Relids outer_paramrels = PATH_REQ_OUTER(outer_path); + Relids inner_paramrels = PATH_REQ_OUTER(inner_path); + Relids required_outer; + + /* neither path can require rels from the other */ + Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids)); + Assert(!bms_overlap(inner_paramrels, outer_path->parent->relids)); + /* form the union ... */ + required_outer = bms_union(outer_paramrels, inner_paramrels); + /* we do not need an explicit test for empty; bms_union gets it right */ + return required_outer; +} + +/* + * create_nestloop_path + * Creates a pathnode corresponding to a nestloop join between two + * relations. + * + * 'joinrel' is the join relation. + * 'jointype' is the type of join required + * 'workspace' is the result from initial_cost_nestloop + * 'extra' contains various information about the join + * 'outer_path' is the outer path + * 'inner_path' is the inner path + * 'restrict_clauses' are the RestrictInfo nodes to apply at the join + * 'pathkeys' are the path keys of the new join path + * 'required_outer' is the set of required outer rels + * + * Returns the resulting path node. + */ +NestPath * +create_nestloop_path(PlannerInfo *root, + RelOptInfo *joinrel, + JoinType jointype, + JoinCostWorkspace *workspace, + JoinPathExtraData *extra, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, + List *pathkeys, + Relids required_outer) +{ + NestPath *pathnode = makeNode(NestPath); + Relids inner_req_outer = PATH_REQ_OUTER(inner_path); + + /* + * If the inner path is parameterized by the outer, we must drop any + * restrict_clauses that are due to be moved into the inner path. We have + * to do this now, rather than postpone the work till createplan time, + * because the restrict_clauses list can affect the size and cost + * estimates for this path. + */ + if (bms_overlap(inner_req_outer, outer_path->parent->relids)) + { + Relids inner_and_outer = bms_union(inner_path->parent->relids, + inner_req_outer); + List *jclauses = NIL; + ListCell *lc; + + foreach(lc, restrict_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (!join_clause_is_movable_into(rinfo, + inner_path->parent->relids, + inner_and_outer)) + jclauses = lappend(jclauses, rinfo); + } + restrict_clauses = jclauses; + } + + pathnode->jpath.path.pathtype = T_NestLoop; + pathnode->jpath.path.parent = joinrel; + pathnode->jpath.path.pathtarget = joinrel->reltarget; + pathnode->jpath.path.param_info = + get_joinrel_parampathinfo(root, + joinrel, + outer_path, + inner_path, + extra->sjinfo, + required_outer, + &restrict_clauses); + pathnode->jpath.path.parallel_aware = false; + pathnode->jpath.path.parallel_safe = joinrel->consider_parallel && + outer_path->parallel_safe && inner_path->parallel_safe; + /* This is a foolish way to estimate parallel_workers, but for now... 
*/ + pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; + pathnode->jpath.path.pathkeys = pathkeys; + pathnode->jpath.jointype = jointype; + pathnode->jpath.inner_unique = extra->inner_unique; + pathnode->jpath.outerjoinpath = outer_path; + pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.joinrestrictinfo = restrict_clauses; + + final_cost_nestloop(root, pathnode, workspace, extra); + + return pathnode; +} + +/* + * create_mergejoin_path + * Creates a pathnode corresponding to a mergejoin join between + * two relations + * + * 'joinrel' is the join relation + * 'jointype' is the type of join required + * 'workspace' is the result from initial_cost_mergejoin + * 'extra' contains various information about the join + * 'outer_path' is the outer path + * 'inner_path' is the inner path + * 'restrict_clauses' are the RestrictInfo nodes to apply at the join + * 'pathkeys' are the path keys of the new join path + * 'required_outer' is the set of required outer rels + * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses + * (this should be a subset of the restrict_clauses list) + * 'outersortkeys' are the sort varkeys for the outer relation + * 'innersortkeys' are the sort varkeys for the inner relation + */ +MergePath * +create_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + JoinType jointype, + JoinCostWorkspace *workspace, + JoinPathExtraData *extra, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, + List *pathkeys, + Relids required_outer, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys) +{ + MergePath *pathnode = makeNode(MergePath); + + pathnode->jpath.path.pathtype = T_MergeJoin; + pathnode->jpath.path.parent = joinrel; + pathnode->jpath.path.pathtarget = joinrel->reltarget; + pathnode->jpath.path.param_info = + get_joinrel_parampathinfo(root, + joinrel, + outer_path, + inner_path, + extra->sjinfo, + required_outer, + &restrict_clauses); + pathnode->jpath.path.parallel_aware = false; + pathnode->jpath.path.parallel_safe = joinrel->consider_parallel && + outer_path->parallel_safe && inner_path->parallel_safe; + /* This is a foolish way to estimate parallel_workers, but for now... */ + pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; + pathnode->jpath.path.pathkeys = pathkeys; + pathnode->jpath.jointype = jointype; + pathnode->jpath.inner_unique = extra->inner_unique; + pathnode->jpath.outerjoinpath = outer_path; + pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.joinrestrictinfo = restrict_clauses; + pathnode->path_mergeclauses = mergeclauses; + pathnode->outersortkeys = outersortkeys; + pathnode->innersortkeys = innersortkeys; + /* pathnode->skip_mark_restore will be set by final_cost_mergejoin */ + /* pathnode->materialize_inner will be set by final_cost_mergejoin */ + + final_cost_mergejoin(root, pathnode, workspace, extra); + + return pathnode; +} + +/* + * create_hashjoin_path + * Creates a pathnode corresponding to a hash join between two relations. 
+ * + * 'joinrel' is the join relation + * 'jointype' is the type of join required + * 'workspace' is the result from initial_cost_hashjoin + * 'extra' contains various information about the join + * 'outer_path' is the cheapest outer path + * 'inner_path' is the cheapest inner path + * 'parallel_hash' to select Parallel Hash of inner path (shared hash table) + * 'restrict_clauses' are the RestrictInfo nodes to apply at the join + * 'required_outer' is the set of required outer rels + * 'hashclauses' are the RestrictInfo nodes to use as hash clauses + * (this should be a subset of the restrict_clauses list) + */ +HashPath * +create_hashjoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + JoinType jointype, + JoinCostWorkspace *workspace, + JoinPathExtraData *extra, + Path *outer_path, + Path *inner_path, + bool parallel_hash, + List *restrict_clauses, + Relids required_outer, + List *hashclauses) +{ + HashPath *pathnode = makeNode(HashPath); + + pathnode->jpath.path.pathtype = T_HashJoin; + pathnode->jpath.path.parent = joinrel; + pathnode->jpath.path.pathtarget = joinrel->reltarget; + pathnode->jpath.path.param_info = + get_joinrel_parampathinfo(root, + joinrel, + outer_path, + inner_path, + extra->sjinfo, + required_outer, + &restrict_clauses); + pathnode->jpath.path.parallel_aware = + joinrel->consider_parallel && parallel_hash; + pathnode->jpath.path.parallel_safe = joinrel->consider_parallel && + outer_path->parallel_safe && inner_path->parallel_safe; + /* This is a foolish way to estimate parallel_workers, but for now... */ + pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; + + /* + * A hashjoin never has pathkeys, since its output ordering is + * unpredictable due to possible batching. XXX If the inner relation is + * small enough, we could instruct the executor that it must not batch, + * and then we could assume that the output inherits the outer relation's + * ordering, which might save a sort step. However there is considerable + * downside if our estimate of the inner relation size is badly off. For + * the moment we don't risk it. (Note also that if we wanted to take this + * seriously, joinpath.c would have to consider many more paths for the + * outer rel than it does now.) + */ + pathnode->jpath.path.pathkeys = NIL; + pathnode->jpath.jointype = jointype; + pathnode->jpath.inner_unique = extra->inner_unique; + pathnode->jpath.outerjoinpath = outer_path; + pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.joinrestrictinfo = restrict_clauses; + pathnode->path_hashclauses = hashclauses; + /* final_cost_hashjoin will fill in pathnode->num_batches */ + + final_cost_hashjoin(root, pathnode, workspace, extra); + + return pathnode; +} + +/* + * create_projection_path + * Creates a pathnode that represents performing a projection. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + */ +ProjectionPath * +create_projection_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target) +{ + ProjectionPath *pathnode = makeNode(ProjectionPath); + PathTarget *oldtarget; + + /* + * We mustn't put a ProjectionPath directly above another; it's useless + * and will confuse create_projection_plan. Rather than making sure all + * callers handle that, let's implement it here, by stripping off any + * ProjectionPath in what we're given. Given this rule, there won't be + * more than one. 
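+	 *
+	 * For example, projecting target B above a ProjectionPath that computes
+	 * target A over some path P is collapsed to a single projection of B
+	 * directly over P.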
+ */ + if (IsA(subpath, ProjectionPath)) + { + ProjectionPath *subpp = (ProjectionPath *) subpath; + + Assert(subpp->path.parent == rel); + subpath = subpp->subpath; + Assert(!IsA(subpath, ProjectionPath)); + } + + pathnode->path.pathtype = T_Result; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe && + is_parallel_safe(root, (Node *) target->exprs); + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Projection does not change the sort order */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + /* + * We might not need a separate Result node. If the input plan node type + * can project, we can just tell it to project something else. Or, if it + * can't project but the desired target has the same expression list as + * what the input will produce anyway, we can still give it the desired + * tlist (possibly changing its ressortgroupref labels, but nothing else). + * Note: in the latter case, create_projection_plan has to recheck our + * conclusion; see comments therein. + */ + oldtarget = subpath->pathtarget; + if (is_projection_capable_path(subpath) || + equal(oldtarget->exprs, target->exprs)) + { + /* No separate Result node needed */ + pathnode->dummypp = true; + + /* + * Set cost of plan as subpath's cost, adjusted for tlist replacement. + */ + pathnode->path.rows = subpath->rows; + pathnode->path.startup_cost = subpath->startup_cost + + (target->cost.startup - oldtarget->cost.startup); + pathnode->path.total_cost = subpath->total_cost + + (target->cost.startup - oldtarget->cost.startup) + + (target->cost.per_tuple - oldtarget->cost.per_tuple) * subpath->rows; + } + else + { + /* We really do need the Result node */ + pathnode->dummypp = false; + + /* + * The Result node's cost is cpu_tuple_cost per row, plus the cost of + * evaluating the tlist. There is no qual to worry about. + */ + pathnode->path.rows = subpath->rows; + pathnode->path.startup_cost = subpath->startup_cost + + target->cost.startup; + pathnode->path.total_cost = subpath->total_cost + + target->cost.startup + + (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows; + } + + return pathnode; +} + +/* + * apply_projection_to_path + * Add a projection step, or just apply the target directly to given path. + * + * This has the same net effect as create_projection_path(), except that if + * a separate Result plan node isn't needed, we just replace the given path's + * pathtarget with the desired one. This must be used only when the caller + * knows that the given path isn't referenced elsewhere and so can be modified + * in-place. + * + * If the input path is a GatherPath or GatherMergePath, we try to push the + * new target down to its input as well; this is a yet more invasive + * modification of the input path, which create_projection_path() can't do. + * + * Note that we mustn't change the source path's parent link; so when it is + * add_path'd to "rel" things will be a bit inconsistent. So far that has + * not caused any trouble. 
+ * + * 'rel' is the parent relation associated with the result + * 'path' is the path representing the source of data + * 'target' is the PathTarget to be computed + */ +Path * +apply_projection_to_path(PlannerInfo *root, + RelOptInfo *rel, + Path *path, + PathTarget *target) +{ + QualCost oldcost; + + /* + * If given path can't project, we might need a Result node, so make a + * separate ProjectionPath. + */ + if (!is_projection_capable_path(path)) + return (Path *) create_projection_path(root, rel, path, target); + + /* + * We can just jam the desired tlist into the existing path, being sure to + * update its cost estimates appropriately. + */ + oldcost = path->pathtarget->cost; + path->pathtarget = target; + + path->startup_cost += target->cost.startup - oldcost.startup; + path->total_cost += target->cost.startup - oldcost.startup + + (target->cost.per_tuple - oldcost.per_tuple) * path->rows; + + /* + * If the path happens to be a Gather or GatherMerge path, we'd like to + * arrange for the subpath to return the required target list so that + * workers can help project. But if there is something that is not + * parallel-safe in the target expressions, then we can't. + */ + if ((IsA(path, GatherPath) || IsA(path, GatherMergePath)) && + is_parallel_safe(root, (Node *) target->exprs)) + { + /* + * We always use create_projection_path here, even if the subpath is + * projection-capable, so as to avoid modifying the subpath in place. + * It seems unlikely at present that there could be any other + * references to the subpath, but better safe than sorry. + * + * Note that we don't change the parallel path's cost estimates; it + * might be appropriate to do so, to reflect the fact that the bulk of + * the target evaluation will happen in workers. + */ + if (IsA(path, GatherPath)) + { + GatherPath *gpath = (GatherPath *) path; + + gpath->subpath = (Path *) + create_projection_path(root, + gpath->subpath->parent, + gpath->subpath, + target); + } + else + { + GatherMergePath *gmpath = (GatherMergePath *) path; + + gmpath->subpath = (Path *) + create_projection_path(root, + gmpath->subpath->parent, + gmpath->subpath, + target); + } + } + else if (path->parallel_safe && + !is_parallel_safe(root, (Node *) target->exprs)) + { + /* + * We're inserting a parallel-restricted target list into a path + * currently marked parallel-safe, so we have to mark it as no longer + * safe. + */ + path->parallel_safe = false; + } + + return path; +} + +/* + * create_set_projection_path + * Creates a pathnode that represents performing a projection that + * includes set-returning functions. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + */ +ProjectSetPath * +create_set_projection_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target) +{ + ProjectSetPath *pathnode = makeNode(ProjectSetPath); + double tlist_rows; + ListCell *lc; + + pathnode->path.pathtype = T_ProjectSet; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe && + is_parallel_safe(root, (Node *) target->exprs); + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Projection does not change the sort order XXX? 
*/ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + /* + * Estimate number of rows produced by SRFs for each row of input; if + * there's more than one in this node, use the maximum. + */ + tlist_rows = 1; + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + double itemrows; + + itemrows = expression_returns_set_rows(root, node); + if (tlist_rows < itemrows) + tlist_rows = itemrows; + } + + /* + * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost + * per input row, and half of cpu_tuple_cost for each added output row. + * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit + * this estimate later. + */ + pathnode->path.rows = subpath->rows * tlist_rows; + pathnode->path.startup_cost = subpath->startup_cost + + target->cost.startup; + pathnode->path.total_cost = subpath->total_cost + + target->cost.startup + + (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows + + (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2; + + return pathnode; +} + +/* + * create_incremental_sort_path + * Creates a pathnode that represents performing an incremental sort. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'pathkeys' represents the desired sort order + * 'presorted_keys' is the number of keys by which the input path is + * already sorted + * 'limit_tuples' is the estimated bound on the number of output tuples, + * or -1 if no LIMIT or couldn't estimate + */ +IncrementalSortPath * +create_incremental_sort_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + List *pathkeys, + int presorted_keys, + double limit_tuples) +{ + IncrementalSortPath *sort = makeNode(IncrementalSortPath); + SortPath *pathnode = &sort->spath; + + pathnode->path.pathtype = T_IncrementalSort; + pathnode->path.parent = rel; + /* Sort doesn't project, so use source path's pathtarget */ + pathnode->path.pathtarget = subpath->pathtarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.pathkeys = pathkeys; + + pathnode->subpath = subpath; + + cost_incremental_sort(&pathnode->path, + root, pathkeys, presorted_keys, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, /* XXX comparison_cost shouldn't be 0? */ + work_mem, limit_tuples); + + sort->nPresortedCols = presorted_keys; + + return sort; +} + +/* + * create_sort_path + * Creates a pathnode that represents performing an explicit sort. 
+ * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'pathkeys' represents the desired sort order + * 'limit_tuples' is the estimated bound on the number of output tuples, + * or -1 if no LIMIT or couldn't estimate + */ +SortPath * +create_sort_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + List *pathkeys, + double limit_tuples) +{ + SortPath *pathnode = makeNode(SortPath); + + pathnode->path.pathtype = T_Sort; + pathnode->path.parent = rel; + /* Sort doesn't project, so use source path's pathtarget */ + pathnode->path.pathtarget = subpath->pathtarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.pathkeys = pathkeys; + + pathnode->subpath = subpath; + + cost_sort(&pathnode->path, root, pathkeys, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, /* XXX comparison_cost shouldn't be 0? */ + work_mem, limit_tuples); + + return pathnode; +} + +/* + * create_group_path + * Creates a pathnode that represents performing grouping of presorted input + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + * 'groupClause' is a list of SortGroupClause's representing the grouping + * 'qual' is the HAVING quals if any + * 'numGroups' is the estimated number of groups + */ +GroupPath * +create_group_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + List *groupClause, + List *qual, + double numGroups) +{ + GroupPath *pathnode = makeNode(GroupPath); + PathTarget *target = rel->reltarget; + + pathnode->path.pathtype = T_Group; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Group doesn't change sort ordering */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + pathnode->groupClause = groupClause; + pathnode->qual = qual; + + cost_group(&pathnode->path, root, + list_length(groupClause), + numGroups, + qual, + subpath->startup_cost, subpath->total_cost, + subpath->rows); + + /* add tlist eval cost for each output row */ + pathnode->path.startup_cost += target->cost.startup; + pathnode->path.total_cost += target->cost.startup + + target->cost.per_tuple * pathnode->path.rows; + + return pathnode; +} + +/* + * create_upper_unique_path + * Creates a pathnode that represents performing an explicit Unique step + * on presorted input. + * + * This produces a Unique plan node, but the use-case is so different from + * create_unique_path that it doesn't seem worth trying to merge the two. 
+ * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'numCols' is the number of grouping columns + * 'numGroups' is the estimated number of groups + * + * The input path must be sorted on the grouping columns, plus possibly + * additional columns; so the first numCols pathkeys are the grouping columns + */ +UpperUniquePath * +create_upper_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups) +{ + UpperUniquePath *pathnode = makeNode(UpperUniquePath); + + pathnode->path.pathtype = T_Unique; + pathnode->path.parent = rel; + /* Unique doesn't project, so use source path's pathtarget */ + pathnode->path.pathtarget = subpath->pathtarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Unique doesn't change the input ordering */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + pathnode->numkeys = numCols; + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We assume + * all columns get compared at most of the tuples. (XXX probably this is + * an overestimate.) + */ + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost + + cpu_operator_cost * subpath->rows * numCols; + pathnode->path.rows = numGroups; + + return pathnode; +} + +/* + * create_agg_path + * Creates a pathnode that represents performing aggregation/grouping + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + * 'aggstrategy' is the Agg node's basic implementation strategy + * 'aggsplit' is the Agg node's aggregate-splitting mode + * 'groupClause' is a list of SortGroupClause's representing the grouping + * 'qual' is the HAVING quals if any + * 'aggcosts' contains cost info about the aggregate functions to be computed + * 'numGroups' is the estimated number of groups (1 if not grouping) + */ +AggPath * +create_agg_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target, + AggStrategy aggstrategy, + AggSplit aggsplit, + List *groupClause, + List *qual, + const AggClauseCosts *aggcosts, + double numGroups) +{ + AggPath *pathnode = makeNode(AggPath); + + pathnode->path.pathtype = T_Agg; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + if (aggstrategy == AGG_SORTED) + pathnode->path.pathkeys = subpath->pathkeys; /* preserves order */ + else + pathnode->path.pathkeys = NIL; /* output is unordered */ + pathnode->subpath = subpath; + + pathnode->aggstrategy = aggstrategy; + pathnode->aggsplit = aggsplit; + pathnode->numGroups = numGroups; + pathnode->transitionSpace = aggcosts ? 
aggcosts->transitionSpace : 0; + pathnode->groupClause = groupClause; + pathnode->qual = qual; + + cost_agg(&pathnode->path, root, + aggstrategy, aggcosts, + list_length(groupClause), numGroups, + qual, + subpath->startup_cost, subpath->total_cost, + subpath->rows, subpath->pathtarget->width); + + /* add tlist eval cost for each output row */ + pathnode->path.startup_cost += target->cost.startup; + pathnode->path.total_cost += target->cost.startup + + target->cost.per_tuple * pathnode->path.rows; + + return pathnode; +} + +/* + * create_groupingsets_path + * Creates a pathnode that represents performing GROUPING SETS aggregation + * + * GroupingSetsPath represents sorted grouping with one or more grouping sets. + * The input path's result must be sorted to match the last entry in + * rollup_groupclauses. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + * 'having_qual' is the HAVING quals if any + * 'rollups' is a list of RollupData nodes + * 'agg_costs' contains cost info about the aggregate functions to be computed + * 'numGroups' is the estimated total number of groups + */ +GroupingSetsPath * +create_groupingsets_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + List *having_qual, + AggStrategy aggstrategy, + List *rollups, + const AggClauseCosts *agg_costs, + double numGroups) +{ + GroupingSetsPath *pathnode = makeNode(GroupingSetsPath); + PathTarget *target = rel->reltarget; + ListCell *lc; + bool is_first = true; + bool is_first_sort = true; + + /* The topmost generated Plan node will be an Agg */ + pathnode->path.pathtype = T_Agg; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + pathnode->path.param_info = subpath->param_info; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->subpath = subpath; + + /* + * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED + * to AGG_HASHED, here if possible. + */ + if (aggstrategy == AGG_SORTED && + list_length(rollups) == 1 && + ((RollupData *) linitial(rollups))->groupClause == NIL) + aggstrategy = AGG_PLAIN; + + if (aggstrategy == AGG_MIXED && + list_length(rollups) == 1) + aggstrategy = AGG_HASHED; + + /* + * Output will be in sorted order by group_pathkeys if, and only if, there + * is a single rollup operation on a non-empty list of grouping + * expressions. + */ + if (aggstrategy == AGG_SORTED && list_length(rollups) == 1) + pathnode->path.pathkeys = root->group_pathkeys; + else + pathnode->path.pathkeys = NIL; + + pathnode->aggstrategy = aggstrategy; + pathnode->rollups = rollups; + pathnode->qual = having_qual; + pathnode->transitionSpace = agg_costs ? agg_costs->transitionSpace : 0; + + Assert(rollups != NIL); + Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1); + Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1); + + foreach(lc, rollups) + { + RollupData *rollup = lfirst(lc); + List *gsets = rollup->gsets; + int numGroupCols = list_length(linitial(gsets)); + + /* + * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the + * (already-sorted) input, and following ones do their own sort. + * + * In AGG_HASHED mode, there is one rollup for each grouping set. 
+ * + * In AGG_MIXED mode, the first rollups are hashed, the first + * non-hashed one takes the (already-sorted) input, and following ones + * do their own sort. + */ + if (is_first) + { + cost_agg(&pathnode->path, root, + aggstrategy, + agg_costs, + numGroupCols, + rollup->numGroups, + having_qual, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width); + is_first = false; + if (!rollup->is_hashed) + is_first_sort = false; + } + else + { + Path sort_path; /* dummy for result of cost_sort */ + Path agg_path; /* dummy for result of cost_agg */ + + if (rollup->is_hashed || is_first_sort) + { + /* + * Account for cost of aggregation, but don't charge input + * cost again + */ + cost_agg(&agg_path, root, + rollup->is_hashed ? AGG_HASHED : AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + having_qual, + 0.0, 0.0, + subpath->rows, + subpath->pathtarget->width); + if (!rollup->is_hashed) + is_first_sort = false; + } + else + { + /* Account for cost of sort, but don't charge input cost again */ + cost_sort(&sort_path, root, NIL, + 0.0, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1.0); + + /* Account for cost of aggregation */ + + cost_agg(&agg_path, root, + AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + having_qual, + sort_path.startup_cost, + sort_path.total_cost, + sort_path.rows, + subpath->pathtarget->width); + } + + pathnode->path.total_cost += agg_path.total_cost; + pathnode->path.rows += agg_path.rows; + } + } + + /* add tlist eval cost for each output row */ + pathnode->path.startup_cost += target->cost.startup; + pathnode->path.total_cost += target->cost.startup + + target->cost.per_tuple * pathnode->path.rows; + + return pathnode; +} + +/* + * create_minmaxagg_path + * Creates a pathnode that represents computation of MIN/MAX aggregates + * + * 'rel' is the parent relation associated with the result + * 'target' is the PathTarget to be computed + * 'mmaggregates' is a list of MinMaxAggInfo structs + * 'quals' is the HAVING quals if any + */ +MinMaxAggPath * +create_minmaxagg_path(PlannerInfo *root, + RelOptInfo *rel, + PathTarget *target, + List *mmaggregates, + List *quals) +{ + MinMaxAggPath *pathnode = makeNode(MinMaxAggPath); + Cost initplan_cost; + ListCell *lc; + + /* The topmost generated Plan node will be a Result */ + pathnode->path.pathtype = T_Result; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + /* A MinMaxAggPath implies use of initplans, so cannot be parallel-safe */ + pathnode->path.parallel_safe = false; + pathnode->path.parallel_workers = 0; + /* Result is one unordered row */ + pathnode->path.rows = 1; + pathnode->path.pathkeys = NIL; + + pathnode->mmaggregates = mmaggregates; + pathnode->quals = quals; + + /* Calculate cost of all the initplans ... */ + initplan_cost = 0; + foreach(lc, mmaggregates) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + initplan_cost += mminfo->pathcost; + } + + /* add tlist eval cost for each output row, plus cpu_tuple_cost */ + pathnode->path.startup_cost = initplan_cost + target->cost.startup; + pathnode->path.total_cost = initplan_cost + target->cost.startup + + target->cost.per_tuple + cpu_tuple_cost; + + /* + * Add cost of qual, if any --- but we ignore its selectivity, since our + * rowcount estimate should be 1 no matter what the qual is. 
+ */ + if (quals) + { + QualCost qual_cost; + + cost_qual_eval(&qual_cost, quals, root); + pathnode->path.startup_cost += qual_cost.startup; + pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple; + } + + return pathnode; +} + +/* + * create_windowagg_path + * Creates a pathnode that represents computation of window functions + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + * 'windowFuncs' is a list of WindowFunc structs + * 'winclause' is a WindowClause that is common to all the WindowFuncs + * 'qual' WindowClause.runconditions from lower-level WindowAggPaths. + * Must always be NIL when topwindow == false + * 'topwindow' pass as true only for the top-level WindowAgg. False for all + * intermediate WindowAggs. + * + * The input must be sorted according to the WindowClause's PARTITION keys + * plus ORDER BY keys. + */ +WindowAggPath * +create_windowagg_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target, + List *windowFuncs, + WindowClause *winclause, + List *qual, + bool topwindow) +{ + WindowAggPath *pathnode = makeNode(WindowAggPath); + + /* qual can only be set for the topwindow */ + Assert(qual == NIL || topwindow); + + pathnode->path.pathtype = T_WindowAgg; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + /* WindowAgg preserves the input sort order */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + pathnode->winclause = winclause; + pathnode->qual = qual; + pathnode->topwindow = topwindow; + + /* + * For costing purposes, assume that there are no redundant partitioning + * or ordering columns; it's not worth the trouble to deal with that + * corner case here. So we just pass the unmodified list lengths to + * cost_windowagg. 
+ */ + cost_windowagg(&pathnode->path, root, + windowFuncs, + list_length(winclause->partitionClause), + list_length(winclause->orderClause), + subpath->startup_cost, + subpath->total_cost, + subpath->rows); + + /* add tlist eval cost for each output row */ + pathnode->path.startup_cost += target->cost.startup; + pathnode->path.total_cost += target->cost.startup + + target->cost.per_tuple * pathnode->path.rows; + + return pathnode; +} + +/* + * create_setop_path + * Creates a pathnode that represents computation of INTERSECT or EXCEPT + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL) + * 'strategy' is the implementation strategy (sorted or hashed) + * 'distinctList' is a list of SortGroupClause's representing the grouping + * 'flagColIdx' is the column number where the flag column will be, if any + * 'firstFlag' is the flag value for the first input relation when hashing; + * or -1 when sorting + * 'numGroups' is the estimated number of distinct groups + * 'outputRows' is the estimated number of output rows + */ +SetOpPath * +create_setop_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + SetOpCmd cmd, + SetOpStrategy strategy, + List *distinctList, + AttrNumber flagColIdx, + int firstFlag, + double numGroups, + double outputRows) +{ + SetOpPath *pathnode = makeNode(SetOpPath); + + pathnode->path.pathtype = T_SetOp; + pathnode->path.parent = rel; + /* SetOp doesn't project, so use source path's pathtarget */ + pathnode->path.pathtarget = subpath->pathtarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + /* SetOp preserves the input sort order if in sort mode */ + pathnode->path.pathkeys = + (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL; + + pathnode->subpath = subpath; + pathnode->cmd = cmd; + pathnode->strategy = strategy; + pathnode->distinctList = distinctList; + pathnode->flagColIdx = flagColIdx; + pathnode->firstFlag = firstFlag; + pathnode->numGroups = numGroups; + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We assume + * all columns get compared at most of the tuples. 
+ */ + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost + + cpu_operator_cost * subpath->rows * list_length(distinctList); + pathnode->path.rows = outputRows; + + return pathnode; +} + +/* + * create_recursiveunion_path + * Creates a pathnode that represents a recursive UNION node + * + * 'rel' is the parent relation associated with the result + * 'leftpath' is the source of data for the non-recursive term + * 'rightpath' is the source of data for the recursive term + * 'target' is the PathTarget to be computed + * 'distinctList' is a list of SortGroupClause's representing the grouping + * 'wtParam' is the ID of Param representing work table + * 'numGroups' is the estimated number of groups + * + * For recursive UNION ALL, distinctList is empty and numGroups is zero + */ +RecursiveUnionPath * +create_recursiveunion_path(PlannerInfo *root, + RelOptInfo *rel, + Path *leftpath, + Path *rightpath, + PathTarget *target, + List *distinctList, + int wtParam, + double numGroups) +{ + RecursiveUnionPath *pathnode = makeNode(RecursiveUnionPath); + + pathnode->path.pathtype = T_RecursiveUnion; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + leftpath->parallel_safe && rightpath->parallel_safe; + /* Foolish, but we'll do it like joins for now: */ + pathnode->path.parallel_workers = leftpath->parallel_workers; + /* RecursiveUnion result is always unsorted */ + pathnode->path.pathkeys = NIL; + + pathnode->leftpath = leftpath; + pathnode->rightpath = rightpath; + pathnode->distinctList = distinctList; + pathnode->wtParam = wtParam; + pathnode->numGroups = numGroups; + + cost_recursive_union(&pathnode->path, leftpath, rightpath); + + return pathnode; +} + +/* + * create_lockrows_path + * Creates a pathnode that represents acquiring row locks + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'rowMarks' is a list of PlanRowMark's + * 'epqParam' is the ID of Param for EvalPlanQual re-eval + */ +LockRowsPath * +create_lockrows_path(PlannerInfo *root, RelOptInfo *rel, + Path *subpath, List *rowMarks, int epqParam) +{ + LockRowsPath *pathnode = makeNode(LockRowsPath); + + pathnode->path.pathtype = T_LockRows; + pathnode->path.parent = rel; + /* LockRows doesn't project, so use source path's pathtarget */ + pathnode->path.pathtarget = subpath->pathtarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = false; + pathnode->path.parallel_workers = 0; + pathnode->path.rows = subpath->rows; + + /* + * The result cannot be assumed sorted, since locking might cause the sort + * key columns to be replaced with new values. + */ + pathnode->path.pathkeys = NIL; + + pathnode->subpath = subpath; + pathnode->rowMarks = rowMarks; + pathnode->epqParam = epqParam; + + /* + * We should charge something extra for the costs of row locking and + * possible refetches, but it's hard to say how much. For now, use + * cpu_tuple_cost per row. 
+ */ + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost + + cpu_tuple_cost * subpath->rows; + + return pathnode; +} + +/* + * create_modifytable_path + * Creates a pathnode that represents performing INSERT/UPDATE/DELETE/MERGE + * mods + * + * 'rel' is the parent relation associated with the result + * 'subpath' is a Path producing source data + * 'operation' is the operation type + * 'canSetTag' is true if we set the command tag/es_processed + * 'nominalRelation' is the parent RT index for use of EXPLAIN + * 'rootRelation' is the partitioned/inherited table root RTI, or 0 if none + * 'partColsUpdated' is true if any partitioning columns are being updated, + * either from the target relation or a descendent partitioned table. + * 'resultRelations' is an integer list of actual RT indexes of target rel(s) + * 'updateColnosLists' is a list of UPDATE target column number lists + * (one sublist per rel); or NIL if not an UPDATE + * 'withCheckOptionLists' is a list of WCO lists (one per rel) + * 'returningLists' is a list of RETURNING tlists (one per rel) + * 'rowMarks' is a list of PlanRowMarks (non-locking only) + * 'onconflict' is the ON CONFLICT clause, or NULL + * 'epqParam' is the ID of Param for EvalPlanQual re-eval + * 'mergeActionLists' is a list of lists of MERGE actions (one per rel) + */ +ModifyTablePath * +create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, + Path *subpath, + CmdType operation, bool canSetTag, + Index nominalRelation, Index rootRelation, + bool partColsUpdated, + List *resultRelations, + List *updateColnosLists, + List *withCheckOptionLists, List *returningLists, + List *rowMarks, OnConflictExpr *onconflict, + List *mergeActionLists, int epqParam) +{ + ModifyTablePath *pathnode = makeNode(ModifyTablePath); + + Assert(operation == CMD_MERGE || + (operation == CMD_UPDATE ? + list_length(resultRelations) == list_length(updateColnosLists) : + updateColnosLists == NIL)); + Assert(withCheckOptionLists == NIL || + list_length(resultRelations) == list_length(withCheckOptionLists)); + Assert(returningLists == NIL || + list_length(resultRelations) == list_length(returningLists)); + + pathnode->path.pathtype = T_ModifyTable; + pathnode->path.parent = rel; + /* pathtarget is not interesting, just make it minimally valid */ + pathnode->path.pathtarget = rel->reltarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = false; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; + + /* + * Compute cost & rowcount as subpath cost & rowcount (if RETURNING) + * + * Currently, we don't charge anything extra for the actual table + * modification work, nor for the WITH CHECK OPTIONS or RETURNING + * expressions if any. It would only be window dressing, since + * ModifyTable is always a top-level node and there is no way for the + * costs to change any higher-level planning choices. But we might want + * to make it look better sometime. + */ + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + if (returningLists != NIL) + { + pathnode->path.rows = subpath->rows; + + /* + * Set width to match the subpath output. XXX this is totally wrong: + * we should return an average of the RETURNING tlist widths. But + * it's what happened historically, and improving it is a task for + * another day. (Again, it's mostly window dressing.) 
+ */ + pathnode->path.pathtarget->width = subpath->pathtarget->width; + } + else + { + pathnode->path.rows = 0; + pathnode->path.pathtarget->width = 0; + } + + pathnode->subpath = subpath; + pathnode->operation = operation; + pathnode->canSetTag = canSetTag; + pathnode->nominalRelation = nominalRelation; + pathnode->rootRelation = rootRelation; + pathnode->partColsUpdated = partColsUpdated; + pathnode->resultRelations = resultRelations; + pathnode->updateColnosLists = updateColnosLists; + pathnode->withCheckOptionLists = withCheckOptionLists; + pathnode->returningLists = returningLists; + pathnode->rowMarks = rowMarks; + pathnode->onconflict = onconflict; + pathnode->epqParam = epqParam; + pathnode->mergeActionLists = mergeActionLists; + + return pathnode; +} + +/* + * create_limit_path + * Creates a pathnode that represents performing LIMIT/OFFSET + * + * In addition to providing the actual OFFSET and LIMIT expressions, + * the caller must provide estimates of their values for costing purposes. + * The estimates are as computed by preprocess_limit(), ie, 0 represents + * the clause not being present, and -1 means it's present but we could + * not estimate its value. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'limitOffset' is the actual OFFSET expression, or NULL + * 'limitCount' is the actual LIMIT expression, or NULL + * 'offset_est' is the estimated value of the OFFSET expression + * 'count_est' is the estimated value of the LIMIT expression + */ +LimitPath * +create_limit_path(PlannerInfo *root, RelOptInfo *rel, + Path *subpath, + Node *limitOffset, Node *limitCount, + LimitOption limitOption, + int64 offset_est, int64 count_est) +{ + LimitPath *pathnode = makeNode(LimitPath); + + pathnode->path.pathtype = T_Limit; + pathnode->path.parent = rel; + /* Limit doesn't project, so use source path's pathtarget */ + pathnode->path.pathtarget = subpath->pathtarget; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.rows = subpath->rows; + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->total_cost; + pathnode->path.pathkeys = subpath->pathkeys; + pathnode->subpath = subpath; + pathnode->limitOffset = limitOffset; + pathnode->limitCount = limitCount; + pathnode->limitOption = limitOption; + + /* + * Adjust the output rows count and costs according to the offset/limit. + */ + adjust_limit_rows_costs(&pathnode->path.rows, + &pathnode->path.startup_cost, + &pathnode->path.total_cost, + offset_est, count_est); + + return pathnode; +} + +/* + * adjust_limit_rows_costs + * Adjust the size and cost estimates for a LimitPath node according to the + * offset/limit. + * + * This is only a cosmetic issue if we are at top level, but if we are + * building a subquery then it's important to report correct info to the outer + * planner. + * + * When the offset or count couldn't be estimated, use 10% of the estimated + * number of rows emitted from the subpath. + * + * XXX we don't bother to add eval costs of the offset/limit expressions + * themselves to the path costs. In theory we should, but in most cases those + * expressions are trivial and it's just not worth the trouble. 
+ */ +void +adjust_limit_rows_costs(double *rows, /* in/out parameter */ + Cost *startup_cost, /* in/out parameter */ + Cost *total_cost, /* in/out parameter */ + int64 offset_est, + int64 count_est) +{ + double input_rows = *rows; + Cost input_startup_cost = *startup_cost; + Cost input_total_cost = *total_cost; + + if (offset_est != 0) + { + double offset_rows; + + if (offset_est > 0) + offset_rows = (double) offset_est; + else + offset_rows = clamp_row_est(input_rows * 0.10); + if (offset_rows > *rows) + offset_rows = *rows; + if (input_rows > 0) + *startup_cost += + (input_total_cost - input_startup_cost) + * offset_rows / input_rows; + *rows -= offset_rows; + if (*rows < 1) + *rows = 1; + } + + if (count_est != 0) + { + double count_rows; + + if (count_est > 0) + count_rows = (double) count_est; + else + count_rows = clamp_row_est(input_rows * 0.10); + if (count_rows > *rows) + count_rows = *rows; + if (input_rows > 0) + *total_cost = *startup_cost + + (input_total_cost - input_startup_cost) + * count_rows / input_rows; + *rows = count_rows; + if (*rows < 1) + *rows = 1; + } +} + + +/* + * reparameterize_path + * Attempt to modify a Path to have greater parameterization + * + * We use this to attempt to bring all child paths of an appendrel to the + * same parameterization level, ensuring that they all enforce the same set + * of join quals (and thus that that parameterization can be attributed to + * an append path built from such paths). Currently, only a few path types + * are supported here, though more could be added at need. We return NULL + * if we can't reparameterize the given path. + * + * Note: we intentionally do not pass created paths to add_path(); it would + * possibly try to delete them on the grounds of being cost-inferior to the + * paths they were made from, and we don't want that. Paths made here are + * not necessarily of general-purpose usefulness, but they can be useful + * as members of an append path. + */ +Path * +reparameterize_path(PlannerInfo *root, Path *path, + Relids required_outer, + double loop_count) +{ + RelOptInfo *rel = path->parent; + + /* Can only increase, not decrease, path's parameterization */ + if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + return NULL; + switch (path->pathtype) + { + case T_SeqScan: + return create_seqscan_path(root, rel, required_outer, 0); + case T_SampleScan: + return (Path *) create_samplescan_path(root, rel, required_outer); + case T_IndexScan: + case T_IndexOnlyScan: + { + IndexPath *ipath = (IndexPath *) path; + IndexPath *newpath = makeNode(IndexPath); + + /* + * We can't use create_index_path directly, and would not want + * to because it would re-compute the indexqual conditions + * which is wasted effort. Instead we hack things a bit: + * flat-copy the path node, revise its param_info, and redo + * the cost estimate. 
+ */ + memcpy(newpath, ipath, sizeof(IndexPath)); + newpath->path.param_info = + get_baserel_parampathinfo(root, rel, required_outer); + cost_index(newpath, root, loop_count, false); + return (Path *) newpath; + } + case T_BitmapHeapScan: + { + BitmapHeapPath *bpath = (BitmapHeapPath *) path; + + return (Path *) create_bitmap_heap_path(root, + rel, + bpath->bitmapqual, + required_outer, + loop_count, 0); + } + case T_SubqueryScan: + { + SubqueryScanPath *spath = (SubqueryScanPath *) path; + + return (Path *) create_subqueryscan_path(root, + rel, + spath->subpath, + spath->path.pathkeys, + required_outer); + } + case T_Result: + /* Supported only for RTE_RESULT scan paths */ + if (IsA(path, Path)) + return create_resultscan_path(root, rel, required_outer); + break; + case T_Append: + { + AppendPath *apath = (AppendPath *) path; + List *childpaths = NIL; + List *partialpaths = NIL; + int i; + ListCell *lc; + + /* Reparameterize the children */ + i = 0; + foreach(lc, apath->subpaths) + { + Path *spath = (Path *) lfirst(lc); + + spath = reparameterize_path(root, spath, + required_outer, + loop_count); + if (spath == NULL) + return NULL; + /* We have to re-split the regular and partial paths */ + if (i < apath->first_partial_path) + childpaths = lappend(childpaths, spath); + else + partialpaths = lappend(partialpaths, spath); + i++; + } + return (Path *) + create_append_path(root, rel, childpaths, partialpaths, + apath->path.pathkeys, required_outer, + apath->path.parallel_workers, + apath->path.parallel_aware, + -1); + } + case T_Memoize: + { + MemoizePath *mpath = (MemoizePath *) path; + Path *spath = mpath->subpath; + + spath = reparameterize_path(root, spath, + required_outer, + loop_count); + if (spath == NULL) + return NULL; + return (Path *) create_memoize_path(root, rel, + spath, + mpath->param_exprs, + mpath->hash_operators, + mpath->singlerow, + mpath->binary_mode, + mpath->calls); + } + default: + break; + } + return NULL; +} + +/* + * reparameterize_path_by_child + * Given a path parameterized by the parent of the given child relation, + * translate the path to be parameterized by the given child relation. + * + * The function creates a new path of the same type as the given path, but + * parameterized by the given child relation. Most fields from the original + * path can simply be flat-copied, but any expressions must be adjusted to + * refer to the correct varnos, and any paths must be recursively + * reparameterized. Other fields that refer to specific relids also need + * adjustment. + * + * The cost, number of rows, width and parallel path properties depend upon + * path->parent, which does not change during the translation. Hence those + * members are copied as they are. + * + * If the given path can not be reparameterized, the function returns NULL. 
+ */ +Path * +reparameterize_path_by_child(PlannerInfo *root, Path *path, + RelOptInfo *child_rel) +{ + +#define FLAT_COPY_PATH(newnode, node, nodetype) \ + ( (newnode) = makeNode(nodetype), \ + memcpy((newnode), (node), sizeof(nodetype)) ) + +#define ADJUST_CHILD_ATTRS(node) \ + ((node) = \ + (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \ + child_rel->relids, \ + child_rel->top_parent_relids)) + +#define REPARAMETERIZE_CHILD_PATH(path) \ +do { \ + (path) = reparameterize_path_by_child(root, (path), child_rel); \ + if ((path) == NULL) \ + return NULL; \ +} while(0) + +#define REPARAMETERIZE_CHILD_PATH_LIST(pathlist) \ +do { \ + if ((pathlist) != NIL) \ + { \ + (pathlist) = reparameterize_pathlist_by_child(root, (pathlist), \ + child_rel); \ + if ((pathlist) == NIL) \ + return NULL; \ + } \ +} while(0) + + Path *new_path; + ParamPathInfo *new_ppi; + ParamPathInfo *old_ppi; + Relids required_outer; + + /* + * If the path is not parameterized by parent of the given relation, it + * doesn't need reparameterization. + */ + if (!path->param_info || + !bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids)) + return path; + + /* + * If possible, reparameterize the given path, making a copy. + * + * This function is currently only applied to the inner side of a nestloop + * join that is being partitioned by the partitionwise-join code. Hence, + * we need only support path types that plausibly arise in that context. + * (In particular, supporting sorted path types would be a waste of code + * and cycles: even if we translated them here, they'd just lose in + * subsequent cost comparisons.) If we do see an unsupported path type, + * that just means we won't be able to generate a partitionwise-join plan + * using that path type. + */ + switch (nodeTag(path)) + { + case T_Path: + FLAT_COPY_PATH(new_path, path, Path); + break; + + case T_IndexPath: + { + IndexPath *ipath; + + FLAT_COPY_PATH(ipath, path, IndexPath); + ADJUST_CHILD_ATTRS(ipath->indexclauses); + new_path = (Path *) ipath; + } + break; + + case T_BitmapHeapPath: + { + BitmapHeapPath *bhpath; + + FLAT_COPY_PATH(bhpath, path, BitmapHeapPath); + REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual); + new_path = (Path *) bhpath; + } + break; + + case T_BitmapAndPath: + { + BitmapAndPath *bapath; + + FLAT_COPY_PATH(bapath, path, BitmapAndPath); + REPARAMETERIZE_CHILD_PATH_LIST(bapath->bitmapquals); + new_path = (Path *) bapath; + } + break; + + case T_BitmapOrPath: + { + BitmapOrPath *bopath; + + FLAT_COPY_PATH(bopath, path, BitmapOrPath); + REPARAMETERIZE_CHILD_PATH_LIST(bopath->bitmapquals); + new_path = (Path *) bopath; + } + break; + + case T_ForeignPath: + { + ForeignPath *fpath; + ReparameterizeForeignPathByChild_function rfpc_func; + + FLAT_COPY_PATH(fpath, path, ForeignPath); + if (fpath->fdw_outerpath) + REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath); + + /* Hand over to FDW if needed. 
*/ + rfpc_func = + path->parent->fdwroutine->ReparameterizeForeignPathByChild; + if (rfpc_func) + fpath->fdw_private = rfpc_func(root, fpath->fdw_private, + child_rel); + new_path = (Path *) fpath; + } + break; + + case T_CustomPath: + { + CustomPath *cpath; + + FLAT_COPY_PATH(cpath, path, CustomPath); + REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths); + if (cpath->methods && + cpath->methods->ReparameterizeCustomPathByChild) + cpath->custom_private = + cpath->methods->ReparameterizeCustomPathByChild(root, + cpath->custom_private, + child_rel); + new_path = (Path *) cpath; + } + break; + + case T_NestPath: + { + JoinPath *jpath; + NestPath *npath; + + FLAT_COPY_PATH(npath, path, NestPath); + + jpath = (JoinPath *) npath; + REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); + REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); + ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); + new_path = (Path *) npath; + } + break; + + case T_MergePath: + { + JoinPath *jpath; + MergePath *mpath; + + FLAT_COPY_PATH(mpath, path, MergePath); + + jpath = (JoinPath *) mpath; + REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); + REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); + ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); + ADJUST_CHILD_ATTRS(mpath->path_mergeclauses); + new_path = (Path *) mpath; + } + break; + + case T_HashPath: + { + JoinPath *jpath; + HashPath *hpath; + + FLAT_COPY_PATH(hpath, path, HashPath); + + jpath = (JoinPath *) hpath; + REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); + REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); + ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); + ADJUST_CHILD_ATTRS(hpath->path_hashclauses); + new_path = (Path *) hpath; + } + break; + + case T_AppendPath: + { + AppendPath *apath; + + FLAT_COPY_PATH(apath, path, AppendPath); + REPARAMETERIZE_CHILD_PATH_LIST(apath->subpaths); + new_path = (Path *) apath; + } + break; + + case T_MemoizePath: + { + MemoizePath *mpath; + + FLAT_COPY_PATH(mpath, path, MemoizePath); + REPARAMETERIZE_CHILD_PATH(mpath->subpath); + ADJUST_CHILD_ATTRS(mpath->param_exprs); + new_path = (Path *) mpath; + } + break; + + case T_GatherPath: + { + GatherPath *gpath; + + FLAT_COPY_PATH(gpath, path, GatherPath); + REPARAMETERIZE_CHILD_PATH(gpath->subpath); + new_path = (Path *) gpath; + } + break; + + default: + + /* We don't know how to reparameterize this path. */ + return NULL; + } + + /* + * Adjust the parameterization information, which refers to the topmost + * parent. The topmost parent can be multiple levels away from the given + * child, hence use multi-level expression adjustment routines. + */ + old_ppi = new_path->param_info; + required_outer = + adjust_child_relids_multilevel(root, old_ppi->ppi_req_outer, + child_rel->relids, + child_rel->top_parent_relids); + + /* If we already have a PPI for this parameterization, just return it */ + new_ppi = find_param_path_info(new_path->parent, required_outer); + + /* + * If not, build a new one and link it to the list of PPIs. For the same + * reason as explained in mark_dummy_rel(), allocate new PPI in the same + * context the given RelOptInfo is in. 
+ */ + if (new_ppi == NULL) + { + MemoryContext oldcontext; + RelOptInfo *rel = path->parent; + + oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + + new_ppi = makeNode(ParamPathInfo); + new_ppi->ppi_req_outer = bms_copy(required_outer); + new_ppi->ppi_rows = old_ppi->ppi_rows; + new_ppi->ppi_clauses = old_ppi->ppi_clauses; + ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses); + rel->ppilist = lappend(rel->ppilist, new_ppi); + + MemoryContextSwitchTo(oldcontext); + } + bms_free(required_outer); + + new_path->param_info = new_ppi; + + /* + * Adjust the path target if the parent of the outer relation is + * referenced in the targetlist. This can happen when only the parent of + * outer relation is laterally referenced in this relation. + */ + if (bms_overlap(path->parent->lateral_relids, + child_rel->top_parent_relids)) + { + new_path->pathtarget = copy_pathtarget(new_path->pathtarget); + ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs); + } + + return new_path; +} + +/* + * reparameterize_pathlist_by_child + * Helper function to reparameterize a list of paths by given child rel. + */ +static List * +reparameterize_pathlist_by_child(PlannerInfo *root, + List *pathlist, + RelOptInfo *child_rel) +{ + ListCell *lc; + List *result = NIL; + + foreach(lc, pathlist) + { + Path *path = reparameterize_path_by_child(root, lfirst(lc), + child_rel); + + if (path == NULL) + { + list_free(result); + return NIL; + } + + result = lappend(result, path); + } + + return result; +} diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c new file mode 100644 index 0000000..3b0f058 --- /dev/null +++ b/src/backend/optimizer/util/placeholder.c @@ -0,0 +1,477 @@ +/*------------------------------------------------------------------------- + * + * placeholder.c + * PlaceHolderVar and PlaceHolderInfo manipulation routines + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/placeholder.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/placeholder.h" +#include "optimizer/planmain.h" +#include "utils/lsyscache.h" + +/* Local functions */ +static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode); +static void find_placeholders_in_expr(PlannerInfo *root, Node *expr); + + +/* + * make_placeholder_expr + * Make a PlaceHolderVar for the given expression. + * + * phrels is the syntactic location (as a set of baserels) to attribute + * to the expression. + */ +PlaceHolderVar * +make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels) +{ + PlaceHolderVar *phv = makeNode(PlaceHolderVar); + + phv->phexpr = expr; + phv->phrels = phrels; + phv->phid = ++(root->glob->lastPHId); + phv->phlevelsup = 0; + + return phv; +} + +/* + * find_placeholder_info + * Fetch the PlaceHolderInfo for the given PHV + * + * If the PlaceHolderInfo doesn't exist yet, create it if create_new_ph is + * true, else throw an error. + * + * This is separate from make_placeholder_expr because subquery pullup has + * to make PlaceHolderVars for expressions that might not be used at all in + * the upper query, or might not remain after const-expression simplification. 
+ * We build PlaceHolderInfos only for PHVs that are still present in the + * simplified query passed to query_planner(). + * + * Note: this should only be called after query_planner() has started. Also, + * create_new_ph must not be true after deconstruct_jointree begins, because + * make_outerjoininfo assumes that we already know about all placeholders. + */ +PlaceHolderInfo * +find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv, + bool create_new_ph) +{ + PlaceHolderInfo *phinfo; + Relids rels_used; + ListCell *lc; + + /* if this ever isn't true, we'd need to be able to look in parent lists */ + Assert(phv->phlevelsup == 0); + + foreach(lc, root->placeholder_list) + { + phinfo = (PlaceHolderInfo *) lfirst(lc); + if (phinfo->phid == phv->phid) + return phinfo; + } + + /* Not found, so create it */ + if (!create_new_ph) + elog(ERROR, "too late to create a new PlaceHolderInfo"); + + phinfo = makeNode(PlaceHolderInfo); + + phinfo->phid = phv->phid; + phinfo->ph_var = copyObject(phv); + + /* + * Any referenced rels that are outside the PHV's syntactic scope are + * LATERAL references, which should be included in ph_lateral but not in + * ph_eval_at. If no referenced rels are within the syntactic scope, + * force evaluation at the syntactic location. + */ + rels_used = pull_varnos(root, (Node *) phv->phexpr); + phinfo->ph_lateral = bms_difference(rels_used, phv->phrels); + if (bms_is_empty(phinfo->ph_lateral)) + phinfo->ph_lateral = NULL; /* make it exactly NULL if empty */ + phinfo->ph_eval_at = bms_int_members(rels_used, phv->phrels); + /* If no contained vars, force evaluation at syntactic location */ + if (bms_is_empty(phinfo->ph_eval_at)) + { + phinfo->ph_eval_at = bms_copy(phv->phrels); + Assert(!bms_is_empty(phinfo->ph_eval_at)); + } + /* ph_eval_at may change later, see update_placeholder_eval_levels */ + phinfo->ph_needed = NULL; /* initially it's unused */ + /* for the moment, estimate width using just the datatype info */ + phinfo->ph_width = get_typavgwidth(exprType((Node *) phv->phexpr), + exprTypmod((Node *) phv->phexpr)); + + root->placeholder_list = lappend(root->placeholder_list, phinfo); + + /* + * The PHV's contained expression may contain other, lower-level PHVs. We + * now know we need to get those into the PlaceHolderInfo list, too, so we + * may as well do that immediately. + */ + find_placeholders_in_expr(root, (Node *) phinfo->ph_var->phexpr); + + return phinfo; +} + +/* + * find_placeholders_in_jointree + * Search the jointree for PlaceHolderVars, and build PlaceHolderInfos + * + * We don't need to look at the targetlist because build_base_rel_tlists() + * will already have made entries for any PHVs in the tlist. + * + * This is called before we begin deconstruct_jointree. Once we begin + * deconstruct_jointree, all active placeholders must be present in + * root->placeholder_list, because make_outerjoininfo and + * update_placeholder_eval_levels require this info to be available + * while we crawl up the join tree. + */ +void +find_placeholders_in_jointree(PlannerInfo *root) +{ + /* We need do nothing if the query contains no PlaceHolderVars */ + if (root->glob->lastPHId != 0) + { + /* Start recursion at top of jointree */ + Assert(root->parse->jointree != NULL && + IsA(root->parse->jointree, FromExpr)); + find_placeholders_recurse(root, (Node *) root->parse->jointree); + } +} + +/* + * find_placeholders_recurse + * One recursion level of find_placeholders_in_jointree. + * + * jtnode is the current jointree node to examine. 
+ */ +static void +find_placeholders_recurse(PlannerInfo *root, Node *jtnode) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, RangeTblRef)) + { + /* No quals to deal with here */ + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + /* + * First, recurse to handle child joins. + */ + foreach(l, f->fromlist) + { + find_placeholders_recurse(root, lfirst(l)); + } + + /* + * Now process the top-level quals. + */ + find_placeholders_in_expr(root, f->quals); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* + * First, recurse to handle child joins. + */ + find_placeholders_recurse(root, j->larg); + find_placeholders_recurse(root, j->rarg); + + /* Process the qual clauses */ + find_placeholders_in_expr(root, j->quals); + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} + +/* + * find_placeholders_in_expr + * Find all PlaceHolderVars in the given expression, and create + * PlaceHolderInfo entries for them. + */ +static void +find_placeholders_in_expr(PlannerInfo *root, Node *expr) +{ + List *vars; + ListCell *vl; + + /* + * pull_var_clause does more than we need here, but it'll do and it's + * convenient to use. + */ + vars = pull_var_clause(expr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + foreach(vl, vars) + { + PlaceHolderVar *phv = (PlaceHolderVar *) lfirst(vl); + + /* Ignore any plain Vars */ + if (!IsA(phv, PlaceHolderVar)) + continue; + + /* Create a PlaceHolderInfo entry if there's not one already */ + (void) find_placeholder_info(root, phv, true); + } + list_free(vars); +} + +/* + * update_placeholder_eval_levels + * Adjust the target evaluation levels for placeholders + * + * The initial eval_at level set by find_placeholder_info was the set of + * rels used in the placeholder's expression (or the whole subselect below + * the placeholder's syntactic location, if the expr is variable-free). + * If the query contains any outer joins that can null any of those rels, + * we must delay evaluation to above those joins. + * + * We repeat this operation each time we add another outer join to + * root->join_info_list. It's somewhat annoying to have to do that, but + * since we don't have very much information on the placeholders' locations, + * it's hard to avoid. Each placeholder's eval_at level must be correct + * by the time it starts to figure in outer-join delay decisions for higher + * outer joins. + * + * In future we might want to put additional policy/heuristics here to + * try to determine an optimal evaluation level. The current rules will + * result in evaluation at the lowest possible level. However, pushing a + * placeholder eval up the tree is likely to further constrain evaluation + * order for outer joins, so it could easily be counterproductive; and we + * don't have enough information at this point to make an intelligent choice. + */ +void +update_placeholder_eval_levels(PlannerInfo *root, SpecialJoinInfo *new_sjinfo) +{ + ListCell *lc1; + + foreach(lc1, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc1); + Relids syn_level = phinfo->ph_var->phrels; + Relids eval_at; + bool found_some; + ListCell *lc2; + + /* + * We don't need to do any work on this placeholder unless the + * newly-added outer join is syntactically beneath its location. 
+ */ + if (!bms_is_subset(new_sjinfo->syn_lefthand, syn_level) || + !bms_is_subset(new_sjinfo->syn_righthand, syn_level)) + continue; + + /* + * Check for delays due to lower outer joins. This is the same logic + * as in check_outerjoin_delay in initsplan.c, except that we don't + * have anything to do with the delay_upper_joins flags; delay of + * upper outer joins will be handled later, based on the eval_at + * values we compute now. + */ + eval_at = phinfo->ph_eval_at; + + do + { + found_some = false; + foreach(lc2, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc2); + + /* disregard joins not within the PHV's sub-select */ + if (!bms_is_subset(sjinfo->syn_lefthand, syn_level) || + !bms_is_subset(sjinfo->syn_righthand, syn_level)) + continue; + + /* do we reference any nullable rels of this OJ? */ + if (bms_overlap(eval_at, sjinfo->min_righthand) || + (sjinfo->jointype == JOIN_FULL && + bms_overlap(eval_at, sjinfo->min_lefthand))) + { + /* yes; have we included all its rels in eval_at? */ + if (!bms_is_subset(sjinfo->min_lefthand, eval_at) || + !bms_is_subset(sjinfo->min_righthand, eval_at)) + { + /* no, so add them in */ + eval_at = bms_add_members(eval_at, + sjinfo->min_lefthand); + eval_at = bms_add_members(eval_at, + sjinfo->min_righthand); + /* we'll need another iteration */ + found_some = true; + } + } + } + } while (found_some); + + /* Can't move the PHV's eval_at level to above its syntactic level */ + Assert(bms_is_subset(eval_at, syn_level)); + + phinfo->ph_eval_at = eval_at; + } +} + +/* + * fix_placeholder_input_needed_levels + * Adjust the "needed at" levels for placeholder inputs + * + * This is called after we've finished determining the eval_at levels for + * all placeholders. We need to make sure that all vars and placeholders + * needed to evaluate each placeholder will be available at the scan or join + * level where the evaluation will be done. (It might seem that scan-level + * evaluations aren't interesting, but that's not so: a LATERAL reference + * within a placeholder's expression needs to cause the referenced var or + * placeholder to be marked as needed in the scan where it's evaluated.) + * Note that this loop can have side-effects on the ph_needed sets of other + * PlaceHolderInfos; that's okay because we don't examine ph_needed here, so + * there are no ordering issues to worry about. + */ +void +fix_placeholder_input_needed_levels(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + List *vars = pull_var_clause((Node *) phinfo->ph_var->phexpr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + add_vars_to_targetlist(root, vars, phinfo->ph_eval_at, false); + list_free(vars); + } +} + +/* + * add_placeholders_to_base_rels + * Add any required PlaceHolderVars to base rels' targetlists. + * + * If any placeholder can be computed at a base rel and is needed above it, + * add it to that rel's targetlist. This might look like it could be merged + * with fix_placeholder_input_needed_levels, but it must be separate because + * join removal happens in between, and can change the ph_eval_at sets. There + * is essentially the same logic in add_placeholders_to_joinrel, but we can't + * do that part until joinrels are formed. 
+ */ +void +add_placeholders_to_base_rels(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + Relids eval_at = phinfo->ph_eval_at; + int varno; + + if (bms_get_singleton_member(eval_at, &varno) && + bms_nonempty_difference(phinfo->ph_needed, eval_at)) + { + RelOptInfo *rel = find_base_rel(root, varno); + + rel->reltarget->exprs = lappend(rel->reltarget->exprs, + copyObject(phinfo->ph_var)); + /* reltarget's cost and width fields will be updated later */ + } + } +} + +/* + * add_placeholders_to_joinrel + * Add any required PlaceHolderVars to a join rel's targetlist; + * and if they contain lateral references, add those references to the + * joinrel's direct_lateral_relids. + * + * A join rel should emit a PlaceHolderVar if (a) the PHV can be computed + * at or below this join level and (b) the PHV is needed above this level. + * However, condition (a) is sufficient to add to direct_lateral_relids, + * as explained below. + */ +void +add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel) +{ + Relids relids = joinrel->relids; + ListCell *lc; + + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + + /* Is it computable here? */ + if (bms_is_subset(phinfo->ph_eval_at, relids)) + { + /* Is it still needed above this joinrel? */ + if (bms_nonempty_difference(phinfo->ph_needed, relids)) + { + /* Yup, add it to the output */ + joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, + phinfo->ph_var); + joinrel->reltarget->width += phinfo->ph_width; + + /* + * Charge the cost of evaluating the contained expression if + * the PHV can be computed here but not in either input. This + * is a bit bogus because we make the decision based on the + * first pair of possible input relations considered for the + * joinrel. With other pairs, it might be possible to compute + * the PHV in one input or the other, and then we'd be double + * charging the PHV's cost for some join paths. For now, live + * with that; but we might want to improve it later by + * refiguring the reltarget costs for each pair of inputs. + */ + if (!bms_is_subset(phinfo->ph_eval_at, outer_rel->relids) && + !bms_is_subset(phinfo->ph_eval_at, inner_rel->relids)) + { + QualCost cost; + + cost_qual_eval_node(&cost, (Node *) phinfo->ph_var->phexpr, + root); + joinrel->reltarget->cost.startup += cost.startup; + joinrel->reltarget->cost.per_tuple += cost.per_tuple; + } + } + + /* + * Also adjust joinrel's direct_lateral_relids to include the + * PHV's source rel(s). We must do this even if we're not + * actually going to emit the PHV, otherwise join_is_legal() will + * reject valid join orderings. (In principle maybe we could + * instead remove the joinrel's lateral_relids dependency; but + * that's complicated to get right, and cases where we're not + * going to emit the PHV are too rare to justify the work.) + * + * In principle we should only do this if the join doesn't yet + * include the PHV's source rel(s). But our caller + * build_join_rel() will clean things up by removing the join's + * own relids from its direct_lateral_relids, so we needn't + * account for that here. 
+ */
+			joinrel->direct_lateral_relids =
+				bms_add_members(joinrel->direct_lateral_relids,
+								phinfo->ph_lateral);
+		}
+	}
+}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
new file mode 100644
index 0000000..419f2ac
--- /dev/null
+++ b/src/backend/optimizer/util/plancat.c
@@ -0,0 +1,2509 @@
+/*-------------------------------------------------------------------------
+ *
+ * plancat.c
+ *	   routines for accessing the system catalogs
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/util/plancat.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/transam.h"
+#include "access/xlog.h"
+#include "catalog/catalog.h"
+#include "catalog/heap.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_statistic_ext_data.h"
+#include "foreign/fdwapi.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
+#include "optimizer/prep.h"
+#include "parser/parse_relation.h"
+#include "parser/parsetree.h"
+#include "partitioning/partdesc.h"
+#include "rewrite/rewriteManip.h"
+#include "statistics/statistics.h"
+#include "storage/bufmgr.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/partcache.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+/* GUC parameter */
+int			constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
+
+/* Hook for plugins to get control in get_relation_info() */
+get_relation_info_hook_type get_relation_info_hook = NULL;
+
+
+static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
+									  Relation relation, bool inhparent);
+static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
+										  List *idxExprs);
+static List *get_relation_constraints(PlannerInfo *root,
+									  Oid relationObjectId, RelOptInfo *rel,
+									  bool include_noinherit,
+									  bool include_notnull,
+									  bool include_partition);
+static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
+							   Relation heapRelation);
+static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
+static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
+										Relation relation);
+static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel);
+static void set_baserel_partition_key_exprs(Relation relation,
+											RelOptInfo *rel);
+static void set_baserel_partition_constraint(Relation relation,
+											 RelOptInfo *rel);
+
+
+/*
+ * get_relation_info -
+ *	  Retrieves catalog information for a given relation.
+ * + * Given the Oid of the relation, return the following info into fields + * of the RelOptInfo struct: + * + * min_attr lowest valid AttrNumber + * max_attr highest valid AttrNumber + * indexlist list of IndexOptInfos for relation's indexes + * statlist list of StatisticExtInfo for relation's statistic objects + * serverid if it's a foreign table, the server OID + * fdwroutine if it's a foreign table, the FDW function pointers + * pages number of pages + * tuples number of tuples + * rel_parallel_workers user-defined number of parallel workers + * + * Also, add information about the relation's foreign keys to root->fkey_list. + * + * Also, initialize the attr_needed[] and attr_widths[] arrays. In most + * cases these are left as zeroes, but sometimes we need to compute attr + * widths here, and we may as well cache the results for costsize.c. + * + * If inhparent is true, all we need to do is set up the attr arrays: + * the RelOptInfo actually represents the appendrel formed by an inheritance + * tree, and so the parent rel's physical size and index information isn't + * important for it. + */ +void +get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, + RelOptInfo *rel) +{ + Index varno = rel->relid; + Relation relation; + bool hasindex; + List *indexinfos = NIL; + + /* + * We need not lock the relation since it was already locked, either by + * the rewriter or when expand_inherited_rtentry() added it to the query's + * rangetable. + */ + relation = table_open(relationObjectId, NoLock); + + /* + * Relations without a table AM can be used in a query only if they are of + * special-cased relkinds. This check prevents us from crashing later if, + * for example, a view's ON SELECT rule has gone missing. Note that + * table_open() already rejected indexes and composite types; spell the + * error the same way it does. + */ + if (!relation->rd_tableam) + { + if (!(relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot open relation \"%s\"", + RelationGetRelationName(relation)), + errdetail_relkind_not_supported(relation->rd_rel->relkind))); + } + + /* Temporary and unlogged relations are inaccessible during recovery. */ + if (!RelationIsPermanent(relation) && RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary or unlogged relations during recovery"))); + + rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1; + rel->max_attr = RelationGetNumberOfAttributes(relation); + rel->reltablespace = RelationGetForm(relation)->reltablespace; + + Assert(rel->max_attr >= rel->min_attr); + rel->attr_needed = (Relids *) + palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids)); + rel->attr_widths = (int32 *) + palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32)); + + /* + * Estimate relation size --- unless it's an inheritance parent, in which + * case the size we want is not the rel's own size but the size of its + * inheritance tree. That will be computed in set_append_rel_size(). + */ + if (!inhparent) + estimate_rel_size(relation, rel->attr_widths - rel->min_attr, + &rel->pages, &rel->tuples, &rel->allvisfrac); + + /* Retrieve the parallel_workers reloption, or -1 if not set. */ + rel->rel_parallel_workers = RelationGetParallelWorkers(relation, -1); + + /* + * Make list of indexes. Ignore indexes on system catalogs if told to. 
+ * Don't bother with indexes for an inheritance parent, either. + */ + if (inhparent || + (IgnoreSystemIndexes && IsSystemRelation(relation))) + hasindex = false; + else + hasindex = relation->rd_rel->relhasindex; + + if (hasindex) + { + List *indexoidlist; + LOCKMODE lmode; + ListCell *l; + + indexoidlist = RelationGetIndexList(relation); + + /* + * For each index, we get the same type of lock that the executor will + * need, and do not release it. This saves a couple of trips to the + * shared lock manager while not creating any real loss of + * concurrency, because no schema changes could be happening on the + * index while we hold lock on the parent rel, and no lock type used + * for queries blocks any other kind of index operation. + */ + lmode = root->simple_rte_array[varno]->rellockmode; + + foreach(l, indexoidlist) + { + Oid indexoid = lfirst_oid(l); + Relation indexRelation; + Form_pg_index index; + IndexAmRoutine *amroutine; + IndexOptInfo *info; + int ncolumns, + nkeycolumns; + int i; + + /* + * Extract info from the relation descriptor for the index. + */ + indexRelation = index_open(indexoid, lmode); + index = indexRelation->rd_index; + + /* + * Ignore invalid indexes, since they can't safely be used for + * queries. Note that this is OK because the data structure we + * are constructing is only used by the planner --- the executor + * still needs to insert into "invalid" indexes, if they're marked + * indisready. + */ + if (!index->indisvalid) + { + index_close(indexRelation, NoLock); + continue; + } + + /* + * Ignore partitioned indexes, since they are not usable for + * queries. + */ + if (indexRelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + { + index_close(indexRelation, NoLock); + continue; + } + + /* + * If the index is valid, but cannot yet be used, ignore it; but + * mark the plan we are generating as transient. See + * src/backend/access/heap/README.HOT for discussion. 
+ */ + if (index->indcheckxmin && + !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), + TransactionXmin)) + { + root->glob->transientPlan = true; + index_close(indexRelation, NoLock); + continue; + } + + info = makeNode(IndexOptInfo); + + info->indexoid = index->indexrelid; + info->reltablespace = + RelationGetForm(indexRelation)->reltablespace; + info->rel = rel; + info->ncolumns = ncolumns = index->indnatts; + info->nkeycolumns = nkeycolumns = index->indnkeyatts; + + info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); + info->indexcollations = (Oid *) palloc(sizeof(Oid) * nkeycolumns); + info->opfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns); + info->opcintype = (Oid *) palloc(sizeof(Oid) * nkeycolumns); + info->canreturn = (bool *) palloc(sizeof(bool) * ncolumns); + + for (i = 0; i < ncolumns; i++) + { + info->indexkeys[i] = index->indkey.values[i]; + info->canreturn[i] = index_can_return(indexRelation, i + 1); + } + + for (i = 0; i < nkeycolumns; i++) + { + info->opfamily[i] = indexRelation->rd_opfamily[i]; + info->opcintype[i] = indexRelation->rd_opcintype[i]; + info->indexcollations[i] = indexRelation->rd_indcollation[i]; + } + + info->relam = indexRelation->rd_rel->relam; + + /* We copy just the fields we need, not all of rd_indam */ + amroutine = indexRelation->rd_indam; + info->amcanorderbyop = amroutine->amcanorderbyop; + info->amoptionalkey = amroutine->amoptionalkey; + info->amsearcharray = amroutine->amsearcharray; + info->amsearchnulls = amroutine->amsearchnulls; + info->amcanparallel = amroutine->amcanparallel; + info->amhasgettuple = (amroutine->amgettuple != NULL); + info->amhasgetbitmap = amroutine->amgetbitmap != NULL && + relation->rd_tableam->scan_bitmap_next_block != NULL; + info->amcanmarkpos = (amroutine->ammarkpos != NULL && + amroutine->amrestrpos != NULL); + info->amcostestimate = amroutine->amcostestimate; + Assert(info->amcostestimate != NULL); + + /* Fetch index opclass options */ + info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true); + + /* + * Fetch the ordering information for the index, if any. + */ + if (info->relam == BTREE_AM_OID) + { + /* + * If it's a btree index, we can use its opfamily OIDs + * directly as the sort ordering opfamily OIDs. + */ + Assert(amroutine->amcanorder); + + info->sortopfamily = info->opfamily; + info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns); + info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns); + + for (i = 0; i < nkeycolumns; i++) + { + int16 opt = indexRelation->rd_indoption[i]; + + info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0; + info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; + } + } + else if (amroutine->amcanorder) + { + /* + * Otherwise, identify the corresponding btree opfamilies by + * trying to map this index's "<" operators into btree. Since + * "<" uniquely defines the behavior of a sort order, this is + * a sufficient test. + * + * XXX This method is rather slow and also requires the + * undesirable assumption that the other index AM numbers its + * strategies the same as btree. It'd be better to have a way + * to explicitly declare the corresponding btree opfamily for + * each opfamily of the other index type. But given the lack + * of current or foreseeable amcanorder index types, it's not + * worth expending more effort on now. 
+ */ + info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns); + info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns); + info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns); + + for (i = 0; i < nkeycolumns; i++) + { + int16 opt = indexRelation->rd_indoption[i]; + Oid ltopr; + Oid btopfamily; + Oid btopcintype; + int16 btstrategy; + + info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0; + info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; + + ltopr = get_opfamily_member(info->opfamily[i], + info->opcintype[i], + info->opcintype[i], + BTLessStrategyNumber); + if (OidIsValid(ltopr) && + get_ordering_op_properties(ltopr, + &btopfamily, + &btopcintype, + &btstrategy) && + btopcintype == info->opcintype[i] && + btstrategy == BTLessStrategyNumber) + { + /* Successful mapping */ + info->sortopfamily[i] = btopfamily; + } + else + { + /* Fail ... quietly treat index as unordered */ + info->sortopfamily = NULL; + info->reverse_sort = NULL; + info->nulls_first = NULL; + break; + } + } + } + else + { + info->sortopfamily = NULL; + info->reverse_sort = NULL; + info->nulls_first = NULL; + } + + /* + * Fetch the index expressions and predicate, if any. We must + * modify the copies we obtain from the relcache to have the + * correct varno for the parent relation, so that they match up + * correctly against qual clauses. + */ + info->indexprs = RelationGetIndexExpressions(indexRelation); + info->indpred = RelationGetIndexPredicate(indexRelation); + if (info->indexprs && varno != 1) + ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); + if (info->indpred && varno != 1) + ChangeVarNodes((Node *) info->indpred, 1, varno, 0); + + /* Build targetlist using the completed indexprs data */ + info->indextlist = build_index_tlist(root, info, relation); + + info->indrestrictinfo = NIL; /* set later, in indxpath.c */ + info->predOK = false; /* set later, in indxpath.c */ + info->unique = index->indisunique; + info->immediate = index->indimmediate; + info->hypothetical = false; + + /* + * Estimate the index size. If it's not a partial index, we lock + * the number-of-tuples estimate to equal the parent table; if it + * is partial then we have to use the same methods as we would for + * a table, except we can be sure that the index is not larger + * than the table. + */ + if (info->indpred == NIL) + { + info->pages = RelationGetNumberOfBlocks(indexRelation); + info->tuples = rel->tuples; + } + else + { + double allvisfrac; /* dummy */ + + estimate_rel_size(indexRelation, NULL, + &info->pages, &info->tuples, &allvisfrac); + if (info->tuples > rel->tuples) + info->tuples = rel->tuples; + } + + if (info->relam == BTREE_AM_OID) + { + /* For btrees, get tree height while we have the index open */ + info->tree_height = _bt_getrootheight(indexRelation); + } + else + { + /* For other index types, just set it to "unknown" for now */ + info->tree_height = -1; + } + + index_close(indexRelation, NoLock); + + /* + * We've historically used lcons() here. It'd make more sense to + * use lappend(), but that causes the planner to change behavior + * in cases where two indexes seem equally attractive. For now, + * stick with lcons() --- few tables should have so many indexes + * that the O(N^2) behavior of lcons() is really a problem. 
+ */ + indexinfos = lcons(info, indexinfos); + } + + list_free(indexoidlist); + } + + rel->indexlist = indexinfos; + + rel->statlist = get_relation_statistics(rel, relation); + + /* Grab foreign-table info using the relcache, while we have it */ + if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + rel->serverid = GetForeignServerIdByRelId(RelationGetRelid(relation)); + rel->fdwroutine = GetFdwRoutineForRelation(relation, true); + } + else + { + rel->serverid = InvalidOid; + rel->fdwroutine = NULL; + } + + /* Collect info about relation's foreign keys, if relevant */ + get_relation_foreign_keys(root, rel, relation, inhparent); + + /* Collect info about functions implemented by the rel's table AM. */ + if (relation->rd_tableam && + relation->rd_tableam->scan_set_tidrange != NULL && + relation->rd_tableam->scan_getnextslot_tidrange != NULL) + rel->amflags |= AMFLAG_HAS_TID_RANGE; + + /* + * Collect info about relation's partitioning scheme, if any. Only + * inheritance parents may be partitioned. + */ + if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + set_relation_partition_info(root, rel, relation); + + table_close(relation, NoLock); + + /* + * Allow a plugin to editorialize on the info we obtained from the + * catalogs. Actions might include altering the assumed relation size, + * removing an index, or adding a hypothetical index to the indexlist. + */ + if (get_relation_info_hook) + (*get_relation_info_hook) (root, relationObjectId, inhparent, rel); +} + +/* + * get_relation_foreign_keys - + * Retrieves foreign key information for a given relation. + * + * ForeignKeyOptInfos for relevant foreign keys are created and added to + * root->fkey_list. We do this now while we have the relcache entry open. + * We could sometimes avoid making useless ForeignKeyOptInfos if we waited + * until all RelOptInfos have been built, but the cost of re-opening the + * relcache entries would probably exceed any savings. + */ +static void +get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, + Relation relation, bool inhparent) +{ + List *rtable = root->parse->rtable; + List *cachedfkeys; + ListCell *lc; + + /* + * If it's not a baserel, we don't care about its FKs. Also, if the query + * references only a single relation, we can skip the lookup since no FKs + * could satisfy the requirements below. + */ + if (rel->reloptkind != RELOPT_BASEREL || + list_length(rtable) < 2) + return; + + /* + * If it's the parent of an inheritance tree, ignore its FKs. We could + * make useful FK-based deductions if we found that all members of the + * inheritance tree have equivalent FK constraints, but detecting that + * would require code that hasn't been written. + */ + if (inhparent) + return; + + /* + * Extract data about relation's FKs from the relcache. Note that this + * list belongs to the relcache and might disappear in a cache flush, so + * we must not do any further catalog access within this function. + */ + cachedfkeys = RelationGetFKeyList(relation); + + /* + * Figure out which FKs are of interest for this query, and create + * ForeignKeyOptInfos for them. We want only FKs that reference some + * other RTE of the current query. In queries containing self-joins, + * there might be more than one other RTE for a referenced table, and we + * should make a ForeignKeyOptInfo for each occurrence. + * + * Ideally, we would ignore RTEs that correspond to non-baserels, but it's + * too hard to identify those here, so we might end up making some useless + * ForeignKeyOptInfos. 
If so, match_foreign_keys_to_quals() will remove + * them again. + */ + foreach(lc, cachedfkeys) + { + ForeignKeyCacheInfo *cachedfk = (ForeignKeyCacheInfo *) lfirst(lc); + Index rti; + ListCell *lc2; + + /* conrelid should always be that of the table we're considering */ + Assert(cachedfk->conrelid == RelationGetRelid(relation)); + + /* Scan to find other RTEs matching confrelid */ + rti = 0; + foreach(lc2, rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + ForeignKeyOptInfo *info; + + rti++; + /* Ignore if not the correct table */ + if (rte->rtekind != RTE_RELATION || + rte->relid != cachedfk->confrelid) + continue; + /* Ignore if it's an inheritance parent; doesn't really match */ + if (rte->inh) + continue; + /* Ignore self-referential FKs; we only care about joins */ + if (rti == rel->relid) + continue; + + /* OK, let's make an entry */ + info = makeNode(ForeignKeyOptInfo); + info->con_relid = rel->relid; + info->ref_relid = rti; + info->nkeys = cachedfk->nkeys; + memcpy(info->conkey, cachedfk->conkey, sizeof(info->conkey)); + memcpy(info->confkey, cachedfk->confkey, sizeof(info->confkey)); + memcpy(info->conpfeqop, cachedfk->conpfeqop, sizeof(info->conpfeqop)); + /* zero out fields to be filled by match_foreign_keys_to_quals */ + info->nmatched_ec = 0; + info->nconst_ec = 0; + info->nmatched_rcols = 0; + info->nmatched_ri = 0; + memset(info->eclass, 0, sizeof(info->eclass)); + memset(info->fk_eclass_member, 0, sizeof(info->fk_eclass_member)); + memset(info->rinfos, 0, sizeof(info->rinfos)); + + root->fkey_list = lappend(root->fkey_list, info); + } + } +} + +/* + * infer_arbiter_indexes - + * Determine the unique indexes used to arbitrate speculative insertion. + * + * Uses user-supplied inference clause expressions and predicate to match a + * unique index from those defined and ready on the heap relation (target). + * An exact match is required on columns/expressions (although they can appear + * in any order). However, the predicate given by the user need only restrict + * insertion to a subset of some part of the table covered by some particular + * unique index (in particular, a partial unique index) in order to be + * inferred. + * + * The implementation does not consider which B-Tree operator class any + * particular available unique index attribute uses, unless one was specified + * in the inference specification. The same is true of collations. In + * particular, there is no system dependency on the default operator class for + * the purposes of inference. If no opclass (or collation) is specified, then + * all matching indexes (that may or may not match the default in terms of + * each attribute opclass/collation) are used for inference. + */ +List * +infer_arbiter_indexes(PlannerInfo *root) +{ + OnConflictExpr *onconflict = root->parse->onConflict; + + /* Iteration state */ + RangeTblEntry *rte; + Relation relation; + Oid indexOidFromConstraint = InvalidOid; + List *indexList; + ListCell *l; + + /* Normalized inference attributes and inference expressions: */ + Bitmapset *inferAttrs = NULL; + List *inferElems = NIL; + + /* Results */ + List *results = NIL; + + /* + * Quickly return NIL for ON CONFLICT DO NOTHING without an inference + * specification or named constraint. ON CONFLICT DO UPDATE statements + * must always provide one or the other (but parser ought to have caught + * that already). 
+ */ + if (onconflict->arbiterElems == NIL && + onconflict->constraint == InvalidOid) + return NIL; + + /* + * We need not lock the relation since it was already locked, either by + * the rewriter or when expand_inherited_rtentry() added it to the query's + * rangetable. + */ + rte = rt_fetch(root->parse->resultRelation, root->parse->rtable); + + relation = table_open(rte->relid, NoLock); + + /* + * Build normalized/BMS representation of plain indexed attributes, as + * well as a separate list of expression items. This simplifies matching + * the cataloged definition of indexes. + */ + foreach(l, onconflict->arbiterElems) + { + InferenceElem *elem = (InferenceElem *) lfirst(l); + Var *var; + int attno; + + if (!IsA(elem->expr, Var)) + { + /* If not a plain Var, just shove it in inferElems for now */ + inferElems = lappend(inferElems, elem->expr); + continue; + } + + var = (Var *) elem->expr; + attno = var->varattno; + + if (attno == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("whole row unique index inference specifications are not supported"))); + + inferAttrs = bms_add_member(inferAttrs, + attno - FirstLowInvalidHeapAttributeNumber); + } + + /* + * Lookup named constraint's index. This is not immediately returned + * because some additional sanity checks are required. + */ + if (onconflict->constraint != InvalidOid) + { + indexOidFromConstraint = get_constraint_index(onconflict->constraint); + + if (indexOidFromConstraint == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint in ON CONFLICT clause has no associated index"))); + } + + /* + * Using that representation, iterate through the list of indexes on the + * target relation to try and find a match + */ + indexList = RelationGetIndexList(relation); + + foreach(l, indexList) + { + Oid indexoid = lfirst_oid(l); + Relation idxRel; + Form_pg_index idxForm; + Bitmapset *indexedAttrs; + List *idxExprs; + List *predExprs; + AttrNumber natt; + ListCell *el; + + /* + * Extract info from the relation descriptor for the index. Obtain + * the same lock type that the executor will ultimately use. + * + * Let executor complain about !indimmediate case directly, because + * enforcement needs to occur there anyway when an inference clause is + * omitted. + */ + idxRel = index_open(indexoid, rte->rellockmode); + idxForm = idxRel->rd_index; + + if (!idxForm->indisvalid) + goto next; + + /* + * Note that we do not perform a check against indcheckxmin (like e.g. + * get_relation_info()) here to eliminate candidates, because + * uniqueness checking only cares about the most recently committed + * tuple versions. + */ + + /* + * Look for match on "ON constraint_name" variant, which may not be + * unique constraint. This can only be a constraint name. 
+ */ + if (indexOidFromConstraint == idxForm->indexrelid) + { + if (!idxForm->indisunique && onconflict->action == ONCONFLICT_UPDATE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("ON CONFLICT DO UPDATE not supported with exclusion constraints"))); + + results = lappend_oid(results, idxForm->indexrelid); + list_free(indexList); + index_close(idxRel, NoLock); + table_close(relation, NoLock); + return results; + } + else if (indexOidFromConstraint != InvalidOid) + { + /* No point in further work for index in named constraint case */ + goto next; + } + + /* + * Only considering conventional inference at this point (not named + * constraints), so index under consideration can be immediately + * skipped if it's not unique + */ + if (!idxForm->indisunique) + goto next; + + /* Build BMS representation of plain (non expression) index attrs */ + indexedAttrs = NULL; + for (natt = 0; natt < idxForm->indnkeyatts; natt++) + { + int attno = idxRel->rd_index->indkey.values[natt]; + + if (attno != 0) + indexedAttrs = bms_add_member(indexedAttrs, + attno - FirstLowInvalidHeapAttributeNumber); + } + + /* Non-expression attributes (if any) must match */ + if (!bms_equal(indexedAttrs, inferAttrs)) + goto next; + + /* Expression attributes (if any) must match */ + idxExprs = RelationGetIndexExpressions(idxRel); + foreach(el, onconflict->arbiterElems) + { + InferenceElem *elem = (InferenceElem *) lfirst(el); + + /* + * Ensure that collation/opclass aspects of inference expression + * element match. Even though this loop is primarily concerned + * with matching expressions, it is a convenient point to check + * this for both expressions and ordinary (non-expression) + * attributes appearing as inference elements. + */ + if (!infer_collation_opclass_match(elem, idxRel, idxExprs)) + goto next; + + /* + * Plain Vars don't factor into count of expression elements, and + * the question of whether or not they satisfy the index + * definition has already been considered (they must). + */ + if (IsA(elem->expr, Var)) + continue; + + /* + * Might as well avoid redundant check in the rare cases where + * infer_collation_opclass_match() is required to do real work. + * Otherwise, check that element expression appears in cataloged + * index definition. + */ + if (elem->infercollid != InvalidOid || + elem->inferopclass != InvalidOid || + list_member(idxExprs, elem->expr)) + continue; + + goto next; + } + + /* + * Now that all inference elements were matched, ensure that the + * expression elements from inference clause are not missing any + * cataloged expressions. This does the right thing when unique + * indexes redundantly repeat the same attribute, or if attributes + * redundantly appear multiple times within an inference clause. + */ + if (list_difference(idxExprs, inferElems) != NIL) + goto next; + + /* + * If it's a partial index, its predicate must be implied by the ON + * CONFLICT's WHERE clause. 
+ */ + predExprs = RelationGetIndexPredicate(idxRel); + + if (!predicate_implied_by(predExprs, (List *) onconflict->arbiterWhere, false)) + goto next; + + results = lappend_oid(results, idxForm->indexrelid); +next: + index_close(idxRel, NoLock); + } + + list_free(indexList); + table_close(relation, NoLock); + + if (results == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("there is no unique or exclusion constraint matching the ON CONFLICT specification"))); + + return results; +} + +/* + * infer_collation_opclass_match - ensure infer element opclass/collation match + * + * Given unique index inference element from inference specification, if + * collation was specified, or if opclass was specified, verify that there is + * at least one matching indexed attribute (occasionally, there may be more). + * Skip this in the common case where inference specification does not include + * collation or opclass (instead matching everything, regardless of cataloged + * collation/opclass of indexed attribute). + * + * At least historically, Postgres has not offered collations or opclasses + * with alternative-to-default notions of equality, so these additional + * criteria should only be required infrequently. + * + * Don't give up immediately when an inference element matches some attribute + * cataloged as indexed but not matching additional opclass/collation + * criteria. This is done so that the implementation is as forgiving as + * possible of redundancy within cataloged index attributes (or, less + * usefully, within inference specification elements). If collations actually + * differ between apparently redundantly indexed attributes (redundant within + * or across indexes), then there really is no redundancy as such. + * + * Note that if an inference element specifies an opclass and a collation at + * once, both must match in at least one particular attribute within index + * catalog definition in order for that inference element to be considered + * inferred/satisfied. + */ +static bool +infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, + List *idxExprs) +{ + AttrNumber natt; + Oid inferopfamily = InvalidOid; /* OID of opclass opfamily */ + Oid inferopcinputtype = InvalidOid; /* OID of opclass input type */ + int nplain = 0; /* # plain attrs observed */ + + /* + * If inference specification element lacks collation/opclass, then no + * need to check for exact match. 
+ */ + if (elem->infercollid == InvalidOid && elem->inferopclass == InvalidOid) + return true; + + /* + * Lookup opfamily and input type, for matching indexes + */ + if (elem->inferopclass) + { + inferopfamily = get_opclass_family(elem->inferopclass); + inferopcinputtype = get_opclass_input_type(elem->inferopclass); + } + + for (natt = 1; natt <= idxRel->rd_att->natts; natt++) + { + Oid opfamily = idxRel->rd_opfamily[natt - 1]; + Oid opcinputtype = idxRel->rd_opcintype[natt - 1]; + Oid collation = idxRel->rd_indcollation[natt - 1]; + int attno = idxRel->rd_index->indkey.values[natt - 1]; + + if (attno != 0) + nplain++; + + if (elem->inferopclass != InvalidOid && + (inferopfamily != opfamily || inferopcinputtype != opcinputtype)) + { + /* Attribute needed to match opclass, but didn't */ + continue; + } + + if (elem->infercollid != InvalidOid && + elem->infercollid != collation) + { + /* Attribute needed to match collation, but didn't */ + continue; + } + + /* If one matching index att found, good enough -- return true */ + if (IsA(elem->expr, Var)) + { + if (((Var *) elem->expr)->varattno == attno) + return true; + } + else if (attno == 0) + { + Node *nattExpr = list_nth(idxExprs, (natt - 1) - nplain); + + /* + * Note that unlike routines like match_index_to_operand() we + * don't need to care about RelabelType. Neither the index + * definition nor the inference clause should contain them. + */ + if (equal(elem->expr, nattExpr)) + return true; + } + } + + return false; +} + +/* + * estimate_rel_size - estimate # pages and # tuples in a table or index + * + * We also estimate the fraction of the pages that are marked all-visible in + * the visibility map, for use in estimation of index-only scans. + * + * If attr_widths isn't NULL, it points to the zero-index entry of the + * relation's attr_widths[] cache; we fill this in if we have need to compute + * the attribute widths for estimation purposes. + */ +void +estimate_rel_size(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, double *allvisfrac) +{ + BlockNumber curpages; + BlockNumber relpages; + double reltuples; + BlockNumber relallvisible; + double density; + + if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind)) + { + table_relation_estimate_size(rel, attr_widths, pages, tuples, + allvisfrac); + } + else if (rel->rd_rel->relkind == RELKIND_INDEX) + { + /* + * XXX: It'd probably be good to move this into a callback, individual + * index types e.g. know if they have a metapage. + */ + + /* it has storage, ok to call the smgr */ + curpages = RelationGetNumberOfBlocks(rel); + + /* report estimated # pages */ + *pages = curpages; + /* quick exit if rel is clearly empty */ + if (curpages == 0) + { + *tuples = 0; + *allvisfrac = 0; + return; + } + + /* coerce values in pg_class to more desirable types */ + relpages = (BlockNumber) rel->rd_rel->relpages; + reltuples = (double) rel->rd_rel->reltuples; + relallvisible = (BlockNumber) rel->rd_rel->relallvisible; + + /* + * Discount the metapage while estimating the number of tuples. This + * is a kluge because it assumes more than it ought to about index + * structure. Currently it's OK for btree, hash, and GIN indexes but + * suspect for GiST indexes. 
+ */ + if (relpages > 0) + { + curpages--; + relpages--; + } + + /* estimate number of tuples from previous tuple density */ + if (reltuples >= 0 && relpages > 0) + density = reltuples / (double) relpages; + else + { + /* + * If we have no data because the relation was never vacuumed, + * estimate tuple width from attribute datatypes. We assume here + * that the pages are completely full, which is OK for tables + * (since they've presumably not been VACUUMed yet) but is + * probably an overestimate for indexes. Fortunately + * get_relation_info() can clamp the overestimate to the parent + * table's size. + * + * Note: this code intentionally disregards alignment + * considerations, because (a) that would be gilding the lily + * considering how crude the estimate is, and (b) it creates + * platform dependencies in the default plans which are kind of a + * headache for regression testing. + * + * XXX: Should this logic be more index specific? + */ + int32 tuple_width; + + tuple_width = get_rel_data_width(rel, attr_widths); + tuple_width += MAXALIGN(SizeofHeapTupleHeader); + tuple_width += sizeof(ItemIdData); + /* note: integer division is intentional here */ + density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width; + } + *tuples = rint(density * (double) curpages); + + /* + * We use relallvisible as-is, rather than scaling it up like we do + * for the pages and tuples counts, on the theory that any pages added + * since the last VACUUM are most likely not marked all-visible. But + * costsize.c wants it converted to a fraction. + */ + if (relallvisible == 0 || curpages <= 0) + *allvisfrac = 0; + else if ((double) relallvisible >= curpages) + *allvisfrac = 1; + else + *allvisfrac = (double) relallvisible / curpages; + } + else + { + /* + * Just use whatever's in pg_class. This covers foreign tables, + * sequences, and also relkinds without storage (shouldn't get here?); + * see initializations in AddNewRelationTuple(). Note that FDW must + * cope if reltuples is -1! + */ + *pages = rel->rd_rel->relpages; + *tuples = rel->rd_rel->reltuples; + *allvisfrac = 0; + } +} + + +/* + * get_rel_data_width + * + * Estimate the average width of (the data part of) the relation's tuples. + * + * If attr_widths isn't NULL, it points to the zero-index entry of the + * relation's attr_widths[] cache; use and update that cache as appropriate. + * + * Currently we ignore dropped columns. Ideally those should be included + * in the result, but we haven't got any way to get info about them; and + * since they might be mostly NULLs, treating them as zero-width is not + * necessarily the wrong thing anyway. 
+ */ +int32 +get_rel_data_width(Relation rel, int32 *attr_widths) +{ + int32 tuple_width = 0; + int i; + + for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++) + { + Form_pg_attribute att = TupleDescAttr(rel->rd_att, i - 1); + int32 item_width; + + if (att->attisdropped) + continue; + + /* use previously cached data, if any */ + if (attr_widths != NULL && attr_widths[i] > 0) + { + tuple_width += attr_widths[i]; + continue; + } + + /* This should match set_rel_width() in costsize.c */ + item_width = get_attavgwidth(RelationGetRelid(rel), i); + if (item_width <= 0) + { + item_width = get_typavgwidth(att->atttypid, att->atttypmod); + Assert(item_width > 0); + } + if (attr_widths != NULL) + attr_widths[i] = item_width; + tuple_width += item_width; + } + + return tuple_width; +} + +/* + * get_relation_data_width + * + * External API for get_rel_data_width: same behavior except we have to + * open the relcache entry. + */ +int32 +get_relation_data_width(Oid relid, int32 *attr_widths) +{ + int32 result; + Relation relation; + + /* As above, assume relation is already locked */ + relation = table_open(relid, NoLock); + + result = get_rel_data_width(relation, attr_widths); + + table_close(relation, NoLock); + + return result; +} + + +/* + * get_relation_constraints + * + * Retrieve the applicable constraint expressions of the given relation. + * + * Returns a List (possibly empty) of constraint expressions. Each one + * has been canonicalized, and its Vars are changed to have the varno + * indicated by rel->relid. This allows the expressions to be easily + * compared to expressions taken from WHERE. + * + * If include_noinherit is true, it's okay to include constraints that + * are marked NO INHERIT. + * + * If include_notnull is true, "col IS NOT NULL" expressions are generated + * and added to the result for each column that's marked attnotnull. + * + * If include_partition is true, and the relation is a partition, + * also include the partitioning constraints. + * + * Note: at present this is invoked at most once per relation per planner + * run, and in many cases it won't be invoked at all, so there seems no + * point in caching the data in RelOptInfo. + */ +static List * +get_relation_constraints(PlannerInfo *root, + Oid relationObjectId, RelOptInfo *rel, + bool include_noinherit, + bool include_notnull, + bool include_partition) +{ + List *result = NIL; + Index varno = rel->relid; + Relation relation; + TupleConstr *constr; + + /* + * We assume the relation has already been safely locked. + */ + relation = table_open(relationObjectId, NoLock); + + constr = relation->rd_att->constr; + if (constr != NULL) + { + int num_check = constr->num_check; + int i; + + for (i = 0; i < num_check; i++) + { + Node *cexpr; + + /* + * If this constraint hasn't been fully validated yet, we must + * ignore it here. Also ignore if NO INHERIT and we weren't told + * that that's safe. + */ + if (!constr->check[i].ccvalid) + continue; + if (constr->check[i].ccnoinherit && !include_noinherit) + continue; + + cexpr = stringToNode(constr->check[i].ccbin); + + /* + * Run each expression through const-simplification and + * canonicalization. This is not just an optimization, but is + * necessary, because we will be comparing it to + * similarly-processed qual clauses, and may fail to detect valid + * matches without this. This must match the processing done to + * qual clauses in preprocess_expression()! (We can skip the + * stuff involving subqueries, however, since we don't allow any + * in check constraints.) 
+ */ + cexpr = eval_const_expressions(root, cexpr); + + cexpr = (Node *) canonicalize_qual((Expr *) cexpr, true); + + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes(cexpr, 1, varno, 0); + + /* + * Finally, convert to implicit-AND format (that is, a List) and + * append the resulting item(s) to our output list. + */ + result = list_concat(result, + make_ands_implicit((Expr *) cexpr)); + } + + /* Add NOT NULL constraints in expression form, if requested */ + if (include_notnull && constr->has_not_null) + { + int natts = relation->rd_att->natts; + + for (i = 1; i <= natts; i++) + { + Form_pg_attribute att = TupleDescAttr(relation->rd_att, i - 1); + + if (att->attnotnull && !att->attisdropped) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = (Expr *) makeVar(varno, + i, + att->atttypid, + att->atttypmod, + att->attcollation, + 0); + ntest->nulltesttype = IS_NOT_NULL; + + /* + * argisrow=false is correct even for a composite column, + * because attnotnull does not represent a SQL-spec IS NOT + * NULL test in such a case, just IS DISTINCT FROM NULL. + */ + ntest->argisrow = false; + ntest->location = -1; + result = lappend(result, ntest); + } + } + } + } + + /* + * Add partitioning constraints, if requested. + */ + if (include_partition && relation->rd_rel->relispartition) + { + /* make sure rel->partition_qual is set */ + set_baserel_partition_constraint(relation, rel); + result = list_concat(result, rel->partition_qual); + } + + table_close(relation, NoLock); + + return result; +} + +/* + * Try loading data for the statistics object. + * + * We don't know if the data (specified by statOid and inh value) exist. + * The result is stored in stainfos list. + */ +static void +get_relation_statistics_worker(List **stainfos, RelOptInfo *rel, + Oid statOid, bool inh, + Bitmapset *keys, List *exprs) +{ + Form_pg_statistic_ext_data dataForm; + HeapTuple dtup; + + dtup = SearchSysCache2(STATEXTDATASTXOID, + ObjectIdGetDatum(statOid), BoolGetDatum(inh)); + if (!HeapTupleIsValid(dtup)) + return; + + dataForm = (Form_pg_statistic_ext_data) GETSTRUCT(dtup); + + /* add one StatisticExtInfo for each kind built */ + if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->inherit = dataForm->stxdinherit; + info->rel = rel; + info->kind = STATS_EXT_NDISTINCT; + info->keys = bms_copy(keys); + info->exprs = exprs; + + *stainfos = lappend(*stainfos, info); + } + + if (statext_is_kind_built(dtup, STATS_EXT_DEPENDENCIES)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->inherit = dataForm->stxdinherit; + info->rel = rel; + info->kind = STATS_EXT_DEPENDENCIES; + info->keys = bms_copy(keys); + info->exprs = exprs; + + *stainfos = lappend(*stainfos, info); + } + + if (statext_is_kind_built(dtup, STATS_EXT_MCV)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->inherit = dataForm->stxdinherit; + info->rel = rel; + info->kind = STATS_EXT_MCV; + info->keys = bms_copy(keys); + info->exprs = exprs; + + *stainfos = lappend(*stainfos, info); + } + + if (statext_is_kind_built(dtup, STATS_EXT_EXPRESSIONS)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->inherit = dataForm->stxdinherit; + info->rel = rel; + info->kind = STATS_EXT_EXPRESSIONS; + info->keys = bms_copy(keys); + info->exprs = exprs; + + *stainfos = lappend(*stainfos, info); + } + + 
ReleaseSysCache(dtup); +} + +/* + * get_relation_statistics + * Retrieve extended statistics defined on the table. + * + * Returns a List (possibly empty) of StatisticExtInfo objects describing + * the statistics. Note that this doesn't load the actual statistics data, + * just the identifying metadata. Only stats actually built are considered. + */ +static List * +get_relation_statistics(RelOptInfo *rel, Relation relation) +{ + Index varno = rel->relid; + List *statoidlist; + List *stainfos = NIL; + ListCell *l; + + statoidlist = RelationGetStatExtList(relation); + + foreach(l, statoidlist) + { + Oid statOid = lfirst_oid(l); + Form_pg_statistic_ext staForm; + HeapTuple htup; + Bitmapset *keys = NULL; + List *exprs = NIL; + int i; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!HeapTupleIsValid(htup)) + elog(ERROR, "cache lookup failed for statistics object %u", statOid); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + + /* + * First, build the array of columns covered. This is ultimately + * wasted if no stats within the object have actually been built, but + * it doesn't seem worth troubling over that case. + */ + for (i = 0; i < staForm->stxkeys.dim1; i++) + keys = bms_add_member(keys, staForm->stxkeys.values[i]); + + /* + * Preprocess expressions (if any). We read the expressions, run them + * through eval_const_expressions, and fix the varnos. + * + * XXX We don't know yet if there are any data for this stats object, + * with either stxdinherit value. But it's reasonable to assume there + * is at least one of those, possibly both. So it's better to process + * keys and expressions here. + */ + { + bool isnull; + Datum datum; + + /* decode expression (if any) */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_stxexprs, &isnull); + + if (!isnull) + { + char *exprsString; + + exprsString = TextDatumGetCString(datum); + exprs = (List *) stringToNode(exprsString); + pfree(exprsString); + + /* + * Run the expressions through eval_const_expressions. This is + * not just an optimization, but is necessary, because the + * planner will be comparing them to similarly-processed qual + * clauses, and may fail to detect valid matches without this. + * We must not use canonicalize_qual, however, since these + * aren't qual expressions. + */ + exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) exprs); + + /* + * Modify the copies we obtain from the relcache to have the + * correct varno for the parent relation, so that they match + * up correctly against qual clauses. + */ + if (varno != 1) + ChangeVarNodes((Node *) exprs, 1, varno, 0); + } + } + + /* extract statistics for possible values of stxdinherit flag */ + + get_relation_statistics_worker(&stainfos, rel, statOid, true, keys, exprs); + + get_relation_statistics_worker(&stainfos, rel, statOid, false, keys, exprs); + + ReleaseSysCache(htup); + bms_free(keys); + } + + list_free(statoidlist); + + return stainfos; +} + +/* + * relation_excluded_by_constraints + * + * Detect whether the relation need not be scanned because it has either + * self-inconsistent restrictions, or restrictions inconsistent with the + * relation's applicable constraints. + * + * Note: this examines only rel->relid, rel->reloptkind, and + * rel->baserestrictinfo; therefore it can be called before filling in + * other fields of the RelOptInfo. 
+ */ +bool +relation_excluded_by_constraints(PlannerInfo *root, + RelOptInfo *rel, RangeTblEntry *rte) +{ + bool include_noinherit; + bool include_notnull; + bool include_partition = false; + List *safe_restrictions; + List *constraint_pred; + List *safe_constraints; + ListCell *lc; + + /* As of now, constraint exclusion works only with simple relations. */ + Assert(IS_SIMPLE_REL(rel)); + + /* + * If there are no base restriction clauses, we have no hope of proving + * anything below, so fall out quickly. + */ + if (rel->baserestrictinfo == NIL) + return false; + + /* + * Regardless of the setting of constraint_exclusion, detect + * constant-FALSE-or-NULL restriction clauses. Because const-folding will + * reduce "anything AND FALSE" to just "FALSE", any such case should + * result in exactly one baserestrictinfo entry. This doesn't fire very + * often, but it seems cheap enough to be worth doing anyway. (Without + * this, we'd miss some optimizations that 9.5 and earlier found via much + * more roundabout methods.) + */ + if (list_length(rel->baserestrictinfo) == 1) + { + RestrictInfo *rinfo = (RestrictInfo *) linitial(rel->baserestrictinfo); + Expr *clause = rinfo->clause; + + if (clause && IsA(clause, Const) && + (((Const *) clause)->constisnull || + !DatumGetBool(((Const *) clause)->constvalue))) + return true; + } + + /* + * Skip further tests, depending on constraint_exclusion. + */ + switch (constraint_exclusion) + { + case CONSTRAINT_EXCLUSION_OFF: + /* In 'off' mode, never make any further tests */ + return false; + + case CONSTRAINT_EXCLUSION_PARTITION: + + /* + * When constraint_exclusion is set to 'partition' we only handle + * appendrel members. Partition pruning has already been applied, + * so there is no need to consider the rel's partition constraints + * here. + */ + if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + break; /* appendrel member, so process it */ + return false; + + case CONSTRAINT_EXCLUSION_ON: + + /* + * In 'on' mode, always apply constraint exclusion. If we are + * considering a baserel that is a partition (i.e., it was + * directly named rather than expanded from a parent table), then + * its partition constraints haven't been considered yet, so + * include them in the processing here. + */ + if (rel->reloptkind == RELOPT_BASEREL) + include_partition = true; + break; /* always try to exclude */ + } + + /* + * Check for self-contradictory restriction clauses. We dare not make + * deductions with non-immutable functions, but any immutable clauses that + * are self-contradictory allow us to conclude the scan is unnecessary. + * + * Note: strip off RestrictInfo because predicate_refuted_by() isn't + * expecting to see any in its predicate argument. + */ + safe_restrictions = NIL; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (!contain_mutable_functions((Node *) rinfo->clause)) + safe_restrictions = lappend(safe_restrictions, rinfo->clause); + } + + /* + * We can use weak refutation here, since we're comparing restriction + * clauses with restriction clauses. + */ + if (predicate_refuted_by(safe_restrictions, safe_restrictions, true)) + return true; + + /* + * Only plain relations have constraints, so stop here for other rtekinds. + */ + if (rte->rtekind != RTE_RELATION) + return false; + + /* + * If we are scanning just this table, we can use NO INHERIT constraints, + * but not if we're scanning its children too. 
(Note that partitioned + * tables should never have NO INHERIT constraints; but it's not necessary + * for us to assume that here.) + */ + include_noinherit = !rte->inh; + + /* + * Currently, attnotnull constraints must be treated as NO INHERIT unless + * this is a partitioned table. In future we might track their + * inheritance status more accurately, allowing this to be refined. + */ + include_notnull = (!rte->inh || rte->relkind == RELKIND_PARTITIONED_TABLE); + + /* + * Fetch the appropriate set of constraint expressions. + */ + constraint_pred = get_relation_constraints(root, rte->relid, rel, + include_noinherit, + include_notnull, + include_partition); + + /* + * We do not currently enforce that CHECK constraints contain only + * immutable functions, so it's necessary to check here. We daren't draw + * conclusions from plan-time evaluation of non-immutable functions. Since + * they're ANDed, we can just ignore any mutable constraints in the list, + * and reason about the rest. + */ + safe_constraints = NIL; + foreach(lc, constraint_pred) + { + Node *pred = (Node *) lfirst(lc); + + if (!contain_mutable_functions(pred)) + safe_constraints = lappend(safe_constraints, pred); + } + + /* + * The constraints are effectively ANDed together, so we can just try to + * refute the entire collection at once. This may allow us to make proofs + * that would fail if we took them individually. + * + * Note: we use rel->baserestrictinfo, not safe_restrictions as might seem + * an obvious optimization. Some of the clauses might be OR clauses that + * have volatile and nonvolatile subclauses, and it's OK to make + * deductions with the nonvolatile parts. + * + * We need strong refutation because we have to prove that the constraints + * would yield false, not just NULL. + */ + if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo, false)) + return true; + + return false; +} + + +/* + * build_physical_tlist + * + * Build a targetlist consisting of exactly the relation's user attributes, + * in order. The executor can special-case such tlists to avoid a projection + * step at runtime, so we use such tlists preferentially for scan nodes. + * + * Exception: if there are any dropped or missing columns, we punt and return + * NIL. Ideally we would like to handle these cases too. However this + * creates problems for ExecTypeFromTL, which may be asked to build a tupdesc + * for a tlist that includes vars of no-longer-existent types. In theory we + * could dig out the required info from the pg_attribute entries of the + * relation, but that data is not readily available to ExecTypeFromTL. + * For now, we don't apply the physical-tlist optimization when there are + * dropped cols. + * + * We also support building a "physical" tlist for subqueries, functions, + * values lists, table expressions, and CTEs, since the same optimization can + * occur in SubqueryScan, FunctionScan, ValuesScan, CteScan, TableFunc, + * NamedTuplestoreScan, and WorkTableScan nodes. 
+ */ +List * +build_physical_tlist(PlannerInfo *root, RelOptInfo *rel) +{ + List *tlist = NIL; + Index varno = rel->relid; + RangeTblEntry *rte = planner_rt_fetch(varno, root); + Relation relation; + Query *subquery; + Var *var; + ListCell *l; + int attrno, + numattrs; + List *colvars; + + switch (rte->rtekind) + { + case RTE_RELATION: + /* Assume we already have adequate lock */ + relation = table_open(rte->relid, NoLock); + + numattrs = RelationGetNumberOfAttributes(relation); + for (attrno = 1; attrno <= numattrs; attrno++) + { + Form_pg_attribute att_tup = TupleDescAttr(relation->rd_att, + attrno - 1); + + if (att_tup->attisdropped || att_tup->atthasmissing) + { + /* found a dropped or missing col, so punt */ + tlist = NIL; + break; + } + + var = makeVar(varno, + attrno, + att_tup->atttypid, + att_tup->atttypmod, + att_tup->attcollation, + 0); + + tlist = lappend(tlist, + makeTargetEntry((Expr *) var, + attrno, + NULL, + false)); + } + + table_close(relation, NoLock); + break; + + case RTE_SUBQUERY: + subquery = rte->subquery; + foreach(l, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + /* + * A resjunk column of the subquery can be reflected as + * resjunk in the physical tlist; we need not punt. + */ + var = makeVarFromTargetEntry(varno, tle); + + tlist = lappend(tlist, + makeTargetEntry((Expr *) var, + tle->resno, + NULL, + tle->resjunk)); + } + break; + + case RTE_FUNCTION: + case RTE_TABLEFUNC: + case RTE_VALUES: + case RTE_CTE: + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + /* Not all of these can have dropped cols, but share code anyway */ + expandRTE(rte, varno, 0, -1, true /* include dropped */ , + NULL, &colvars); + foreach(l, colvars) + { + var = (Var *) lfirst(l); + + /* + * A non-Var in expandRTE's output means a dropped column; + * must punt. + */ + if (!IsA(var, Var)) + { + tlist = NIL; + break; + } + + tlist = lappend(tlist, + makeTargetEntry((Expr *) var, + var->varattno, + NULL, + false)); + } + break; + + default: + /* caller error */ + elog(ERROR, "unsupported RTE kind %d in build_physical_tlist", + (int) rte->rtekind); + break; + } + + return tlist; +} + +/* + * build_index_tlist + * + * Build a targetlist representing the columns of the specified index. + * Each column is represented by a Var for the corresponding base-relation + * column, or an expression in base-relation Vars, as appropriate. + * + * There are never any dropped columns in indexes, so unlike + * build_physical_tlist, we need no failure case. 
+ */ +static List * +build_index_tlist(PlannerInfo *root, IndexOptInfo *index, + Relation heapRelation) +{ + List *tlist = NIL; + Index varno = index->rel->relid; + ListCell *indexpr_item; + int i; + + indexpr_item = list_head(index->indexprs); + for (i = 0; i < index->ncolumns; i++) + { + int indexkey = index->indexkeys[i]; + Expr *indexvar; + + if (indexkey != 0) + { + /* simple column */ + const FormData_pg_attribute *att_tup; + + if (indexkey < 0) + att_tup = SystemAttributeDefinition(indexkey); + else + att_tup = TupleDescAttr(heapRelation->rd_att, indexkey - 1); + + indexvar = (Expr *) makeVar(varno, + indexkey, + att_tup->atttypid, + att_tup->atttypmod, + att_tup->attcollation, + 0); + } + else + { + /* expression column */ + if (indexpr_item == NULL) + elog(ERROR, "wrong number of index expressions"); + indexvar = (Expr *) lfirst(indexpr_item); + indexpr_item = lnext(index->indexprs, indexpr_item); + } + + tlist = lappend(tlist, + makeTargetEntry(indexvar, + i + 1, + NULL, + false)); + } + if (indexpr_item != NULL) + elog(ERROR, "wrong number of index expressions"); + + return tlist; +} + +/* + * restriction_selectivity + * + * Returns the selectivity of a specified restriction operator clause. + * This code executes registered procedures stored in the + * operator relation, by calling the function manager. + * + * See clause_selectivity() for the meaning of the additional parameters. + */ +Selectivity +restriction_selectivity(PlannerInfo *root, + Oid operatorid, + List *args, + Oid inputcollid, + int varRelid) +{ + RegProcedure oprrest = get_oprrest(operatorid); + float8 result; + + /* + * if the oprrest procedure is missing for whatever reason, use a + * selectivity of 0.5 + */ + if (!oprrest) + return (Selectivity) 0.5; + + result = DatumGetFloat8(OidFunctionCall4Coll(oprrest, + inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operatorid), + PointerGetDatum(args), + Int32GetDatum(varRelid))); + + if (result < 0.0 || result > 1.0) + elog(ERROR, "invalid restriction selectivity: %f", result); + + return (Selectivity) result; +} + +/* + * join_selectivity + * + * Returns the selectivity of a specified join operator clause. + * This code executes registered procedures stored in the + * operator relation, by calling the function manager. + * + * See clause_selectivity() for the meaning of the additional parameters. + */ +Selectivity +join_selectivity(PlannerInfo *root, + Oid operatorid, + List *args, + Oid inputcollid, + JoinType jointype, + SpecialJoinInfo *sjinfo) +{ + RegProcedure oprjoin = get_oprjoin(operatorid); + float8 result; + + /* + * if the oprjoin procedure is missing for whatever reason, use a + * selectivity of 0.5 + */ + if (!oprjoin) + return (Selectivity) 0.5; + + result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin, + inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operatorid), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); + + if (result < 0.0 || result > 1.0) + elog(ERROR, "invalid join selectivity: %f", result); + + return (Selectivity) result; +} + +/* + * function_selectivity + * + * Returns the selectivity of a specified boolean function clause. + * This code executes registered procedures stored in the + * pg_proc relation, by calling the function manager. + * + * See clause_selectivity() for the meaning of the additional parameters. 
+ */ +Selectivity +function_selectivity(PlannerInfo *root, + Oid funcid, + List *args, + Oid inputcollid, + bool is_join, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo) +{ + RegProcedure prosupport = get_func_support(funcid); + SupportRequestSelectivity req; + SupportRequestSelectivity *sresult; + + /* + * If no support function is provided, use our historical default + * estimate, 0.3333333. This seems a pretty unprincipled choice, but + * Postgres has been using that estimate for function calls since 1992. + * The hoariness of this behavior suggests that we should not be in too + * much hurry to use another value. + */ + if (!prosupport) + return (Selectivity) 0.3333333; + + req.type = T_SupportRequestSelectivity; + req.root = root; + req.funcid = funcid; + req.args = args; + req.inputcollid = inputcollid; + req.is_join = is_join; + req.varRelid = varRelid; + req.jointype = jointype; + req.sjinfo = sjinfo; + req.selectivity = -1; /* to catch failure to set the value */ + + sresult = (SupportRequestSelectivity *) + DatumGetPointer(OidFunctionCall1(prosupport, + PointerGetDatum(&req))); + + /* If support function fails, use default */ + if (sresult != &req) + return (Selectivity) 0.3333333; + + if (req.selectivity < 0.0 || req.selectivity > 1.0) + elog(ERROR, "invalid function selectivity: %f", req.selectivity); + + return (Selectivity) req.selectivity; +} + +/* + * add_function_cost + * + * Get an estimate of the execution cost of a function, and *add* it to + * the contents of *cost. The estimate may include both one-time and + * per-tuple components, since QualCost does. + * + * The funcid must always be supplied. If it is being called as the + * implementation of a specific parsetree node (FuncExpr, OpExpr, + * WindowFunc, etc), pass that as "node", else pass NULL. + * + * In some usages root might be NULL, too. + */ +void +add_function_cost(PlannerInfo *root, Oid funcid, Node *node, + QualCost *cost) +{ + HeapTuple proctup; + Form_pg_proc procform; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + if (OidIsValid(procform->prosupport)) + { + SupportRequestCost req; + SupportRequestCost *sresult; + + req.type = T_SupportRequestCost; + req.root = root; + req.funcid = funcid; + req.node = node; + + /* Initialize cost fields so that support function doesn't have to */ + req.startup = 0; + req.per_tuple = 0; + + sresult = (SupportRequestCost *) + DatumGetPointer(OidFunctionCall1(procform->prosupport, + PointerGetDatum(&req))); + + if (sresult == &req) + { + /* Success, so accumulate support function's estimate into *cost */ + cost->startup += req.startup; + cost->per_tuple += req.per_tuple; + ReleaseSysCache(proctup); + return; + } + } + + /* No support function, or it failed, so rely on procost */ + cost->per_tuple += procform->procost * cpu_operator_cost; + + ReleaseSysCache(proctup); +} + +/* + * get_function_rows + * + * Get an estimate of the number of rows returned by a set-returning function. + * + * The funcid must always be supplied. In current usage, the calling node + * will always be supplied, and will be either a FuncExpr or OpExpr. + * But it's a good idea to not fail if it's NULL. + * + * In some usages root might be NULL, too. + * + * Note: this returns the unfiltered result of the support function, if any. 
+ * It's usually a good idea to apply clamp_row_est() to the result, but we + * leave it to the caller to do so. + */ +double +get_function_rows(PlannerInfo *root, Oid funcid, Node *node) +{ + HeapTuple proctup; + Form_pg_proc procform; + double result; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + Assert(procform->proretset); /* else caller error */ + + if (OidIsValid(procform->prosupport)) + { + SupportRequestRows req; + SupportRequestRows *sresult; + + req.type = T_SupportRequestRows; + req.root = root; + req.funcid = funcid; + req.node = node; + + req.rows = 0; /* just for sanity */ + + sresult = (SupportRequestRows *) + DatumGetPointer(OidFunctionCall1(procform->prosupport, + PointerGetDatum(&req))); + + if (sresult == &req) + { + /* Success */ + ReleaseSysCache(proctup); + return req.rows; + } + } + + /* No support function, or it failed, so rely on prorows */ + result = procform->prorows; + + ReleaseSysCache(proctup); + + return result; +} + +/* + * has_unique_index + * + * Detect whether there is a unique index on the specified attribute + * of the specified relation, thus allowing us to conclude that all + * the (non-null) values of the attribute are distinct. + * + * This function does not check the index's indimmediate property, which + * means that uniqueness may transiently fail to hold intra-transaction. + * That's appropriate when we are making statistical estimates, but beware + * of using this for any correctness proofs. + */ +bool +has_unique_index(RelOptInfo *rel, AttrNumber attno) +{ + ListCell *ilist; + + foreach(ilist, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); + + /* + * Note: ignore partial indexes, since they don't allow us to conclude + * that all attr values are distinct, *unless* they are marked predOK + * which means we know the index's predicate is satisfied by the + * query. We don't take any interest in expressional indexes either. + * Also, a multicolumn unique index doesn't allow us to conclude that + * just the specified attr is unique. + */ + if (index->unique && + index->nkeycolumns == 1 && + index->indexkeys[0] == attno && + (index->indpred == NIL || index->predOK)) + return true; + } + return false; +} + + +/* + * has_row_triggers + * + * Detect whether the specified relation has any row-level triggers for event. 
+ */ +bool +has_row_triggers(PlannerInfo *root, Index rti, CmdType event) +{ + RangeTblEntry *rte = planner_rt_fetch(rti, root); + Relation relation; + TriggerDesc *trigDesc; + bool result = false; + + /* Assume we already have adequate lock */ + relation = table_open(rte->relid, NoLock); + + trigDesc = relation->trigdesc; + switch (event) + { + case CMD_INSERT: + if (trigDesc && + (trigDesc->trig_insert_after_row || + trigDesc->trig_insert_before_row)) + result = true; + break; + case CMD_UPDATE: + if (trigDesc && + (trigDesc->trig_update_after_row || + trigDesc->trig_update_before_row)) + result = true; + break; + case CMD_DELETE: + if (trigDesc && + (trigDesc->trig_delete_after_row || + trigDesc->trig_delete_before_row)) + result = true; + break; + /* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */ + case CMD_MERGE: + result = false; + break; + default: + elog(ERROR, "unrecognized CmdType: %d", (int) event); + break; + } + + table_close(relation, NoLock); + return result; +} + +/* + * has_stored_generated_columns + * + * Does table identified by RTI have any STORED GENERATED columns? + */ +bool +has_stored_generated_columns(PlannerInfo *root, Index rti) +{ + RangeTblEntry *rte = planner_rt_fetch(rti, root); + Relation relation; + TupleDesc tupdesc; + bool result = false; + + /* Assume we already have adequate lock */ + relation = table_open(rte->relid, NoLock); + + tupdesc = RelationGetDescr(relation); + result = tupdesc->constr && tupdesc->constr->has_generated_stored; + + table_close(relation, NoLock); + + return result; +} + +/* + * get_dependent_generated_columns + * + * Get the column numbers of any STORED GENERATED columns of the relation + * that depend on any column listed in target_cols. Both the input and + * result bitmapsets contain column numbers offset by + * FirstLowInvalidHeapAttributeNumber. + */ +Bitmapset * +get_dependent_generated_columns(PlannerInfo *root, Index rti, + Bitmapset *target_cols) +{ + Bitmapset *dependentCols = NULL; + RangeTblEntry *rte = planner_rt_fetch(rti, root); + Relation relation; + TupleDesc tupdesc; + TupleConstr *constr; + + /* Assume we already have adequate lock */ + relation = table_open(rte->relid, NoLock); + + tupdesc = RelationGetDescr(relation); + constr = tupdesc->constr; + + if (constr && constr->has_generated_stored) + { + for (int i = 0; i < constr->num_defval; i++) + { + AttrDefault *defval = &constr->defval[i]; + Node *expr; + Bitmapset *attrs_used = NULL; + + /* skip if not generated column */ + if (!TupleDescAttr(tupdesc, defval->adnum - 1)->attgenerated) + continue; + + /* identify columns this generated column depends on */ + expr = stringToNode(defval->adbin); + pull_varattnos(expr, 1, &attrs_used); + + if (bms_overlap(target_cols, attrs_used)) + dependentCols = bms_add_member(dependentCols, + defval->adnum - FirstLowInvalidHeapAttributeNumber); + } + } + + table_close(relation, NoLock); + + return dependentCols; +} + +/* + * set_relation_partition_info + * + * Set partitioning scheme and related information for a partitioned table. + */ +static void +set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation) +{ + PartitionDesc partdesc; + + /* + * Create the PartitionDirectory infrastructure if we didn't already. 
+ */ + if (root->glob->partition_directory == NULL) + { + root->glob->partition_directory = + CreatePartitionDirectory(CurrentMemoryContext, true); + } + + partdesc = PartitionDirectoryLookup(root->glob->partition_directory, + relation); + rel->part_scheme = find_partition_scheme(root, relation); + Assert(partdesc != NULL && rel->part_scheme != NULL); + rel->boundinfo = partdesc->boundinfo; + rel->nparts = partdesc->nparts; + set_baserel_partition_key_exprs(relation, rel); + set_baserel_partition_constraint(relation, rel); +} + +/* + * find_partition_scheme + * + * Find or create a PartitionScheme for this Relation. + */ +static PartitionScheme +find_partition_scheme(PlannerInfo *root, Relation relation) +{ + PartitionKey partkey = RelationGetPartitionKey(relation); + ListCell *lc; + int partnatts, + i; + PartitionScheme part_scheme; + + /* A partitioned table should have a partition key. */ + Assert(partkey != NULL); + + partnatts = partkey->partnatts; + + /* Search for a matching partition scheme and return if found one. */ + foreach(lc, root->part_schemes) + { + part_scheme = lfirst(lc); + + /* Match partitioning strategy and number of keys. */ + if (partkey->strategy != part_scheme->strategy || + partnatts != part_scheme->partnatts) + continue; + + /* Match partition key type properties. */ + if (memcmp(partkey->partopfamily, part_scheme->partopfamily, + sizeof(Oid) * partnatts) != 0 || + memcmp(partkey->partopcintype, part_scheme->partopcintype, + sizeof(Oid) * partnatts) != 0 || + memcmp(partkey->partcollation, part_scheme->partcollation, + sizeof(Oid) * partnatts) != 0) + continue; + + /* + * Length and byval information should match when partopcintype + * matches. + */ + Assert(memcmp(partkey->parttyplen, part_scheme->parttyplen, + sizeof(int16) * partnatts) == 0); + Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval, + sizeof(bool) * partnatts) == 0); + + /* + * If partopfamily and partopcintype matched, must have the same + * partition comparison functions. Note that we cannot reliably + * Assert the equality of function structs themselves for they might + * be different across PartitionKey's, so just Assert for the function + * OIDs. + */ +#ifdef USE_ASSERT_CHECKING + for (i = 0; i < partkey->partnatts; i++) + Assert(partkey->partsupfunc[i].fn_oid == + part_scheme->partsupfunc[i].fn_oid); +#endif + + /* Found matching partition scheme. */ + return part_scheme; + } + + /* + * Did not find matching partition scheme. Create one copying relevant + * information from the relcache. We need to copy the contents of the + * array since the relcache entry may not survive after we have closed the + * relation. 
+ */ + part_scheme = (PartitionScheme) palloc0(sizeof(PartitionSchemeData)); + part_scheme->strategy = partkey->strategy; + part_scheme->partnatts = partkey->partnatts; + + part_scheme->partopfamily = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partopfamily, partkey->partopfamily, + sizeof(Oid) * partnatts); + + part_scheme->partopcintype = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partopcintype, partkey->partopcintype, + sizeof(Oid) * partnatts); + + part_scheme->partcollation = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partcollation, partkey->partcollation, + sizeof(Oid) * partnatts); + + part_scheme->parttyplen = (int16 *) palloc(sizeof(int16) * partnatts); + memcpy(part_scheme->parttyplen, partkey->parttyplen, + sizeof(int16) * partnatts); + + part_scheme->parttypbyval = (bool *) palloc(sizeof(bool) * partnatts); + memcpy(part_scheme->parttypbyval, partkey->parttypbyval, + sizeof(bool) * partnatts); + + part_scheme->partsupfunc = (FmgrInfo *) + palloc(sizeof(FmgrInfo) * partnatts); + for (i = 0; i < partnatts; i++) + fmgr_info_copy(&part_scheme->partsupfunc[i], &partkey->partsupfunc[i], + CurrentMemoryContext); + + /* Add the partitioning scheme to PlannerInfo. */ + root->part_schemes = lappend(root->part_schemes, part_scheme); + + return part_scheme; +} + +/* + * set_baserel_partition_key_exprs + * + * Builds partition key expressions for the given base relation and fills + * rel->partexprs. + */ +static void +set_baserel_partition_key_exprs(Relation relation, + RelOptInfo *rel) +{ + PartitionKey partkey = RelationGetPartitionKey(relation); + int partnatts; + int cnt; + List **partexprs; + ListCell *lc; + Index varno = rel->relid; + + Assert(IS_SIMPLE_REL(rel) && rel->relid > 0); + + /* A partitioned table should have a partition key. */ + Assert(partkey != NULL); + + partnatts = partkey->partnatts; + partexprs = (List **) palloc(sizeof(List *) * partnatts); + lc = list_head(partkey->partexprs); + + for (cnt = 0; cnt < partnatts; cnt++) + { + Expr *partexpr; + AttrNumber attno = partkey->partattrs[cnt]; + + if (attno != InvalidAttrNumber) + { + /* Single column partition key is stored as a Var node. */ + Assert(attno > 0); + + partexpr = (Expr *) makeVar(varno, attno, + partkey->parttypid[cnt], + partkey->parttypmod[cnt], + partkey->parttypcoll[cnt], 0); + } + else + { + if (lc == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + /* Re-stamp the expression with given varno. */ + partexpr = (Expr *) copyObject(lfirst(lc)); + ChangeVarNodes((Node *) partexpr, 1, varno, 0); + lc = lnext(partkey->partexprs, lc); + } + + /* Base relations have a single expression per key. */ + partexprs[cnt] = list_make1(partexpr); + } + + rel->partexprs = partexprs; + + /* + * A base relation does not have nullable partition key expressions, since + * no outer join is involved. We still allocate an array of empty + * expression lists to keep partition key expression handling code simple. + * See build_joinrel_partition_info() and match_expr_to_partition_keys(). + */ + rel->nullable_partexprs = (List **) palloc0(sizeof(List *) * partnatts); +} + +/* + * set_baserel_partition_constraint + * + * Builds the partition constraint for the given base relation and sets it + * in the given RelOptInfo. All Var nodes are restamped with the relid of the + * given relation. 
+ */ +static void +set_baserel_partition_constraint(Relation relation, RelOptInfo *rel) +{ + List *partconstr; + + if (rel->partition_qual) /* already done */ + return; + + /* + * Run the partition quals through const-simplification similar to check + * constraints. We skip canonicalize_qual, though, because partition + * quals should be in canonical form already; also, since the qual is in + * implicit-AND format, we'd have to explicitly convert it to explicit-AND + * format and back again. + */ + partconstr = RelationGetPartitionQual(relation); + if (partconstr) + { + partconstr = (List *) expression_planner((Expr *) partconstr); + if (rel->relid != 1) + ChangeVarNodes((Node *) partconstr, 1, rel->relid, 0); + rel->partition_qual = partconstr; + } +} diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c new file mode 100644 index 0000000..182d5b1 --- /dev/null +++ b/src/backend/optimizer/util/predtest.c @@ -0,0 +1,2224 @@ +/*------------------------------------------------------------------------- + * + * predtest.c + * Routines to attempt to prove logical implications between predicate + * expressions. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/predtest.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/pathnodes.h" +#include "optimizer/optimizer.h" +#include "utils/array.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +/* + * Proof attempts involving large arrays in ScalarArrayOpExpr nodes are + * likely to require O(N^2) time, and more often than not fail anyway. + * So we set an arbitrary limit on the number of array elements that + * we will allow to be treated as an AND or OR clause. + * XXX is it worth exposing this as a GUC knob? + */ +#define MAX_SAOP_ARRAY_SIZE 100 + +/* + * To avoid redundant coding in predicate_implied_by_recurse and + * predicate_refuted_by_recurse, we need to abstract out the notion of + * iterating over the components of an expression that is logically an AND + * or OR structure. There are multiple sorts of expression nodes that can + * be treated as ANDs or ORs, and we don't want to code each one separately. + * Hence, these types and support routines. 
+ */ +typedef enum +{ + CLASS_ATOM, /* expression that's not AND or OR */ + CLASS_AND, /* expression with AND semantics */ + CLASS_OR /* expression with OR semantics */ +} PredClass; + +typedef struct PredIterInfoData *PredIterInfo; + +typedef struct PredIterInfoData +{ + /* node-type-specific iteration state */ + void *state; + List *state_list; + /* initialize to do the iteration */ + void (*startup_fn) (Node *clause, PredIterInfo info); + /* next-component iteration function */ + Node *(*next_fn) (PredIterInfo info); + /* release resources when done with iteration */ + void (*cleanup_fn) (PredIterInfo info); +} PredIterInfoData; + +#define iterate_begin(item, clause, info) \ + do { \ + Node *item; \ + (info).startup_fn((clause), &(info)); \ + while ((item = (info).next_fn(&(info))) != NULL) + +#define iterate_end(info) \ + (info).cleanup_fn(&(info)); \ + } while (0) + + +static bool predicate_implied_by_recurse(Node *clause, Node *predicate, + bool weak); +static bool predicate_refuted_by_recurse(Node *clause, Node *predicate, + bool weak); +static PredClass predicate_classify(Node *clause, PredIterInfo info); +static void list_startup_fn(Node *clause, PredIterInfo info); +static Node *list_next_fn(PredIterInfo info); +static void list_cleanup_fn(PredIterInfo info); +static void boolexpr_startup_fn(Node *clause, PredIterInfo info); +static void arrayconst_startup_fn(Node *clause, PredIterInfo info); +static Node *arrayconst_next_fn(PredIterInfo info); +static void arrayconst_cleanup_fn(PredIterInfo info); +static void arrayexpr_startup_fn(Node *clause, PredIterInfo info); +static Node *arrayexpr_next_fn(PredIterInfo info); +static void arrayexpr_cleanup_fn(PredIterInfo info); +static bool predicate_implied_by_simple_clause(Expr *predicate, Node *clause, + bool weak); +static bool predicate_refuted_by_simple_clause(Expr *predicate, Node *clause, + bool weak); +static Node *extract_not_arg(Node *clause); +static Node *extract_strong_not_arg(Node *clause); +static bool clause_is_strict_for(Node *clause, Node *subexpr, bool allow_false); +static bool operator_predicate_proof(Expr *predicate, Node *clause, + bool refute_it, bool weak); +static bool operator_same_subexprs_proof(Oid pred_op, Oid clause_op, + bool refute_it); +static bool operator_same_subexprs_lookup(Oid pred_op, Oid clause_op, + bool refute_it); +static Oid get_btree_test_op(Oid pred_op, Oid clause_op, bool refute_it); +static void InvalidateOprProofCacheCallBack(Datum arg, int cacheid, uint32 hashvalue); + + +/* + * predicate_implied_by + * Recursively checks whether the clauses in clause_list imply that the + * given predicate is true. + * + * We support two definitions of implication: + * + * "Strong" implication: A implies B means that truth of A implies truth of B. + * We use this to prove that a row satisfying one WHERE clause or index + * predicate must satisfy another one. + * + * "Weak" implication: A implies B means that non-falsity of A implies + * non-falsity of B ("non-false" means "either true or NULL"). We use this to + * prove that a row satisfying one CHECK constraint must satisfy another one. + * + * Strong implication can also be used to prove that a WHERE clause implies a + * CHECK constraint, although it will fail to prove a few cases where we could + * safely conclude that the implication holds. There's no support for proving + * the converse case, since only a few kinds of CHECK constraint would allow + * deducing anything. + * + * The top-level List structure of each list corresponds to an AND list. 
+ * We assume that eval_const_expressions() has been applied and so there + * are no un-flattened ANDs or ORs (e.g., no AND immediately within an AND, + * including AND just below the top-level List structure). + * If this is not true we might fail to prove an implication that is + * valid, but no worse consequences will ensue. + * + * We assume the predicate has already been checked to contain only + * immutable functions and operators. (In many current uses this is known + * true because the predicate is part of an index predicate that has passed + * CheckPredicate(); otherwise, the caller must check it.) We dare not make + * deductions based on non-immutable functions, because they might change + * answers between the time we make the plan and the time we execute the plan. + * Immutability of functions in the clause_list is checked here, if necessary. + */ +bool +predicate_implied_by(List *predicate_list, List *clause_list, + bool weak) +{ + Node *p, + *c; + + if (predicate_list == NIL) + return true; /* no predicate: implication is vacuous */ + if (clause_list == NIL) + return false; /* no restriction: implication must fail */ + + /* + * If either input is a single-element list, replace it with its lone + * member; this avoids one useless level of AND-recursion. We only need + * to worry about this at top level, since eval_const_expressions should + * have gotten rid of any trivial ANDs or ORs below that. + */ + if (list_length(predicate_list) == 1) + p = (Node *) linitial(predicate_list); + else + p = (Node *) predicate_list; + if (list_length(clause_list) == 1) + c = (Node *) linitial(clause_list); + else + c = (Node *) clause_list; + + /* And away we go ... */ + return predicate_implied_by_recurse(c, p, weak); +} + +/* + * predicate_refuted_by + * Recursively checks whether the clauses in clause_list refute the given + * predicate (that is, prove it false). + * + * This is NOT the same as !(predicate_implied_by), though it is similar + * in the technique and structure of the code. + * + * We support two definitions of refutation: + * + * "Strong" refutation: A refutes B means truth of A implies falsity of B. + * We use this to disprove a CHECK constraint given a WHERE clause, i.e., + * prove that any row satisfying the WHERE clause would violate the CHECK + * constraint. (Observe we must prove B yields false, not just not-true.) + * + * "Weak" refutation: A refutes B means truth of A implies non-truth of B + * (i.e., B must yield false or NULL). We use this to detect mutually + * contradictory WHERE clauses. + * + * Weak refutation can be proven in some cases where strong refutation doesn't + * hold, so it's useful to use it when possible. We don't currently have + * support for disproving one CHECK constraint based on another one, nor for + * disproving WHERE based on CHECK. (As with implication, the last case + * doesn't seem very practical. CHECK-vs-CHECK might be useful, but isn't + * currently needed anywhere.) + * + * The top-level List structure of each list corresponds to an AND list. + * We assume that eval_const_expressions() has been applied and so there + * are no un-flattened ANDs or ORs (e.g., no AND immediately within an AND, + * including AND just below the top-level List structure). + * If this is not true we might fail to prove an implication that is + * valid, but no worse consequences will ensue. + * + * We assume the predicate has already been checked to contain only + * immutable functions and operators. 
We dare not make deductions based on + * non-immutable functions, because they might change answers between the + * time we make the plan and the time we execute the plan. + * Immutability of functions in the clause_list is checked here, if necessary. + */ +bool +predicate_refuted_by(List *predicate_list, List *clause_list, + bool weak) +{ + Node *p, + *c; + + if (predicate_list == NIL) + return false; /* no predicate: no refutation is possible */ + if (clause_list == NIL) + return false; /* no restriction: refutation must fail */ + + /* + * If either input is a single-element list, replace it with its lone + * member; this avoids one useless level of AND-recursion. We only need + * to worry about this at top level, since eval_const_expressions should + * have gotten rid of any trivial ANDs or ORs below that. + */ + if (list_length(predicate_list) == 1) + p = (Node *) linitial(predicate_list); + else + p = (Node *) predicate_list; + if (list_length(clause_list) == 1) + c = (Node *) linitial(clause_list); + else + c = (Node *) clause_list; + + /* And away we go ... */ + return predicate_refuted_by_recurse(c, p, weak); +} + +/*---------- + * predicate_implied_by_recurse + * Does the predicate implication test for non-NULL restriction and + * predicate clauses. + * + * The logic followed here is ("=>" means "implies"): + * atom A => atom B iff: predicate_implied_by_simple_clause says so + * atom A => AND-expr B iff: A => each of B's components + * atom A => OR-expr B iff: A => any of B's components + * AND-expr A => atom B iff: any of A's components => B + * AND-expr A => AND-expr B iff: A => each of B's components + * AND-expr A => OR-expr B iff: A => any of B's components, + * *or* any of A's components => B + * OR-expr A => atom B iff: each of A's components => B + * OR-expr A => AND-expr B iff: A => each of B's components + * OR-expr A => OR-expr B iff: each of A's components => any of B's + * + * An "atom" is anything other than an AND or OR node. Notice that we don't + * have any special logic to handle NOT nodes; these should have been pushed + * down or eliminated where feasible during eval_const_expressions(). + * + * All of these rules apply equally to strong or weak implication. + * + * We can't recursively expand either side first, but have to interleave + * the expansions per the above rules, to be sure we handle all of these + * examples: + * (x OR y) => (x OR y OR z) + * (x AND y AND z) => (x AND y) + * (x AND y) => ((x AND y) OR z) + * ((x OR y) AND z) => (x OR y) + * This is still not an exhaustive test, but it handles most normal cases + * under the assumption that both inputs have been AND/OR flattened. + * + * We have to be prepared to handle RestrictInfo nodes in the restrictinfo + * tree, though not in the predicate tree. 
+ *---------- + */ +static bool +predicate_implied_by_recurse(Node *clause, Node *predicate, + bool weak) +{ + PredIterInfoData clause_info; + PredIterInfoData pred_info; + PredClass pclass; + bool result; + + /* skip through RestrictInfo */ + Assert(clause != NULL); + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + pclass = predicate_classify(predicate, &pred_info); + + switch (predicate_classify(clause, &clause_info)) + { + case CLASS_AND: + switch (pclass) + { + case CLASS_AND: + + /* + * AND-clause => AND-clause if A implies each of B's items + */ + result = true; + iterate_begin(pitem, predicate, pred_info) + { + if (!predicate_implied_by_recurse(clause, pitem, + weak)) + { + result = false; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_OR: + + /* + * AND-clause => OR-clause if A implies any of B's items + * + * Needed to handle (x AND y) => ((x AND y) OR z) + */ + result = false; + iterate_begin(pitem, predicate, pred_info) + { + if (predicate_implied_by_recurse(clause, pitem, + weak)) + { + result = true; + break; + } + } + iterate_end(pred_info); + if (result) + return result; + + /* + * Also check if any of A's items implies B + * + * Needed to handle ((x OR y) AND z) => (x OR y) + */ + iterate_begin(citem, clause, clause_info) + { + if (predicate_implied_by_recurse(citem, predicate, + weak)) + { + result = true; + break; + } + } + iterate_end(clause_info); + return result; + + case CLASS_ATOM: + + /* + * AND-clause => atom if any of A's items implies B + */ + result = false; + iterate_begin(citem, clause, clause_info) + { + if (predicate_implied_by_recurse(citem, predicate, + weak)) + { + result = true; + break; + } + } + iterate_end(clause_info); + return result; + } + break; + + case CLASS_OR: + switch (pclass) + { + case CLASS_OR: + + /* + * OR-clause => OR-clause if each of A's items implies any + * of B's items. Messy but can't do it any more simply. 
+ */ + result = true; + iterate_begin(citem, clause, clause_info) + { + bool presult = false; + + iterate_begin(pitem, predicate, pred_info) + { + if (predicate_implied_by_recurse(citem, pitem, + weak)) + { + presult = true; + break; + } + } + iterate_end(pred_info); + if (!presult) + { + result = false; /* doesn't imply any of B's */ + break; + } + } + iterate_end(clause_info); + return result; + + case CLASS_AND: + case CLASS_ATOM: + + /* + * OR-clause => AND-clause if each of A's items implies B + * + * OR-clause => atom if each of A's items implies B + */ + result = true; + iterate_begin(citem, clause, clause_info) + { + if (!predicate_implied_by_recurse(citem, predicate, + weak)) + { + result = false; + break; + } + } + iterate_end(clause_info); + return result; + } + break; + + case CLASS_ATOM: + switch (pclass) + { + case CLASS_AND: + + /* + * atom => AND-clause if A implies each of B's items + */ + result = true; + iterate_begin(pitem, predicate, pred_info) + { + if (!predicate_implied_by_recurse(clause, pitem, + weak)) + { + result = false; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_OR: + + /* + * atom => OR-clause if A implies any of B's items + */ + result = false; + iterate_begin(pitem, predicate, pred_info) + { + if (predicate_implied_by_recurse(clause, pitem, + weak)) + { + result = true; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_ATOM: + + /* + * atom => atom is the base case + */ + return + predicate_implied_by_simple_clause((Expr *) predicate, + clause, + weak); + } + break; + } + + /* can't get here */ + elog(ERROR, "predicate_classify returned a bogus value"); + return false; +} + +/*---------- + * predicate_refuted_by_recurse + * Does the predicate refutation test for non-NULL restriction and + * predicate clauses. + * + * The logic followed here is ("R=>" means "refutes"): + * atom A R=> atom B iff: predicate_refuted_by_simple_clause says so + * atom A R=> AND-expr B iff: A R=> any of B's components + * atom A R=> OR-expr B iff: A R=> each of B's components + * AND-expr A R=> atom B iff: any of A's components R=> B + * AND-expr A R=> AND-expr B iff: A R=> any of B's components, + * *or* any of A's components R=> B + * AND-expr A R=> OR-expr B iff: A R=> each of B's components + * OR-expr A R=> atom B iff: each of A's components R=> B + * OR-expr A R=> AND-expr B iff: each of A's components R=> any of B's + * OR-expr A R=> OR-expr B iff: A R=> each of B's components + * + * All of the above rules apply equally to strong or weak refutation. + * + * In addition, if the predicate is a NOT-clause then we can use + * A R=> NOT B if: A => B + * This works for several different SQL constructs that assert the non-truth + * of their argument, ie NOT, IS FALSE, IS NOT TRUE, IS UNKNOWN, although some + * of them require that we prove strong implication. Likewise, we can use + * NOT A R=> B if: B => A + * but here we must be careful about strong vs. weak refutation and make + * the appropriate type of implication proof (weak or strong respectively). + * + * Other comments are as for predicate_implied_by_recurse(). 
+ *---------- + */ +static bool +predicate_refuted_by_recurse(Node *clause, Node *predicate, + bool weak) +{ + PredIterInfoData clause_info; + PredIterInfoData pred_info; + PredClass pclass; + Node *not_arg; + bool result; + + /* skip through RestrictInfo */ + Assert(clause != NULL); + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + pclass = predicate_classify(predicate, &pred_info); + + switch (predicate_classify(clause, &clause_info)) + { + case CLASS_AND: + switch (pclass) + { + case CLASS_AND: + + /* + * AND-clause R=> AND-clause if A refutes any of B's items + * + * Needed to handle (x AND y) R=> ((!x OR !y) AND z) + */ + result = false; + iterate_begin(pitem, predicate, pred_info) + { + if (predicate_refuted_by_recurse(clause, pitem, + weak)) + { + result = true; + break; + } + } + iterate_end(pred_info); + if (result) + return result; + + /* + * Also check if any of A's items refutes B + * + * Needed to handle ((x OR y) AND z) R=> (!x AND !y) + */ + iterate_begin(citem, clause, clause_info) + { + if (predicate_refuted_by_recurse(citem, predicate, + weak)) + { + result = true; + break; + } + } + iterate_end(clause_info); + return result; + + case CLASS_OR: + + /* + * AND-clause R=> OR-clause if A refutes each of B's items + */ + result = true; + iterate_begin(pitem, predicate, pred_info) + { + if (!predicate_refuted_by_recurse(clause, pitem, + weak)) + { + result = false; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_ATOM: + + /* + * If B is a NOT-type clause, A R=> B if A => B's arg + * + * Since, for either type of refutation, we are starting + * with the premise that A is true, we can use a strong + * implication test in all cases. That proves B's arg is + * true, which is more than we need for weak refutation if + * B is a simple NOT, but it allows not worrying about + * exactly which kind of negation clause we have. + */ + not_arg = extract_not_arg(predicate); + if (not_arg && + predicate_implied_by_recurse(clause, not_arg, + false)) + return true; + + /* + * AND-clause R=> atom if any of A's items refutes B + */ + result = false; + iterate_begin(citem, clause, clause_info) + { + if (predicate_refuted_by_recurse(citem, predicate, + weak)) + { + result = true; + break; + } + } + iterate_end(clause_info); + return result; + } + break; + + case CLASS_OR: + switch (pclass) + { + case CLASS_OR: + + /* + * OR-clause R=> OR-clause if A refutes each of B's items + */ + result = true; + iterate_begin(pitem, predicate, pred_info) + { + if (!predicate_refuted_by_recurse(clause, pitem, + weak)) + { + result = false; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_AND: + + /* + * OR-clause R=> AND-clause if each of A's items refutes + * any of B's items. + */ + result = true; + iterate_begin(citem, clause, clause_info) + { + bool presult = false; + + iterate_begin(pitem, predicate, pred_info) + { + if (predicate_refuted_by_recurse(citem, pitem, + weak)) + { + presult = true; + break; + } + } + iterate_end(pred_info); + if (!presult) + { + result = false; /* citem refutes nothing */ + break; + } + } + iterate_end(clause_info); + return result; + + case CLASS_ATOM: + + /* + * If B is a NOT-type clause, A R=> B if A => B's arg + * + * Same logic as for the AND-clause case above. 
+ */ + not_arg = extract_not_arg(predicate); + if (not_arg && + predicate_implied_by_recurse(clause, not_arg, + false)) + return true; + + /* + * OR-clause R=> atom if each of A's items refutes B + */ + result = true; + iterate_begin(citem, clause, clause_info) + { + if (!predicate_refuted_by_recurse(citem, predicate, + weak)) + { + result = false; + break; + } + } + iterate_end(clause_info); + return result; + } + break; + + case CLASS_ATOM: + + /* + * If A is a strong NOT-clause, A R=> B if B => A's arg + * + * Since A is strong, we may assume A's arg is false (not just + * not-true). If B weakly implies A's arg, then B can be neither + * true nor null, so that strong refutation is proven. If B + * strongly implies A's arg, then B cannot be true, so that weak + * refutation is proven. + */ + not_arg = extract_strong_not_arg(clause); + if (not_arg && + predicate_implied_by_recurse(predicate, not_arg, + !weak)) + return true; + + switch (pclass) + { + case CLASS_AND: + + /* + * atom R=> AND-clause if A refutes any of B's items + */ + result = false; + iterate_begin(pitem, predicate, pred_info) + { + if (predicate_refuted_by_recurse(clause, pitem, + weak)) + { + result = true; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_OR: + + /* + * atom R=> OR-clause if A refutes each of B's items + */ + result = true; + iterate_begin(pitem, predicate, pred_info) + { + if (!predicate_refuted_by_recurse(clause, pitem, + weak)) + { + result = false; + break; + } + } + iterate_end(pred_info); + return result; + + case CLASS_ATOM: + + /* + * If B is a NOT-type clause, A R=> B if A => B's arg + * + * Same logic as for the AND-clause case above. + */ + not_arg = extract_not_arg(predicate); + if (not_arg && + predicate_implied_by_recurse(clause, not_arg, + false)) + return true; + + /* + * atom R=> atom is the base case + */ + return + predicate_refuted_by_simple_clause((Expr *) predicate, + clause, + weak); + } + break; + } + + /* can't get here */ + elog(ERROR, "predicate_classify returned a bogus value"); + return false; +} + + +/* + * predicate_classify + * Classify an expression node as AND-type, OR-type, or neither (an atom). + * + * If the expression is classified as AND- or OR-type, then *info is filled + * in with the functions needed to iterate over its components. + * + * This function also implements enforcement of MAX_SAOP_ARRAY_SIZE: if a + * ScalarArrayOpExpr's array has too many elements, we just classify it as an + * atom. (This will result in its being passed as-is to the simple_clause + * functions, many of which will fail to prove anything about it.) Note that we + * cannot just stop after considering MAX_SAOP_ARRAY_SIZE elements; in general + * that would result in wrong proofs, rather than failing to prove anything. + */ +static PredClass +predicate_classify(Node *clause, PredIterInfo info) +{ + /* Caller should not pass us NULL, nor a RestrictInfo clause */ + Assert(clause != NULL); + Assert(!IsA(clause, RestrictInfo)); + + /* + * If we see a List, assume it's an implicit-AND list; this is the correct + * semantics for lists of RestrictInfo nodes. 
+ */ + if (IsA(clause, List)) + { + info->startup_fn = list_startup_fn; + info->next_fn = list_next_fn; + info->cleanup_fn = list_cleanup_fn; + return CLASS_AND; + } + + /* Handle normal AND and OR boolean clauses */ + if (is_andclause(clause)) + { + info->startup_fn = boolexpr_startup_fn; + info->next_fn = list_next_fn; + info->cleanup_fn = list_cleanup_fn; + return CLASS_AND; + } + if (is_orclause(clause)) + { + info->startup_fn = boolexpr_startup_fn; + info->next_fn = list_next_fn; + info->cleanup_fn = list_cleanup_fn; + return CLASS_OR; + } + + /* Handle ScalarArrayOpExpr */ + if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Node *arraynode = (Node *) lsecond(saop->args); + + /* + * We can break this down into an AND or OR structure, but only if we + * know how to iterate through expressions for the array's elements. + * We can do that if the array operand is a non-null constant or a + * simple ArrayExpr. + */ + if (arraynode && IsA(arraynode, Const) && + !((Const *) arraynode)->constisnull) + { + ArrayType *arrayval; + int nelems; + + arrayval = DatumGetArrayTypeP(((Const *) arraynode)->constvalue); + nelems = ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval)); + if (nelems <= MAX_SAOP_ARRAY_SIZE) + { + info->startup_fn = arrayconst_startup_fn; + info->next_fn = arrayconst_next_fn; + info->cleanup_fn = arrayconst_cleanup_fn; + return saop->useOr ? CLASS_OR : CLASS_AND; + } + } + else if (arraynode && IsA(arraynode, ArrayExpr) && + !((ArrayExpr *) arraynode)->multidims && + list_length(((ArrayExpr *) arraynode)->elements) <= MAX_SAOP_ARRAY_SIZE) + { + info->startup_fn = arrayexpr_startup_fn; + info->next_fn = arrayexpr_next_fn; + info->cleanup_fn = arrayexpr_cleanup_fn; + return saop->useOr ? CLASS_OR : CLASS_AND; + } + } + + /* None of the above, so it's an atom */ + return CLASS_ATOM; +} + +/* + * PredIterInfo routines for iterating over regular Lists. The iteration + * state variable is the next ListCell to visit. + */ +static void +list_startup_fn(Node *clause, PredIterInfo info) +{ + info->state_list = (List *) clause; + info->state = (void *) list_head(info->state_list); +} + +static Node * +list_next_fn(PredIterInfo info) +{ + ListCell *l = (ListCell *) info->state; + Node *n; + + if (l == NULL) + return NULL; + n = lfirst(l); + info->state = (void *) lnext(info->state_list, l); + return n; +} + +static void +list_cleanup_fn(PredIterInfo info) +{ + /* Nothing to clean up */ +} + +/* + * BoolExpr needs its own startup function, but can use list_next_fn and + * list_cleanup_fn. + */ +static void +boolexpr_startup_fn(Node *clause, PredIterInfo info) +{ + info->state_list = ((BoolExpr *) clause)->args; + info->state = (void *) list_head(info->state_list); +} + +/* + * PredIterInfo routines for iterating over a ScalarArrayOpExpr with a + * constant array operand. 
+ */ +typedef struct +{ + OpExpr opexpr; + Const constexpr; + int next_elem; + int num_elems; + Datum *elem_values; + bool *elem_nulls; +} ArrayConstIterState; + +static void +arrayconst_startup_fn(Node *clause, PredIterInfo info) +{ + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + ArrayConstIterState *state; + Const *arrayconst; + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + + /* Create working state struct */ + state = (ArrayConstIterState *) palloc(sizeof(ArrayConstIterState)); + info->state = (void *) state; + + /* Deconstruct the array literal */ + arrayconst = (Const *) lsecond(saop->args); + arrayval = DatumGetArrayTypeP(arrayconst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &state->elem_values, &state->elem_nulls, + &state->num_elems); + + /* Set up a dummy OpExpr to return as the per-item node */ + state->opexpr.xpr.type = T_OpExpr; + state->opexpr.opno = saop->opno; + state->opexpr.opfuncid = saop->opfuncid; + state->opexpr.opresulttype = BOOLOID; + state->opexpr.opretset = false; + state->opexpr.opcollid = InvalidOid; + state->opexpr.inputcollid = saop->inputcollid; + state->opexpr.args = list_copy(saop->args); + + /* Set up a dummy Const node to hold the per-element values */ + state->constexpr.xpr.type = T_Const; + state->constexpr.consttype = ARR_ELEMTYPE(arrayval); + state->constexpr.consttypmod = -1; + state->constexpr.constcollid = arrayconst->constcollid; + state->constexpr.constlen = elmlen; + state->constexpr.constbyval = elmbyval; + lsecond(state->opexpr.args) = &state->constexpr; + + /* Initialize iteration state */ + state->next_elem = 0; +} + +static Node * +arrayconst_next_fn(PredIterInfo info) +{ + ArrayConstIterState *state = (ArrayConstIterState *) info->state; + + if (state->next_elem >= state->num_elems) + return NULL; + state->constexpr.constvalue = state->elem_values[state->next_elem]; + state->constexpr.constisnull = state->elem_nulls[state->next_elem]; + state->next_elem++; + return (Node *) &(state->opexpr); +} + +static void +arrayconst_cleanup_fn(PredIterInfo info) +{ + ArrayConstIterState *state = (ArrayConstIterState *) info->state; + + pfree(state->elem_values); + pfree(state->elem_nulls); + list_free(state->opexpr.args); + pfree(state); +} + +/* + * PredIterInfo routines for iterating over a ScalarArrayOpExpr with a + * one-dimensional ArrayExpr array operand. 
+ */ +typedef struct +{ + OpExpr opexpr; + ListCell *next; +} ArrayExprIterState; + +static void +arrayexpr_startup_fn(Node *clause, PredIterInfo info) +{ + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + ArrayExprIterState *state; + ArrayExpr *arrayexpr; + + /* Create working state struct */ + state = (ArrayExprIterState *) palloc(sizeof(ArrayExprIterState)); + info->state = (void *) state; + + /* Set up a dummy OpExpr to return as the per-item node */ + state->opexpr.xpr.type = T_OpExpr; + state->opexpr.opno = saop->opno; + state->opexpr.opfuncid = saop->opfuncid; + state->opexpr.opresulttype = BOOLOID; + state->opexpr.opretset = false; + state->opexpr.opcollid = InvalidOid; + state->opexpr.inputcollid = saop->inputcollid; + state->opexpr.args = list_copy(saop->args); + + /* Initialize iteration variable to first member of ArrayExpr */ + arrayexpr = (ArrayExpr *) lsecond(saop->args); + info->state_list = arrayexpr->elements; + state->next = list_head(arrayexpr->elements); +} + +static Node * +arrayexpr_next_fn(PredIterInfo info) +{ + ArrayExprIterState *state = (ArrayExprIterState *) info->state; + + if (state->next == NULL) + return NULL; + lsecond(state->opexpr.args) = lfirst(state->next); + state->next = lnext(info->state_list, state->next); + return (Node *) &(state->opexpr); +} + +static void +arrayexpr_cleanup_fn(PredIterInfo info) +{ + ArrayExprIterState *state = (ArrayExprIterState *) info->state; + + list_free(state->opexpr.args); + pfree(state); +} + + +/*---------- + * predicate_implied_by_simple_clause + * Does the predicate implication test for a "simple clause" predicate + * and a "simple clause" restriction. + * + * We return true if able to prove the implication, false if not. + * + * We have three strategies for determining whether one simple clause + * implies another: + * + * A simple and general way is to see if they are equal(); this works for any + * kind of expression, and for either implication definition. (Actually, + * there is an implied assumption that the functions in the expression are + * immutable --- but this was checked for the predicate by the caller.) + * + * If the predicate is of the form "foo IS NOT NULL", and we are considering + * strong implication, we can conclude that the predicate is implied if the + * clause is strict for "foo", i.e., it must yield false or NULL when "foo" + * is NULL. In that case truth of the clause ensures that "foo" isn't NULL. + * (Again, this is a safe conclusion because "foo" must be immutable.) + * This doesn't work for weak implication, though. + * + * Finally, if both clauses are binary operator expressions, we may be able + * to prove something using the system's knowledge about operators; those + * proof rules are encapsulated in operator_predicate_proof(). + *---------- + */ +static bool +predicate_implied_by_simple_clause(Expr *predicate, Node *clause, + bool weak) +{ + /* Allow interrupting long proof attempts */ + CHECK_FOR_INTERRUPTS(); + + /* First try the equal() test */ + if (equal((Node *) predicate, clause)) + return true; + + /* Next try the IS NOT NULL case */ + if (!weak && + predicate && IsA(predicate, NullTest)) + { + NullTest *ntest = (NullTest *) predicate; + + /* row IS NOT NULL does not act in the simple way we have in mind */ + if (ntest->nulltesttype == IS_NOT_NULL && + !ntest->argisrow) + { + /* strictness of clause for foo implies foo IS NOT NULL */ + if (clause_is_strict_for(clause, (Node *) ntest->arg, true)) + return true; + } + return false; /* we can't succeed below... 
*/ + } + + /* Else try operator-related knowledge */ + return operator_predicate_proof(predicate, clause, false, weak); +} + +/*---------- + * predicate_refuted_by_simple_clause + * Does the predicate refutation test for a "simple clause" predicate + * and a "simple clause" restriction. + * + * We return true if able to prove the refutation, false if not. + * + * Unlike the implication case, checking for equal() clauses isn't helpful. + * But relation_excluded_by_constraints() checks for self-contradictions in a + * list of clauses, so that we may get here with predicate and clause being + * actually pointer-equal, and that is worth eliminating quickly. + * + * When the predicate is of the form "foo IS NULL", we can conclude that + * the predicate is refuted if the clause is strict for "foo" (see notes for + * implication case), or is "foo IS NOT NULL". That works for either strong + * or weak refutation. + * + * A clause "foo IS NULL" refutes a predicate "foo IS NOT NULL" in all cases. + * If we are considering weak refutation, it also refutes a predicate that + * is strict for "foo", since then the predicate must yield false or NULL + * (and since "foo" appears in the predicate, it's known immutable). + * + * (The main motivation for covering these IS [NOT] NULL cases is to support + * using IS NULL/IS NOT NULL as partition-defining constraints.) + * + * Finally, if both clauses are binary operator expressions, we may be able + * to prove something using the system's knowledge about operators; those + * proof rules are encapsulated in operator_predicate_proof(). + *---------- + */ +static bool +predicate_refuted_by_simple_clause(Expr *predicate, Node *clause, + bool weak) +{ + /* Allow interrupting long proof attempts */ + CHECK_FOR_INTERRUPTS(); + + /* A simple clause can't refute itself */ + /* Worth checking because of relation_excluded_by_constraints() */ + if ((Node *) predicate == clause) + return false; + + /* Try the predicate-IS-NULL case */ + if (predicate && IsA(predicate, NullTest) && + ((NullTest *) predicate)->nulltesttype == IS_NULL) + { + Expr *isnullarg = ((NullTest *) predicate)->arg; + + /* row IS NULL does not act in the simple way we have in mind */ + if (((NullTest *) predicate)->argisrow) + return false; + + /* strictness of clause for foo refutes foo IS NULL */ + if (clause_is_strict_for(clause, (Node *) isnullarg, true)) + return true; + + /* foo IS NOT NULL refutes foo IS NULL */ + if (clause && IsA(clause, NullTest) && + ((NullTest *) clause)->nulltesttype == IS_NOT_NULL && + !((NullTest *) clause)->argisrow && + equal(((NullTest *) clause)->arg, isnullarg)) + return true; + + return false; /* we can't succeed below... */ + } + + /* Try the clause-IS-NULL case */ + if (clause && IsA(clause, NullTest) && + ((NullTest *) clause)->nulltesttype == IS_NULL) + { + Expr *isnullarg = ((NullTest *) clause)->arg; + + /* row IS NULL does not act in the simple way we have in mind */ + if (((NullTest *) clause)->argisrow) + return false; + + /* foo IS NULL refutes foo IS NOT NULL */ + if (predicate && IsA(predicate, NullTest) && + ((NullTest *) predicate)->nulltesttype == IS_NOT_NULL && + !((NullTest *) predicate)->argisrow && + equal(((NullTest *) predicate)->arg, isnullarg)) + return true; + + /* foo IS NULL weakly refutes any predicate that is strict for foo */ + if (weak && + clause_is_strict_for((Node *) predicate, (Node *) isnullarg, true)) + return true; + + return false; /* we can't succeed below... 
*/ + } + + /* Else try operator-related knowledge */ + return operator_predicate_proof(predicate, clause, true, weak); +} + + +/* + * If clause asserts the non-truth of a subclause, return that subclause; + * otherwise return NULL. + */ +static Node * +extract_not_arg(Node *clause) +{ + if (clause == NULL) + return NULL; + if (IsA(clause, BoolExpr)) + { + BoolExpr *bexpr = (BoolExpr *) clause; + + if (bexpr->boolop == NOT_EXPR) + return (Node *) linitial(bexpr->args); + } + else if (IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + + if (btest->booltesttype == IS_NOT_TRUE || + btest->booltesttype == IS_FALSE || + btest->booltesttype == IS_UNKNOWN) + return (Node *) btest->arg; + } + return NULL; +} + +/* + * If clause asserts the falsity of a subclause, return that subclause; + * otherwise return NULL. + */ +static Node * +extract_strong_not_arg(Node *clause) +{ + if (clause == NULL) + return NULL; + if (IsA(clause, BoolExpr)) + { + BoolExpr *bexpr = (BoolExpr *) clause; + + if (bexpr->boolop == NOT_EXPR) + return (Node *) linitial(bexpr->args); + } + else if (IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + + if (btest->booltesttype == IS_FALSE) + return (Node *) btest->arg; + } + return NULL; +} + + +/* + * Can we prove that "clause" returns NULL (or FALSE) if "subexpr" is + * assumed to yield NULL? + * + * In most places in the planner, "strictness" refers to a guarantee that + * an expression yields NULL output for a NULL input, and that's mostly what + * we're looking for here. However, at top level where the clause is known + * to yield boolean, it may be sufficient to prove that it cannot return TRUE + * when "subexpr" is NULL. The caller should pass allow_false = true when + * this weaker property is acceptable. (When this function recurses + * internally, we pass down allow_false = false since we need to prove actual + * nullness of the subexpression.) + * + * We assume that the caller checked that least one of the input expressions + * is immutable. All of the proof rules here involve matching "subexpr" to + * some portion of "clause", so that this allows assuming that "subexpr" is + * immutable without a separate check. + * + * The base case is that clause and subexpr are equal(). + * + * We can also report success if the subexpr appears as a subexpression + * of "clause" in a place where it'd force nullness of the overall result. + */ +static bool +clause_is_strict_for(Node *clause, Node *subexpr, bool allow_false) +{ + ListCell *lc; + + /* safety checks */ + if (clause == NULL || subexpr == NULL) + return false; + + /* + * Look through any RelabelType nodes, so that we can match, say, + * varcharcol with lower(varcharcol::text). (In general we could recurse + * through any nullness-preserving, immutable operation.) We should not + * see stacked RelabelTypes here. + */ + if (IsA(clause, RelabelType)) + clause = (Node *) ((RelabelType *) clause)->arg; + if (IsA(subexpr, RelabelType)) + subexpr = (Node *) ((RelabelType *) subexpr)->arg; + + /* Base case */ + if (equal(clause, subexpr)) + return true; + + /* + * If we have a strict operator or function, a NULL result is guaranteed + * if any input is forced NULL by subexpr. This is OK even if the op or + * func isn't immutable, since it won't even be called on NULL input. 
+ */ + if (is_opclause(clause) && + op_strict(((OpExpr *) clause)->opno)) + { + foreach(lc, ((OpExpr *) clause)->args) + { + if (clause_is_strict_for((Node *) lfirst(lc), subexpr, false)) + return true; + } + return false; + } + if (is_funcclause(clause) && + func_strict(((FuncExpr *) clause)->funcid)) + { + foreach(lc, ((FuncExpr *) clause)->args) + { + if (clause_is_strict_for((Node *) lfirst(lc), subexpr, false)) + return true; + } + return false; + } + + /* + * CoerceViaIO is strict (whether or not the I/O functions it calls are). + * Likewise, ArrayCoerceExpr is strict for its array argument (regardless + * of what the per-element expression is), ConvertRowtypeExpr is strict at + * the row level, and CoerceToDomain is strict too. These are worth + * checking mainly because it saves us having to explain to users why some + * type coercions are known strict and others aren't. + */ + if (IsA(clause, CoerceViaIO)) + return clause_is_strict_for((Node *) ((CoerceViaIO *) clause)->arg, + subexpr, false); + if (IsA(clause, ArrayCoerceExpr)) + return clause_is_strict_for((Node *) ((ArrayCoerceExpr *) clause)->arg, + subexpr, false); + if (IsA(clause, ConvertRowtypeExpr)) + return clause_is_strict_for((Node *) ((ConvertRowtypeExpr *) clause)->arg, + subexpr, false); + if (IsA(clause, CoerceToDomain)) + return clause_is_strict_for((Node *) ((CoerceToDomain *) clause)->arg, + subexpr, false); + + /* + * ScalarArrayOpExpr is a special case. Note that we'd only reach here + * with a ScalarArrayOpExpr clause if we failed to deconstruct it into an + * AND or OR tree, as for example if it has too many array elements. + */ + if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Node *scalarnode = (Node *) linitial(saop->args); + Node *arraynode = (Node *) lsecond(saop->args); + + /* + * If we can prove the scalar input to be null, and the operator is + * strict, then the SAOP result has to be null --- unless the array is + * empty. For an empty array, we'd get either false (for ANY) or true + * (for ALL). So if allow_false = true then the proof succeeds anyway + * for the ANY case; otherwise we can only make the proof if we can + * prove the array non-empty. + */ + if (clause_is_strict_for(scalarnode, subexpr, false) && + op_strict(saop->opno)) + { + int nelems = 0; + + if (allow_false && saop->useOr) + return true; /* can succeed even if array is empty */ + + if (arraynode && IsA(arraynode, Const)) + { + Const *arrayconst = (Const *) arraynode; + ArrayType *arrval; + + /* + * If array is constant NULL then we can succeed, as in the + * case below. + */ + if (arrayconst->constisnull) + return true; + + /* Otherwise, we can compute the number of elements. */ + arrval = DatumGetArrayTypeP(arrayconst->constvalue); + nelems = ArrayGetNItems(ARR_NDIM(arrval), ARR_DIMS(arrval)); + } + else if (arraynode && IsA(arraynode, ArrayExpr) && + !((ArrayExpr *) arraynode)->multidims) + { + /* + * We can also reliably count the number of array elements if + * the input is a non-multidim ARRAY[] expression. + */ + nelems = list_length(((ArrayExpr *) arraynode)->elements); + } + + /* Proof succeeds if array is definitely non-empty */ + if (nelems > 0) + return true; + } + + /* + * If we can prove the array input to be null, the proof succeeds in + * all cases, since ScalarArrayOpExpr will always return NULL for a + * NULL array. Otherwise, we're done here. 
+ */ + return clause_is_strict_for(arraynode, subexpr, false); + } + + /* + * When recursing into an expression, we might find a NULL constant. + * That's certainly NULL, whether it matches subexpr or not. + */ + if (IsA(clause, Const)) + return ((Const *) clause)->constisnull; + + return false; +} + + +/* + * Define "operator implication tables" for btree operators ("strategies"), + * and similar tables for refutation. + * + * The strategy numbers defined by btree indexes (see access/stratnum.h) are: + * 1 < 2 <= 3 = 4 >= 5 > + * and in addition we use 6 to represent <>. <> is not a btree-indexable + * operator, but we assume here that if an equality operator of a btree + * opfamily has a negator operator, the negator behaves as <> for the opfamily. + * (This convention is also known to get_op_btree_interpretation().) + * + * BT_implies_table[] and BT_refutes_table[] are used for cases where we have + * two identical subexpressions and we want to know whether one operator + * expression implies or refutes the other. That is, if the "clause" is + * EXPR1 clause_op EXPR2 and the "predicate" is EXPR1 pred_op EXPR2 for the + * same two (immutable) subexpressions: + * BT_implies_table[clause_op-1][pred_op-1] + * is true if the clause implies the predicate + * BT_refutes_table[clause_op-1][pred_op-1] + * is true if the clause refutes the predicate + * where clause_op and pred_op are strategy numbers (from 1 to 6) in the + * same btree opfamily. For example, "x < y" implies "x <= y" and refutes + * "x > y". + * + * BT_implic_table[] and BT_refute_table[] are used where we have two + * constants that we need to compare. The interpretation of: + * + * test_op = BT_implic_table[clause_op-1][pred_op-1] + * + * where test_op, clause_op and pred_op are strategy numbers (from 1 to 6) + * of btree operators, is as follows: + * + * If you know, for some EXPR, that "EXPR clause_op CONST1" is true, and you + * want to determine whether "EXPR pred_op CONST2" must also be true, then + * you can use "CONST2 test_op CONST1" as a test. If this test returns true, + * then the predicate expression must be true; if the test returns false, + * then the predicate expression may be false. + * + * For example, if clause is "Quantity > 10" and pred is "Quantity > 5" + * then we test "5 <= 10" which evals to true, so clause implies pred. + * + * Similarly, the interpretation of a BT_refute_table entry is: + * + * If you know, for some EXPR, that "EXPR clause_op CONST1" is true, and you + * want to determine whether "EXPR pred_op CONST2" must be false, then + * you can use "CONST2 test_op CONST1" as a test. If this test returns true, + * then the predicate expression must be false; if the test returns false, + * then the predicate expression may be true. + * + * For example, if clause is "Quantity > 10" and pred is "Quantity < 5" + * then we test "5 <= 10" which evals to true, so clause refutes pred. + * + * An entry where test_op == 0 means the implication cannot be determined. 
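+ *
+ * For example, knowing "x > 10" tells us nothing about whether "x < 5"
+ * must be true, so BT_implic_table[5-1][1-1] is 0; the refutation table,
+ * however, does have an entry for that pair (we test "5 <= 10", which is
+ * true, so "x > 10" refutes "x < 5", as in the example above).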
+ */ + +#define BTLT BTLessStrategyNumber +#define BTLE BTLessEqualStrategyNumber +#define BTEQ BTEqualStrategyNumber +#define BTGE BTGreaterEqualStrategyNumber +#define BTGT BTGreaterStrategyNumber +#define BTNE ROWCOMPARE_NE + +/* We use "none" for 0/false to make the tables align nicely */ +#define none 0 + +static const bool BT_implies_table[6][6] = { +/* + * The predicate operator: + * LT LE EQ GE GT NE + */ + {true, true, none, none, none, true}, /* LT */ + {none, true, none, none, none, none}, /* LE */ + {none, true, true, true, none, none}, /* EQ */ + {none, none, none, true, none, none}, /* GE */ + {none, none, none, true, true, true}, /* GT */ + {none, none, none, none, none, true} /* NE */ +}; + +static const bool BT_refutes_table[6][6] = { +/* + * The predicate operator: + * LT LE EQ GE GT NE + */ + {none, none, true, true, true, none}, /* LT */ + {none, none, none, none, true, none}, /* LE */ + {true, none, none, none, true, true}, /* EQ */ + {true, none, none, none, none, none}, /* GE */ + {true, true, true, none, none, none}, /* GT */ + {none, none, true, none, none, none} /* NE */ +}; + +static const StrategyNumber BT_implic_table[6][6] = { +/* + * The predicate operator: + * LT LE EQ GE GT NE + */ + {BTGE, BTGE, none, none, none, BTGE}, /* LT */ + {BTGT, BTGE, none, none, none, BTGT}, /* LE */ + {BTGT, BTGE, BTEQ, BTLE, BTLT, BTNE}, /* EQ */ + {none, none, none, BTLE, BTLT, BTLT}, /* GE */ + {none, none, none, BTLE, BTLE, BTLE}, /* GT */ + {none, none, none, none, none, BTEQ} /* NE */ +}; + +static const StrategyNumber BT_refute_table[6][6] = { +/* + * The predicate operator: + * LT LE EQ GE GT NE + */ + {none, none, BTGE, BTGE, BTGE, none}, /* LT */ + {none, none, BTGT, BTGT, BTGE, none}, /* LE */ + {BTLE, BTLT, BTNE, BTGT, BTGE, BTEQ}, /* EQ */ + {BTLE, BTLT, BTLT, none, none, none}, /* GE */ + {BTLE, BTLE, BTLE, none, none, none}, /* GT */ + {none, none, BTEQ, none, none, none} /* NE */ +}; + + +/* + * operator_predicate_proof + * Does the predicate implication or refutation test for a "simple clause" + * predicate and a "simple clause" restriction, when both are operator + * clauses using related operators and identical input expressions. + * + * When refute_it == false, we want to prove the predicate true; + * when refute_it == true, we want to prove the predicate false. + * (There is enough common code to justify handling these two cases + * in one routine.) We return true if able to make the proof, false + * if not able to prove it. + * + * We mostly need not distinguish strong vs. weak implication/refutation here. + * This depends on the assumption that a pair of related operators (i.e., + * commutators, negators, or btree opfamily siblings) will not return one NULL + * and one non-NULL result for the same inputs. Then, for the proof types + * where we start with an assumption of truth of the clause, the predicate + * operator could not return NULL either, so it doesn't matter whether we are + * trying to make a strong or weak proof. For weak implication, it could be + * that the clause operator returned NULL, but then the predicate operator + * would as well, so that the weak implication still holds. This argument + * doesn't apply in the case where we are considering two different constant + * values, since then the operators aren't being given identical inputs. But + * we only support that for btree operators, for which we can assume that all + * non-null inputs result in non-null outputs, so that it doesn't matter which + * two non-null constants we consider. 
If either constant is NULL, we have + * to think harder, but sometimes the proof still works, as explained below. + * + * We can make proofs involving several expression forms (here "foo" and "bar" + * represent subexpressions that are identical according to equal()): + * "foo op1 bar" refutes "foo op2 bar" if op1 is op2's negator + * "foo op1 bar" implies "bar op2 foo" if op1 is op2's commutator + * "foo op1 bar" refutes "bar op2 foo" if op1 is negator of op2's commutator + * "foo op1 bar" can imply/refute "foo op2 bar" based on btree semantics + * "foo op1 bar" can imply/refute "bar op2 foo" based on btree semantics + * "foo op1 const1" can imply/refute "foo op2 const2" based on btree semantics + * + * For the last three cases, op1 and op2 have to be members of the same btree + * operator family. When both subexpressions are identical, the idea is that, + * for instance, x < y implies x <= y, independently of exactly what x and y + * are. If we have two different constants compared to the same expression + * foo, we have to execute a comparison between the two constant values + * in order to determine the result; for instance, foo < c1 implies foo < c2 + * if c1 <= c2. We assume it's safe to compare the constants at plan time + * if the comparison operator is immutable. + * + * Note: all the operators and subexpressions have to be immutable for the + * proof to be safe. We assume the predicate expression is entirely immutable, + * so no explicit check on the subexpressions is needed here, but in some + * cases we need an extra check of operator immutability. In particular, + * btree opfamilies can contain cross-type operators that are merely stable, + * and we dare not make deductions with those. + */ +static bool +operator_predicate_proof(Expr *predicate, Node *clause, + bool refute_it, bool weak) +{ + OpExpr *pred_opexpr, + *clause_opexpr; + Oid pred_collation, + clause_collation; + Oid pred_op, + clause_op, + test_op; + Node *pred_leftop, + *pred_rightop, + *clause_leftop, + *clause_rightop; + Const *pred_const, + *clause_const; + Expr *test_expr; + ExprState *test_exprstate; + Datum test_result; + bool isNull; + EState *estate; + MemoryContext oldcontext; + + /* + * Both expressions must be binary opclauses, else we can't do anything. + * + * Note: in future we might extend this logic to other operator-based + * constructs such as DistinctExpr. But the planner isn't very smart + * about DistinctExpr in general, and this probably isn't the first place + * to fix if you want to improve that. + */ + if (!is_opclause(predicate)) + return false; + pred_opexpr = (OpExpr *) predicate; + if (list_length(pred_opexpr->args) != 2) + return false; + if (!is_opclause(clause)) + return false; + clause_opexpr = (OpExpr *) clause; + if (list_length(clause_opexpr->args) != 2) + return false; + + /* + * If they're marked with different collations then we can't do anything. + * This is a cheap test so let's get it out of the way early. + */ + pred_collation = pred_opexpr->inputcollid; + clause_collation = clause_opexpr->inputcollid; + if (pred_collation != clause_collation) + return false; + + /* Grab the operator OIDs now too. We may commute these below. */ + pred_op = pred_opexpr->opno; + clause_op = clause_opexpr->opno; + + /* + * We have to match up at least one pair of input expressions. 
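+ *
+ * For instance, predicate "x < 5" and clause "x < 10" match on their left
+ * inputs, leaving two Consts to be compared below; whereas "5 > x" and
+ * "x < 10" match pred_rightop to clause_leftop, so we must commute the
+ * predicate's operator before treating its Const as a right-hand input.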
+ */ + pred_leftop = (Node *) linitial(pred_opexpr->args); + pred_rightop = (Node *) lsecond(pred_opexpr->args); + clause_leftop = (Node *) linitial(clause_opexpr->args); + clause_rightop = (Node *) lsecond(clause_opexpr->args); + + if (equal(pred_leftop, clause_leftop)) + { + if (equal(pred_rightop, clause_rightop)) + { + /* We have x op1 y and x op2 y */ + return operator_same_subexprs_proof(pred_op, clause_op, refute_it); + } + else + { + /* Fail unless rightops are both Consts */ + if (pred_rightop == NULL || !IsA(pred_rightop, Const)) + return false; + pred_const = (Const *) pred_rightop; + if (clause_rightop == NULL || !IsA(clause_rightop, Const)) + return false; + clause_const = (Const *) clause_rightop; + } + } + else if (equal(pred_rightop, clause_rightop)) + { + /* Fail unless leftops are both Consts */ + if (pred_leftop == NULL || !IsA(pred_leftop, Const)) + return false; + pred_const = (Const *) pred_leftop; + if (clause_leftop == NULL || !IsA(clause_leftop, Const)) + return false; + clause_const = (Const *) clause_leftop; + /* Commute both operators so we can assume Consts are on the right */ + pred_op = get_commutator(pred_op); + if (!OidIsValid(pred_op)) + return false; + clause_op = get_commutator(clause_op); + if (!OidIsValid(clause_op)) + return false; + } + else if (equal(pred_leftop, clause_rightop)) + { + if (equal(pred_rightop, clause_leftop)) + { + /* We have x op1 y and y op2 x */ + /* Commute pred_op that we can treat this like a straight match */ + pred_op = get_commutator(pred_op); + if (!OidIsValid(pred_op)) + return false; + return operator_same_subexprs_proof(pred_op, clause_op, refute_it); + } + else + { + /* Fail unless pred_rightop/clause_leftop are both Consts */ + if (pred_rightop == NULL || !IsA(pred_rightop, Const)) + return false; + pred_const = (Const *) pred_rightop; + if (clause_leftop == NULL || !IsA(clause_leftop, Const)) + return false; + clause_const = (Const *) clause_leftop; + /* Commute clause_op so we can assume Consts are on the right */ + clause_op = get_commutator(clause_op); + if (!OidIsValid(clause_op)) + return false; + } + } + else if (equal(pred_rightop, clause_leftop)) + { + /* Fail unless pred_leftop/clause_rightop are both Consts */ + if (pred_leftop == NULL || !IsA(pred_leftop, Const)) + return false; + pred_const = (Const *) pred_leftop; + if (clause_rightop == NULL || !IsA(clause_rightop, Const)) + return false; + clause_const = (Const *) clause_rightop; + /* Commute pred_op so we can assume Consts are on the right */ + pred_op = get_commutator(pred_op); + if (!OidIsValid(pred_op)) + return false; + } + else + { + /* Failed to match up any of the subexpressions, so we lose */ + return false; + } + + /* + * We have two identical subexpressions, and two other subexpressions that + * are not identical but are both Consts; and we have commuted the + * operators if necessary so that the Consts are on the right. We'll need + * to compare the Consts' values. If either is NULL, we can't do that, so + * usually the proof fails ... but in some cases we can claim success. + */ + if (clause_const->constisnull) + { + /* If clause_op isn't strict, we can't prove anything */ + if (!op_strict(clause_op)) + return false; + + /* + * At this point we know that the clause returns NULL. For proof + * types that assume truth of the clause, this means the proof is + * vacuously true (a/k/a "false implies anything"). That's all proof + * types except weak implication. 
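+ * (That is, strong implication, strong refutation, and weak refutation
+ * all succeed immediately here; only weak implication needs the further
+ * check below.)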
+ */ + if (!(weak && !refute_it)) + return true; + + /* + * For weak implication, it's still possible for the proof to succeed, + * if the predicate can also be proven NULL. In that case we've got + * NULL => NULL which is valid for this proof type. + */ + if (pred_const->constisnull && op_strict(pred_op)) + return true; + /* Else the proof fails */ + return false; + } + if (pred_const->constisnull) + { + /* + * If the pred_op is strict, we know the predicate yields NULL, which + * means the proof succeeds for either weak implication or weak + * refutation. + */ + if (weak && op_strict(pred_op)) + return true; + /* Else the proof fails */ + return false; + } + + /* + * Lookup the constant-comparison operator using the system catalogs and + * the operator implication tables. + */ + test_op = get_btree_test_op(pred_op, clause_op, refute_it); + + if (!OidIsValid(test_op)) + { + /* couldn't find a suitable comparison operator */ + return false; + } + + /* + * Evaluate the test. For this we need an EState. + */ + estate = CreateExecutorState(); + + /* We can use the estate's working context to avoid memory leaks. */ + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + + /* Build expression tree */ + test_expr = make_opclause(test_op, + BOOLOID, + false, + (Expr *) pred_const, + (Expr *) clause_const, + InvalidOid, + pred_collation); + + /* Fill in opfuncids */ + fix_opfuncids((Node *) test_expr); + + /* Prepare it for execution */ + test_exprstate = ExecInitExpr(test_expr, NULL); + + /* And execute it. */ + test_result = ExecEvalExprSwitchContext(test_exprstate, + GetPerTupleExprContext(estate), + &isNull); + + /* Get back to outer memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Release all the junk we just created */ + FreeExecutorState(estate); + + if (isNull) + { + /* Treat a null result as non-proof ... but it's a tad fishy ... */ + elog(DEBUG2, "null predicate test result"); + return false; + } + return DatumGetBool(test_result); +} + + +/* + * operator_same_subexprs_proof + * Assuming that EXPR1 clause_op EXPR2 is true, try to prove or refute + * EXPR1 pred_op EXPR2. + * + * Return true if able to make the proof, false if not able to prove it. + */ +static bool +operator_same_subexprs_proof(Oid pred_op, Oid clause_op, bool refute_it) +{ + /* + * A simple and general rule is that the predicate is proven if clause_op + * and pred_op are the same, or refuted if they are each other's negators. + * We need not check immutability since the pred_op is already known + * immutable. (Actually, by this point we may have the commutator of a + * known-immutable pred_op, but that should certainly be immutable too. + * Likewise we don't worry whether the pred_op's negator is immutable.) + * + * Note: the "same" case won't get here if we actually had EXPR1 clause_op + * EXPR2 and EXPR1 pred_op EXPR2, because the overall-expression-equality + * test in predicate_implied_by_simple_clause would have caught it. But + * we can see the same operator after having commuted the pred_op. + */ + if (refute_it) + { + if (get_negator(pred_op) == clause_op) + return true; + } + else + { + if (pred_op == clause_op) + return true; + } + + /* + * Otherwise, see if we can determine the implication by finding the + * operators' relationship via some btree opfamily. 
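+ * For instance, the integer "<" and "<=" operators are both members of
+ * the integer_ops btree opfamily, with strategy numbers 1 and 2, so
+ * BT_implies_table lets us conclude that "x < y" implies "x <= y".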
+ */ + return operator_same_subexprs_lookup(pred_op, clause_op, refute_it); +} + + +/* + * We use a lookaside table to cache the result of btree proof operator + * lookups, since the actual lookup is pretty expensive and doesn't change + * for any given pair of operators (at least as long as pg_amop doesn't + * change). A single hash entry stores both implication and refutation + * results for a given pair of operators; but note we may have determined + * only one of those sets of results as yet. + */ +typedef struct OprProofCacheKey +{ + Oid pred_op; /* predicate operator */ + Oid clause_op; /* clause operator */ +} OprProofCacheKey; + +typedef struct OprProofCacheEntry +{ + /* the hash lookup key MUST BE FIRST */ + OprProofCacheKey key; + + bool have_implic; /* do we know the implication result? */ + bool have_refute; /* do we know the refutation result? */ + bool same_subexprs_implies; /* X clause_op Y implies X pred_op Y? */ + bool same_subexprs_refutes; /* X clause_op Y refutes X pred_op Y? */ + Oid implic_test_op; /* OID of the test operator, or 0 if none */ + Oid refute_test_op; /* OID of the test operator, or 0 if none */ +} OprProofCacheEntry; + +static HTAB *OprProofCacheHash = NULL; + + +/* + * lookup_proof_cache + * Get, and fill in if necessary, the appropriate cache entry. + */ +static OprProofCacheEntry * +lookup_proof_cache(Oid pred_op, Oid clause_op, bool refute_it) +{ + OprProofCacheKey key; + OprProofCacheEntry *cache_entry; + bool cfound; + bool same_subexprs = false; + Oid test_op = InvalidOid; + bool found = false; + List *pred_op_infos, + *clause_op_infos; + ListCell *lcp, + *lcc; + + /* + * Find or make a cache entry for this pair of operators. + */ + if (OprProofCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + ctl.keysize = sizeof(OprProofCacheKey); + ctl.entrysize = sizeof(OprProofCacheEntry); + OprProofCacheHash = hash_create("Btree proof lookup cache", 256, + &ctl, HASH_ELEM | HASH_BLOBS); + + /* Arrange to flush cache on pg_amop changes */ + CacheRegisterSyscacheCallback(AMOPOPID, + InvalidateOprProofCacheCallBack, + (Datum) 0); + } + + key.pred_op = pred_op; + key.clause_op = clause_op; + cache_entry = (OprProofCacheEntry *) hash_search(OprProofCacheHash, + (void *) &key, + HASH_ENTER, &cfound); + if (!cfound) + { + /* new cache entry, set it invalid */ + cache_entry->have_implic = false; + cache_entry->have_refute = false; + } + else + { + /* pre-existing cache entry, see if we know the answer yet */ + if (refute_it ? cache_entry->have_refute : cache_entry->have_implic) + return cache_entry; + } + + /* + * Try to find a btree opfamily containing the given operators. + * + * We must find a btree opfamily that contains both operators, else the + * implication can't be determined. Also, the opfamily must contain a + * suitable test operator taking the operators' righthand datatypes. + * + * If there are multiple matching opfamilies, assume we can use any one to + * determine the logical relationship of the two operators and the correct + * corresponding test operator. This should work for any logically + * consistent opfamilies. + * + * Note that we can determine the operators' relationship for + * same-subexprs cases even from an opfamily that lacks a usable test + * operator. This can happen in cases with incomplete sets of cross-type + * comparison operators. 
+ */ + clause_op_infos = get_op_btree_interpretation(clause_op); + if (clause_op_infos) + pred_op_infos = get_op_btree_interpretation(pred_op); + else /* no point in looking */ + pred_op_infos = NIL; + + foreach(lcp, pred_op_infos) + { + OpBtreeInterpretation *pred_op_info = lfirst(lcp); + Oid opfamily_id = pred_op_info->opfamily_id; + + foreach(lcc, clause_op_infos) + { + OpBtreeInterpretation *clause_op_info = lfirst(lcc); + StrategyNumber pred_strategy, + clause_strategy, + test_strategy; + + /* Must find them in same opfamily */ + if (opfamily_id != clause_op_info->opfamily_id) + continue; + /* Lefttypes should match */ + Assert(clause_op_info->oplefttype == pred_op_info->oplefttype); + + pred_strategy = pred_op_info->strategy; + clause_strategy = clause_op_info->strategy; + + /* + * Check to see if we can make a proof for same-subexpressions + * cases based on the operators' relationship in this opfamily. + */ + if (refute_it) + same_subexprs |= BT_refutes_table[clause_strategy - 1][pred_strategy - 1]; + else + same_subexprs |= BT_implies_table[clause_strategy - 1][pred_strategy - 1]; + + /* + * Look up the "test" strategy number in the implication table + */ + if (refute_it) + test_strategy = BT_refute_table[clause_strategy - 1][pred_strategy - 1]; + else + test_strategy = BT_implic_table[clause_strategy - 1][pred_strategy - 1]; + + if (test_strategy == 0) + { + /* Can't determine implication using this interpretation */ + continue; + } + + /* + * See if opfamily has an operator for the test strategy and the + * datatypes. + */ + if (test_strategy == BTNE) + { + test_op = get_opfamily_member(opfamily_id, + pred_op_info->oprighttype, + clause_op_info->oprighttype, + BTEqualStrategyNumber); + if (OidIsValid(test_op)) + test_op = get_negator(test_op); + } + else + { + test_op = get_opfamily_member(opfamily_id, + pred_op_info->oprighttype, + clause_op_info->oprighttype, + test_strategy); + } + + if (!OidIsValid(test_op)) + continue; + + /* + * Last check: test_op must be immutable. + * + * Note that we require only the test_op to be immutable, not the + * original clause_op. (pred_op is assumed to have been checked + * immutable by the caller.) Essentially we are assuming that the + * opfamily is consistent even if it contains operators that are + * merely stable. + */ + if (op_volatile(test_op) == PROVOLATILE_IMMUTABLE) + { + found = true; + break; + } + } + + if (found) + break; + } + + list_free_deep(pred_op_infos); + list_free_deep(clause_op_infos); + + if (!found) + { + /* couldn't find a suitable comparison operator */ + test_op = InvalidOid; + } + + /* + * If we think we were able to prove something about same-subexpressions + * cases, check to make sure the clause_op is immutable before believing + * it completely. (Usually, the clause_op would be immutable if the + * pred_op is, but it's not entirely clear that this must be true in all + * cases, so let's check.) + */ + if (same_subexprs && + op_volatile(clause_op) != PROVOLATILE_IMMUTABLE) + same_subexprs = false; + + /* Cache the results, whether positive or negative */ + if (refute_it) + { + cache_entry->refute_test_op = test_op; + cache_entry->same_subexprs_refutes = same_subexprs; + cache_entry->have_refute = true; + } + else + { + cache_entry->implic_test_op = test_op; + cache_entry->same_subexprs_implies = same_subexprs; + cache_entry->have_implic = true; + } + + return cache_entry; +} + +/* + * operator_same_subexprs_lookup + * Convenience subroutine to look up the cached answer for + * same-subexpressions cases. 
+ */ +static bool +operator_same_subexprs_lookup(Oid pred_op, Oid clause_op, bool refute_it) +{ + OprProofCacheEntry *cache_entry; + + cache_entry = lookup_proof_cache(pred_op, clause_op, refute_it); + if (refute_it) + return cache_entry->same_subexprs_refutes; + else + return cache_entry->same_subexprs_implies; +} + +/* + * get_btree_test_op + * Identify the comparison operator needed for a btree-operator + * proof or refutation involving comparison of constants. + * + * Given the truth of a clause "var clause_op const1", we are attempting to + * prove or refute a predicate "var pred_op const2". The identities of the + * two operators are sufficient to determine the operator (if any) to compare + * const2 to const1 with. + * + * Returns the OID of the operator to use, or InvalidOid if no proof is + * possible. + */ +static Oid +get_btree_test_op(Oid pred_op, Oid clause_op, bool refute_it) +{ + OprProofCacheEntry *cache_entry; + + cache_entry = lookup_proof_cache(pred_op, clause_op, refute_it); + if (refute_it) + return cache_entry->refute_test_op; + else + return cache_entry->implic_test_op; +} + + +/* + * Callback for pg_amop inval events + */ +static void +InvalidateOprProofCacheCallBack(Datum arg, int cacheid, uint32 hashvalue) +{ + HASH_SEQ_STATUS status; + OprProofCacheEntry *hentry; + + Assert(OprProofCacheHash != NULL); + + /* Currently we just reset all entries; hard to be smarter ... */ + hash_seq_init(&status, OprProofCacheHash); + + while ((hentry = (OprProofCacheEntry *) hash_seq_search(&status)) != NULL) + { + hentry->have_implic = false; + hentry->have_refute = false; + } +} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c new file mode 100644 index 0000000..3c75fd5 --- /dev/null +++ b/src/backend/optimizer/util/relnode.c @@ -0,0 +1,2047 @@ +/*------------------------------------------------------------------------- + * + * relnode.c + * Relation-node lookup/construction routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/relnode.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/inherit.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/placeholder.h" +#include "optimizer/plancat.h" +#include "optimizer/restrictinfo.h" +#include "optimizer/tlist.h" +#include "utils/hsearch.h" +#include "utils/lsyscache.h" + + +typedef struct JoinHashEntry +{ + Relids join_relids; /* hash key --- MUST BE FIRST */ + RelOptInfo *join_rel; +} JoinHashEntry; + +static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *input_rel); +static List *build_joinrel_restrictlist(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static void build_joinrel_joinlist(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, + List *joininfo_list, + List *new_restrictlist); +static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, + List *joininfo_list, + List *new_joininfo); +static void set_foreign_rel_properties(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo 
*inner_rel); +static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel); +static void build_joinrel_partition_info(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + List *restrictlist, JoinType jointype); +static bool have_partkey_equi_join(RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, + JoinType jointype, List *restrictlist); +static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, + bool strict_op); +static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + JoinType jointype); +static void build_child_join_reltarget(PlannerInfo *root, + RelOptInfo *parentrel, + RelOptInfo *childrel, + int nappinfos, + AppendRelInfo **appinfos); + + +/* + * setup_simple_rel_arrays + * Prepare the arrays we use for quickly accessing base relations + * and AppendRelInfos. + */ +void +setup_simple_rel_arrays(PlannerInfo *root) +{ + int size; + Index rti; + ListCell *lc; + + /* Arrays are accessed using RT indexes (1..N) */ + size = list_length(root->parse->rtable) + 1; + root->simple_rel_array_size = size; + + /* + * simple_rel_array is initialized to all NULLs, since no RelOptInfos + * exist yet. It'll be filled by later calls to build_simple_rel(). + */ + root->simple_rel_array = (RelOptInfo **) + palloc0(size * sizeof(RelOptInfo *)); + + /* simple_rte_array is an array equivalent of the rtable list */ + root->simple_rte_array = (RangeTblEntry **) + palloc0(size * sizeof(RangeTblEntry *)); + rti = 1; + foreach(lc, root->parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + root->simple_rte_array[rti++] = rte; + } + + /* append_rel_array is not needed if there are no AppendRelInfos */ + if (root->append_rel_list == NIL) + { + root->append_rel_array = NULL; + return; + } + + root->append_rel_array = (AppendRelInfo **) + palloc0(size * sizeof(AppendRelInfo *)); + + /* + * append_rel_array is filled with any already-existing AppendRelInfos, + * which currently could only come from UNION ALL flattening. We might + * add more later during inheritance expansion, but it's the + * responsibility of the expansion code to update the array properly. + */ + foreach(lc, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc); + int child_relid = appinfo->child_relid; + + /* Sanity check */ + Assert(child_relid < size); + + if (root->append_rel_array[child_relid]) + elog(ERROR, "child relation already exists"); + + root->append_rel_array[child_relid] = appinfo; + } +} + +/* + * expand_planner_arrays + * Expand the PlannerInfo's per-RTE arrays by add_size members + * and initialize the newly added entries to NULLs + * + * Note: this causes the append_rel_array to become allocated even if + * it was not before. This is okay for current uses, because we only call + * this when adding child relations, which always have AppendRelInfos. 
+ */ +void +expand_planner_arrays(PlannerInfo *root, int add_size) +{ + int new_size; + + Assert(add_size > 0); + + new_size = root->simple_rel_array_size + add_size; + + root->simple_rel_array = (RelOptInfo **) + repalloc(root->simple_rel_array, + sizeof(RelOptInfo *) * new_size); + MemSet(root->simple_rel_array + root->simple_rel_array_size, + 0, sizeof(RelOptInfo *) * add_size); + + root->simple_rte_array = (RangeTblEntry **) + repalloc(root->simple_rte_array, + sizeof(RangeTblEntry *) * new_size); + MemSet(root->simple_rte_array + root->simple_rel_array_size, + 0, sizeof(RangeTblEntry *) * add_size); + + if (root->append_rel_array) + { + root->append_rel_array = (AppendRelInfo **) + repalloc(root->append_rel_array, + sizeof(AppendRelInfo *) * new_size); + MemSet(root->append_rel_array + root->simple_rel_array_size, + 0, sizeof(AppendRelInfo *) * add_size); + } + else + { + root->append_rel_array = (AppendRelInfo **) + palloc0(sizeof(AppendRelInfo *) * new_size); + } + + root->simple_rel_array_size = new_size; +} + +/* + * build_simple_rel + * Construct a new RelOptInfo for a base relation or 'other' relation. + */ +RelOptInfo * +build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) +{ + RelOptInfo *rel; + RangeTblEntry *rte; + + /* Rel should not exist already */ + Assert(relid > 0 && relid < root->simple_rel_array_size); + if (root->simple_rel_array[relid] != NULL) + elog(ERROR, "rel %d already exists", relid); + + /* Fetch RTE for relation */ + rte = root->simple_rte_array[relid]; + Assert(rte != NULL); + + rel = makeNode(RelOptInfo); + rel->reloptkind = parent ? RELOPT_OTHER_MEMBER_REL : RELOPT_BASEREL; + rel->relids = bms_make_singleton(relid); + rel->rows = 0; + /* cheap startup cost is interesting iff not all tuples to be retrieved */ + rel->consider_startup = (root->tuple_fraction > 0); + rel->consider_param_startup = false; /* might get changed later */ + rel->consider_parallel = false; /* might get changed later */ + rel->reltarget = create_empty_pathtarget(); + rel->pathlist = NIL; + rel->ppilist = NIL; + rel->partial_pathlist = NIL; + rel->cheapest_startup_path = NULL; + rel->cheapest_total_path = NULL; + rel->cheapest_unique_path = NULL; + rel->cheapest_parameterized_paths = NIL; + rel->relid = relid; + rel->rtekind = rte->rtekind; + /* min_attr, max_attr, attr_needed, attr_widths are set below */ + rel->lateral_vars = NIL; + rel->indexlist = NIL; + rel->statlist = NIL; + rel->pages = 0; + rel->tuples = 0; + rel->allvisfrac = 0; + rel->eclass_indexes = NULL; + rel->subroot = NULL; + rel->subplan_params = NIL; + rel->rel_parallel_workers = -1; /* set up in get_relation_info */ + rel->amflags = 0; + rel->serverid = InvalidOid; + rel->userid = rte->checkAsUser; + rel->useridiscurrent = false; + rel->fdwroutine = NULL; + rel->fdw_private = NULL; + rel->unique_for_rels = NIL; + rel->non_unique_for_rels = NIL; + rel->baserestrictinfo = NIL; + rel->baserestrictcost.startup = 0; + rel->baserestrictcost.per_tuple = 0; + rel->baserestrict_min_security = UINT_MAX; + rel->joininfo = NIL; + rel->has_eclass_joins = false; + rel->consider_partitionwise_join = false; /* might get changed later */ + rel->part_scheme = NULL; + rel->nparts = -1; + rel->boundinfo = NULL; + rel->partbounds_merged = false; + rel->partition_qual = NIL; + rel->part_rels = NULL; + rel->live_parts = NULL; + rel->all_partrels = NULL; + rel->partexprs = NULL; + rel->nullable_partexprs = NULL; + + /* + * Pass assorted information down the inheritance hierarchy. 
+ */ + if (parent) + { + /* + * Each direct or indirect child wants to know the relids of its + * topmost parent. + */ + if (parent->top_parent_relids) + rel->top_parent_relids = parent->top_parent_relids; + else + rel->top_parent_relids = bms_copy(parent->relids); + + /* + * Also propagate lateral-reference information from appendrel parent + * rels to their child rels. We intentionally give each child rel the + * same minimum parameterization, even though it's quite possible that + * some don't reference all the lateral rels. This is because any + * append path for the parent will have to have the same + * parameterization for every child anyway, and there's no value in + * forcing extra reparameterize_path() calls. Similarly, a lateral + * reference to the parent prevents use of otherwise-movable join rels + * for each child. + * + * It's possible for child rels to have their own children, in which + * case the topmost parent's lateral info propagates all the way down. + */ + rel->direct_lateral_relids = parent->direct_lateral_relids; + rel->lateral_relids = parent->lateral_relids; + rel->lateral_referencers = parent->lateral_referencers; + } + else + { + rel->top_parent_relids = NULL; + rel->direct_lateral_relids = NULL; + rel->lateral_relids = NULL; + rel->lateral_referencers = NULL; + } + + /* Check type of rtable entry */ + switch (rte->rtekind) + { + case RTE_RELATION: + /* Table --- retrieve statistics from the system catalogs */ + get_relation_info(root, rte->relid, rte->inh, rel); + break; + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_TABLEFUNC: + case RTE_VALUES: + case RTE_CTE: + case RTE_NAMEDTUPLESTORE: + + /* + * Subquery, function, tablefunc, values list, CTE, or ENR --- set + * up attr range and arrays + * + * Note: 0 is included in range to support whole-row Vars + */ + rel->min_attr = 0; + rel->max_attr = list_length(rte->eref->colnames); + rel->attr_needed = (Relids *) + palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids)); + rel->attr_widths = (int32 *) + palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32)); + break; + case RTE_RESULT: + /* RTE_RESULT has no columns, nor could it have whole-row Var */ + rel->min_attr = 0; + rel->max_attr = -1; + rel->attr_needed = NULL; + rel->attr_widths = NULL; + break; + default: + elog(ERROR, "unrecognized RTE kind: %d", + (int) rte->rtekind); + break; + } + + /* + * Copy the parent's quals to the child, with appropriate substitution of + * variables. If any constant false or NULL clauses turn up, we can mark + * the child as dummy right away. (We must do this immediately so that + * pruning works correctly when recursing in expand_partitioned_rtentry.) + */ + if (parent) + { + AppendRelInfo *appinfo = root->append_rel_array[relid]; + + Assert(appinfo != NULL); + if (!apply_child_basequals(root, parent, rel, rte, appinfo)) + { + /* + * Some restriction clause reduced to constant FALSE or NULL after + * substitution, so this child need not be scanned. + */ + mark_dummy_rel(rel); + } + } + + /* Save the finished struct in the query's simple_rel_array */ + root->simple_rel_array[relid] = rel; + + return rel; +} + +/* + * find_base_rel + * Find a base or other relation entry, which must already exist. 
+ */ +RelOptInfo * +find_base_rel(PlannerInfo *root, int relid) +{ + RelOptInfo *rel; + + Assert(relid > 0); + + if (relid < root->simple_rel_array_size) + { + rel = root->simple_rel_array[relid]; + if (rel) + return rel; + } + + elog(ERROR, "no relation entry for relid %d", relid); + + return NULL; /* keep compiler quiet */ +} + +/* + * build_join_rel_hash + * Construct the auxiliary hash table for join relations. + */ +static void +build_join_rel_hash(PlannerInfo *root) +{ + HTAB *hashtab; + HASHCTL hash_ctl; + ListCell *l; + + /* Create the hash table */ + hash_ctl.keysize = sizeof(Relids); + hash_ctl.entrysize = sizeof(JoinHashEntry); + hash_ctl.hash = bitmap_hash; + hash_ctl.match = bitmap_match; + hash_ctl.hcxt = CurrentMemoryContext; + hashtab = hash_create("JoinRelHashTable", + 256L, + &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + /* Insert all the already-existing joinrels */ + foreach(l, root->join_rel_list) + { + RelOptInfo *rel = (RelOptInfo *) lfirst(l); + JoinHashEntry *hentry; + bool found; + + hentry = (JoinHashEntry *) hash_search(hashtab, + &(rel->relids), + HASH_ENTER, + &found); + Assert(!found); + hentry->join_rel = rel; + } + + root->join_rel_hash = hashtab; +} + +/* + * find_join_rel + * Returns relation entry corresponding to 'relids' (a set of RT indexes), + * or NULL if none exists. This is for join relations. + */ +RelOptInfo * +find_join_rel(PlannerInfo *root, Relids relids) +{ + /* + * Switch to using hash lookup when list grows "too long". The threshold + * is arbitrary and is known only here. + */ + if (!root->join_rel_hash && list_length(root->join_rel_list) > 32) + build_join_rel_hash(root); + + /* + * Use either hashtable lookup or linear search, as appropriate. + * + * Note: the seemingly redundant hashkey variable is used to avoid taking + * the address of relids; unless the compiler is exceedingly smart, doing + * so would force relids out of a register and thus probably slow down the + * list-search case. + */ + if (root->join_rel_hash) + { + Relids hashkey = relids; + JoinHashEntry *hentry; + + hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, + &hashkey, + HASH_FIND, + NULL); + if (hentry) + return hentry->join_rel; + } + else + { + ListCell *l; + + foreach(l, root->join_rel_list) + { + RelOptInfo *rel = (RelOptInfo *) lfirst(l); + + if (bms_equal(rel->relids, relids)) + return rel; + } + } + + return NULL; +} + +/* + * set_foreign_rel_properties + * Set up foreign-join fields if outer and inner relation are foreign + * tables (or joins) belonging to the same server and assigned to the same + * user to check access permissions as. + * + * In addition to an exact match of userid, we allow the case where one side + * has zero userid (implying current user) and the other side has explicit + * userid that happens to equal the current user; but in that case, pushdown of + * the join is only valid for the current user. The useridiscurrent field + * records whether we had to make such an assumption for this join or any + * sub-join. + * + * Otherwise these fields are left invalid, so GetForeignJoinPaths will not be + * called for the join relation. 
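+ *
+ * As a hypothetical illustration: joining two foreign tables on the same
+ * server, one checked as the invoking user (userid = InvalidOid) and one
+ * with an explicit userid that equals the current user, yields a joinrel
+ * whose userid is the explicit one and whose useridiscurrent flag is set,
+ * so the pushed-down join remains valid only for the current user.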
+ * + */ +static void +set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + if (OidIsValid(outer_rel->serverid) && + inner_rel->serverid == outer_rel->serverid) + { + if (inner_rel->userid == outer_rel->userid) + { + joinrel->serverid = outer_rel->serverid; + joinrel->userid = outer_rel->userid; + joinrel->useridiscurrent = outer_rel->useridiscurrent || inner_rel->useridiscurrent; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + else if (!OidIsValid(inner_rel->userid) && + outer_rel->userid == GetUserId()) + { + joinrel->serverid = outer_rel->serverid; + joinrel->userid = outer_rel->userid; + joinrel->useridiscurrent = true; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + else if (!OidIsValid(outer_rel->userid) && + inner_rel->userid == GetUserId()) + { + joinrel->serverid = outer_rel->serverid; + joinrel->userid = inner_rel->userid; + joinrel->useridiscurrent = true; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + } +} + +/* + * add_join_rel + * Add given join relation to the list of join relations in the given + * PlannerInfo. Also add it to the auxiliary hashtable if there is one. + */ +static void +add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) +{ + /* GEQO requires us to append the new joinrel to the end of the list! */ + root->join_rel_list = lappend(root->join_rel_list, joinrel); + + /* store it into the auxiliary hashtable if there is one. */ + if (root->join_rel_hash) + { + JoinHashEntry *hentry; + bool found; + + hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, + &(joinrel->relids), + HASH_ENTER, + &found); + Assert(!found); + hentry->join_rel = joinrel; + } +} + +/* + * build_join_rel + * Returns relation entry corresponding to the union of two given rels, + * creating a new relation entry if none already exists. + * + * 'joinrelids' is the Relids set that uniquely identifies the join + * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be + * joined + * 'sjinfo': join context info + * 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr + * receives the list of RestrictInfo nodes that apply to this + * particular pair of joinable relations. + * + * restrictlist_ptr makes the routine's API a little grotty, but it saves + * duplicated calculation of the restrictlist... + */ +RelOptInfo * +build_join_rel(PlannerInfo *root, + Relids joinrelids, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo, + List **restrictlist_ptr) +{ + RelOptInfo *joinrel; + List *restrictlist; + + /* This function should be used only for join between parents. */ + Assert(!IS_OTHER_REL(outer_rel) && !IS_OTHER_REL(inner_rel)); + + /* + * See if we already have a joinrel for this set of base rels. + */ + joinrel = find_join_rel(root, joinrelids); + + if (joinrel) + { + /* + * Yes, so we only need to figure the restrictlist for this particular + * pair of component relations. + */ + if (restrictlist_ptr) + *restrictlist_ptr = build_joinrel_restrictlist(root, + joinrel, + outer_rel, + inner_rel); + return joinrel; + } + + /* + * Nope, so make one. 
+ */ + joinrel = makeNode(RelOptInfo); + joinrel->reloptkind = RELOPT_JOINREL; + joinrel->relids = bms_copy(joinrelids); + joinrel->rows = 0; + /* cheap startup cost is interesting iff not all tuples to be retrieved */ + joinrel->consider_startup = (root->tuple_fraction > 0); + joinrel->consider_param_startup = false; + joinrel->consider_parallel = false; + joinrel->reltarget = create_empty_pathtarget(); + joinrel->pathlist = NIL; + joinrel->ppilist = NIL; + joinrel->partial_pathlist = NIL; + joinrel->cheapest_startup_path = NULL; + joinrel->cheapest_total_path = NULL; + joinrel->cheapest_unique_path = NULL; + joinrel->cheapest_parameterized_paths = NIL; + /* init direct_lateral_relids from children; we'll finish it up below */ + joinrel->direct_lateral_relids = + bms_union(outer_rel->direct_lateral_relids, + inner_rel->direct_lateral_relids); + joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids, + outer_rel, inner_rel); + joinrel->relid = 0; /* indicates not a baserel */ + joinrel->rtekind = RTE_JOIN; + joinrel->min_attr = 0; + joinrel->max_attr = 0; + joinrel->attr_needed = NULL; + joinrel->attr_widths = NULL; + joinrel->lateral_vars = NIL; + joinrel->lateral_referencers = NULL; + joinrel->indexlist = NIL; + joinrel->statlist = NIL; + joinrel->pages = 0; + joinrel->tuples = 0; + joinrel->allvisfrac = 0; + joinrel->eclass_indexes = NULL; + joinrel->subroot = NULL; + joinrel->subplan_params = NIL; + joinrel->rel_parallel_workers = -1; + joinrel->amflags = 0; + joinrel->serverid = InvalidOid; + joinrel->userid = InvalidOid; + joinrel->useridiscurrent = false; + joinrel->fdwroutine = NULL; + joinrel->fdw_private = NULL; + joinrel->unique_for_rels = NIL; + joinrel->non_unique_for_rels = NIL; + joinrel->baserestrictinfo = NIL; + joinrel->baserestrictcost.startup = 0; + joinrel->baserestrictcost.per_tuple = 0; + joinrel->baserestrict_min_security = UINT_MAX; + joinrel->joininfo = NIL; + joinrel->has_eclass_joins = false; + joinrel->consider_partitionwise_join = false; /* might get changed later */ + joinrel->top_parent_relids = NULL; + joinrel->part_scheme = NULL; + joinrel->nparts = -1; + joinrel->boundinfo = NULL; + joinrel->partbounds_merged = false; + joinrel->partition_qual = NIL; + joinrel->part_rels = NULL; + joinrel->live_parts = NULL; + joinrel->all_partrels = NULL; + joinrel->partexprs = NULL; + joinrel->nullable_partexprs = NULL; + + /* Compute information relevant to the foreign relations. */ + set_foreign_rel_properties(joinrel, outer_rel, inner_rel); + + /* + * Create a new tlist containing just the vars that need to be output from + * this join (ie, are needed for higher joinclauses or final output). + * + * NOTE: the tlist order for a join rel will depend on which pair of outer + * and inner rels we first try to build it from. But the contents should + * be the same regardless. + */ + build_joinrel_tlist(root, joinrel, outer_rel); + build_joinrel_tlist(root, joinrel, inner_rel); + add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel); + + /* + * add_placeholders_to_joinrel also took care of adding the ph_lateral + * sets of any PlaceHolderVars computed here to direct_lateral_relids, so + * now we can finish computing that. This is much like the computation of + * the transitively-closed lateral_relids in min_join_parameterization, + * except that here we *do* have to consider the added PHVs. 
+ */ + joinrel->direct_lateral_relids = + bms_del_members(joinrel->direct_lateral_relids, joinrel->relids); + if (bms_is_empty(joinrel->direct_lateral_relids)) + joinrel->direct_lateral_relids = NULL; + + /* + * Construct restrict and join clause lists for the new joinrel. (The + * caller might or might not need the restrictlist, but I need it anyway + * for set_joinrel_size_estimates().) + */ + restrictlist = build_joinrel_restrictlist(root, joinrel, + outer_rel, inner_rel); + if (restrictlist_ptr) + *restrictlist_ptr = restrictlist; + build_joinrel_joinlist(joinrel, outer_rel, inner_rel); + + /* + * This is also the right place to check whether the joinrel has any + * pending EquivalenceClass joins. + */ + joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel); + + /* Store the partition information. */ + build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, + sjinfo->jointype); + + /* + * Set estimates of the joinrel's size. + */ + set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, + sjinfo, restrictlist); + + /* + * Set the consider_parallel flag if this joinrel could potentially be + * scanned within a parallel worker. If this flag is false for either + * inner_rel or outer_rel, then it must be false for the joinrel also. + * Even if both are true, there might be parallel-restricted expressions + * in the targetlist or quals. + * + * Note that if there are more than two rels in this relation, they could + * be divided between inner_rel and outer_rel in any arbitrary way. We + * assume this doesn't matter, because we should hit all the same baserels + * and joinclauses while building up to this joinrel no matter which we + * take; therefore, we should make the same decision here however we get + * here. + */ + if (inner_rel->consider_parallel && outer_rel->consider_parallel && + is_parallel_safe(root, (Node *) restrictlist) && + is_parallel_safe(root, (Node *) joinrel->reltarget->exprs)) + joinrel->consider_parallel = true; + + /* Add the joinrel to the PlannerInfo. */ + add_join_rel(root, joinrel); + + /* + * Also, if dynamic-programming join search is active, add the new joinrel + * to the appropriate sublist. Note: you might think the Assert on number + * of members should be for equality, but some of the level 1 rels might + * have been joinrels already, so we can only assert <=. + */ + if (root->join_rel_level) + { + Assert(root->join_cur_level > 0); + Assert(root->join_cur_level <= bms_num_members(joinrel->relids)); + root->join_rel_level[root->join_cur_level] = + lappend(root->join_rel_level[root->join_cur_level], joinrel); + } + + return joinrel; +} + +/* + * build_child_join_rel + * Builds RelOptInfo representing join between given two child relations. + * + * 'outer_rel' and 'inner_rel' are the RelOptInfos of child relations being + * joined + * 'parent_joinrel' is the RelOptInfo representing the join between parent + * relations. 
Some of the members of new RelOptInfo are produced by + * translating corresponding members of this RelOptInfo + * 'sjinfo': child-join context info + * 'restrictlist': list of RestrictInfo nodes that apply to this particular + * pair of joinable relations + * 'jointype' is the join type (inner, left, full, etc) + */ +RelOptInfo * +build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, + RelOptInfo *inner_rel, RelOptInfo *parent_joinrel, + List *restrictlist, SpecialJoinInfo *sjinfo, + JoinType jointype) +{ + RelOptInfo *joinrel = makeNode(RelOptInfo); + AppendRelInfo **appinfos; + int nappinfos; + + /* Only joins between "other" relations land here. */ + Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel)); + + /* The parent joinrel should have consider_partitionwise_join set. */ + Assert(parent_joinrel->consider_partitionwise_join); + + joinrel->reloptkind = RELOPT_OTHER_JOINREL; + joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids); + joinrel->rows = 0; + /* cheap startup cost is interesting iff not all tuples to be retrieved */ + joinrel->consider_startup = (root->tuple_fraction > 0); + joinrel->consider_param_startup = false; + joinrel->consider_parallel = false; + joinrel->reltarget = create_empty_pathtarget(); + joinrel->pathlist = NIL; + joinrel->ppilist = NIL; + joinrel->partial_pathlist = NIL; + joinrel->cheapest_startup_path = NULL; + joinrel->cheapest_total_path = NULL; + joinrel->cheapest_unique_path = NULL; + joinrel->cheapest_parameterized_paths = NIL; + joinrel->direct_lateral_relids = NULL; + joinrel->lateral_relids = NULL; + joinrel->relid = 0; /* indicates not a baserel */ + joinrel->rtekind = RTE_JOIN; + joinrel->min_attr = 0; + joinrel->max_attr = 0; + joinrel->attr_needed = NULL; + joinrel->attr_widths = NULL; + joinrel->lateral_vars = NIL; + joinrel->lateral_referencers = NULL; + joinrel->indexlist = NIL; + joinrel->pages = 0; + joinrel->tuples = 0; + joinrel->allvisfrac = 0; + joinrel->eclass_indexes = NULL; + joinrel->subroot = NULL; + joinrel->subplan_params = NIL; + joinrel->amflags = 0; + joinrel->serverid = InvalidOid; + joinrel->userid = InvalidOid; + joinrel->useridiscurrent = false; + joinrel->fdwroutine = NULL; + joinrel->fdw_private = NULL; + joinrel->baserestrictinfo = NIL; + joinrel->baserestrictcost.startup = 0; + joinrel->baserestrictcost.per_tuple = 0; + joinrel->joininfo = NIL; + joinrel->has_eclass_joins = false; + joinrel->consider_partitionwise_join = false; /* might get changed later */ + joinrel->top_parent_relids = NULL; + joinrel->part_scheme = NULL; + joinrel->nparts = -1; + joinrel->boundinfo = NULL; + joinrel->partbounds_merged = false; + joinrel->partition_qual = NIL; + joinrel->part_rels = NULL; + joinrel->live_parts = NULL; + joinrel->all_partrels = NULL; + joinrel->partexprs = NULL; + joinrel->nullable_partexprs = NULL; + + joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, + inner_rel->top_parent_relids); + + /* Compute information relevant to foreign relations. */ + set_foreign_rel_properties(joinrel, outer_rel, inner_rel); + + /* Compute information needed for mapping Vars to the child rel */ + appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos); + + /* Set up reltarget struct */ + build_child_join_reltarget(root, parent_joinrel, joinrel, + nappinfos, appinfos); + + /* Construct joininfo list. 
*/ + joinrel->joininfo = (List *) adjust_appendrel_attrs(root, + (Node *) parent_joinrel->joininfo, + nappinfos, + appinfos); + + /* + * Lateral relids referred in child join will be same as that referred in + * the parent relation. + */ + joinrel->direct_lateral_relids = (Relids) bms_copy(parent_joinrel->direct_lateral_relids); + joinrel->lateral_relids = (Relids) bms_copy(parent_joinrel->lateral_relids); + + /* + * If the parent joinrel has pending equivalence classes, so does the + * child. + */ + joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins; + + /* Is the join between partitions itself partitioned? */ + build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, + jointype); + + /* Child joinrel is parallel safe if parent is parallel safe. */ + joinrel->consider_parallel = parent_joinrel->consider_parallel; + + /* Set estimates of the child-joinrel's size. */ + set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, + sjinfo, restrictlist); + + /* We build the join only once. */ + Assert(!find_join_rel(root, joinrel->relids)); + + /* Add the relation to the PlannerInfo. */ + add_join_rel(root, joinrel); + + /* + * We might need EquivalenceClass members corresponding to the child join, + * so that we can represent sort pathkeys for it. As with children of + * baserels, we shouldn't need this unless there are relevant eclass joins + * (implying that a merge join might be possible) or pathkeys to sort by. + */ + if (joinrel->has_eclass_joins || has_useful_pathkeys(root, parent_joinrel)) + add_child_join_rel_equivalences(root, + nappinfos, appinfos, + parent_joinrel, joinrel); + + pfree(appinfos); + + return joinrel; +} + +/* + * min_join_parameterization + * + * Determine the minimum possible parameterization of a joinrel, that is, the + * set of other rels it contains LATERAL references to. We save this value in + * the join's RelOptInfo. This function is split out of build_join_rel() + * because join_is_legal() needs the value to check a prospective join. + */ +Relids +min_join_parameterization(PlannerInfo *root, + Relids joinrelids, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + Relids result; + + /* + * Basically we just need the union of the inputs' lateral_relids, less + * whatever is already in the join. + * + * It's not immediately obvious that this is a valid way to compute the + * result, because it might seem that we're ignoring possible lateral refs + * of PlaceHolderVars that are due to be computed at the join but not in + * either input. However, because create_lateral_join_info() already + * charged all such PHV refs to each member baserel of the join, they'll + * be accounted for already in the inputs' lateral_relids. Likewise, we + * do not need to worry about doing transitive closure here, because that + * was already accounted for in the original baserel lateral_relids. + */ + result = bms_union(outer_rel->lateral_relids, inner_rel->lateral_relids); + result = bms_del_members(result, joinrelids); + + /* Maintain invariant that result is exactly NULL if empty */ + if (bms_is_empty(result)) + result = NULL; + + return result; +} + +/* + * build_joinrel_tlist + * Builds a join relation's target list from an input relation. + * (This is invoked twice to handle the two input relations.) + * + * The join's targetlist includes all Vars of its member relations that + * will still be needed above the join. This subroutine adds all such + * Vars from the specified input rel's tlist to the join rel's tlist. 
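+ *
+ * For instance, in a hypothetical query joining a, b, and c on
+ * "a.y = b.y" and "a.x = c.x" with only a.x in the final target list,
+ * the (a, b) joinrel keeps a.x (it is still needed by the join to c and
+ * by the final output) but drops a.y and b.y, since nothing above the
+ * (a, b) join refers to them.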
+ * + * We also compute the expected width of the join's output, making use + * of data that was cached at the baserel level by set_rel_width(). + */ +static void +build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *input_rel) +{ + Relids relids = joinrel->relids; + ListCell *vars; + + foreach(vars, input_rel->reltarget->exprs) + { + Var *var = (Var *) lfirst(vars); + + /* + * Ignore PlaceHolderVars in the input tlists; we'll make our own + * decisions about whether to copy them. + */ + if (IsA(var, PlaceHolderVar)) + continue; + + /* + * Otherwise, anything in a baserel or joinrel targetlist ought to be + * a Var. (More general cases can only appear in appendrel child + * rels, which will never be seen here.) + */ + if (!IsA(var, Var)) + elog(ERROR, "unexpected node type in rel targetlist: %d", + (int) nodeTag(var)); + + if (var->varno == ROWID_VAR) + { + /* UPDATE/DELETE/MERGE row identity vars are always needed */ + RowIdentityVarInfo *ridinfo = (RowIdentityVarInfo *) + list_nth(root->row_identity_vars, var->varattno - 1); + + joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, + var); + /* Vars have cost zero, so no need to adjust reltarget->cost */ + joinrel->reltarget->width += ridinfo->rowidwidth; + } + else + { + RelOptInfo *baserel; + int ndx; + + /* Get the Var's original base rel */ + baserel = find_base_rel(root, var->varno); + + /* Is it still needed above this joinrel? */ + ndx = var->varattno - baserel->min_attr; + if (bms_nonempty_difference(baserel->attr_needed[ndx], relids)) + { + /* Yup, add it to the output */ + joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, + var); + /* Vars have cost zero, so no need to adjust reltarget->cost */ + joinrel->reltarget->width += baserel->attr_widths[ndx]; + } + } + } +} + +/* + * build_joinrel_restrictlist + * build_joinrel_joinlist + * These routines build lists of restriction and join clauses for a + * join relation from the joininfo lists of the relations it joins. + * + * These routines are separate because the restriction list must be + * built afresh for each pair of input sub-relations we consider, whereas + * the join list need only be computed once for any join RelOptInfo. + * The join list is fully determined by the set of rels making up the + * joinrel, so we should get the same results (up to ordering) from any + * candidate pair of sub-relations. But the restriction list is whatever + * is not handled in the sub-relations, so it depends on which + * sub-relations are considered. + * + * If a join clause from an input relation refers to base rels still not + * present in the joinrel, then it is still a join clause for the joinrel; + * we put it into the joininfo list for the joinrel. Otherwise, + * the clause is now a restrict clause for the joined relation, and we + * return it to the caller of build_joinrel_restrictlist() to be stored in + * join paths made from this pair of sub-relations. (It will not need to + * be considered further up the join tree.) + * + * In many cases we will find the same RestrictInfos in both input + * relations' joinlists, so be careful to eliminate duplicates. + * Pointer equality should be a sufficient test for dups, since all + * the various joinlist entries ultimately refer to RestrictInfos + * pushed into them by distribute_restrictinfo_to_rels(). + * + * 'joinrel' is a join relation node + * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined + * to form joinrel. 
+ * + * build_joinrel_restrictlist() returns a list of relevant restrictinfos, + * whereas build_joinrel_joinlist() stores its results in the joinrel's + * joininfo list. One or the other must accept each given clause! + * + * NB: Formerly, we made deep(!) copies of each input RestrictInfo to pass + * up to the join relation. I believe this is no longer necessary, because + * RestrictInfo nodes are no longer context-dependent. Instead, just include + * the original nodes in the lists made for the join relation. + */ +static List * +build_joinrel_restrictlist(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result; + + /* + * Collect all the clauses that syntactically belong at this level, + * eliminating any duplicates (important since we will see many of the + * same clauses arriving from both input relations). + */ + result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL); + result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result); + + /* + * Add on any clauses derived from EquivalenceClasses. These cannot be + * redundant with the clauses in the joininfo lists, so don't bother + * checking. + */ + result = list_concat(result, + generate_join_implied_equalities(root, + joinrel->relids, + outer_rel->relids, + inner_rel)); + + return result; +} + +static void +build_joinrel_joinlist(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result; + + /* + * Collect all the clauses that syntactically belong above this level, + * eliminating any duplicates (important since we will see many of the + * same clauses arriving from both input relations). + */ + result = subbuild_joinrel_joinlist(joinrel, outer_rel->joininfo, NIL); + result = subbuild_joinrel_joinlist(joinrel, inner_rel->joininfo, result); + + joinrel->joininfo = result; +} + +static List * +subbuild_joinrel_restrictlist(RelOptInfo *joinrel, + List *joininfo_list, + List *new_restrictlist) +{ + ListCell *l; + + foreach(l, joininfo_list) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + if (bms_is_subset(rinfo->required_relids, joinrel->relids)) + { + /* + * This clause becomes a restriction clause for the joinrel, since + * it refers to no outside rels. Add it to the list, being + * careful to eliminate duplicates. (Since RestrictInfo nodes in + * different joinlists will have been multiply-linked rather than + * copied, pointer equality should be a sufficient test.) + */ + new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo); + } + else + { + /* + * This clause is still a join clause at this level, so we ignore + * it in this routine. + */ + } + } + + return new_restrictlist; +} + +static List * +subbuild_joinrel_joinlist(RelOptInfo *joinrel, + List *joininfo_list, + List *new_joininfo) +{ + ListCell *l; + + /* Expected to be called only for join between parent relations. */ + Assert(joinrel->reloptkind == RELOPT_JOINREL); + + foreach(l, joininfo_list) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + if (bms_is_subset(rinfo->required_relids, joinrel->relids)) + { + /* + * This clause becomes a restriction clause for the joinrel, since + * it refers to no outside rels. So we can ignore it in this + * routine. + */ + } + else + { + /* + * This clause is still a join clause at this level, so add it to + * the new joininfo list, being careful to eliminate duplicates. 
+ * (Since RestrictInfo nodes in different joinlists will have been + * multiply-linked rather than copied, pointer equality should be + * a sufficient test.) + */ + new_joininfo = list_append_unique_ptr(new_joininfo, rinfo); + } + } + + return new_joininfo; +} + + +/* + * fetch_upper_rel + * Build a RelOptInfo describing some post-scan/join query processing, + * or return a pre-existing one if somebody already built it. + * + * An "upper" relation is identified by an UpperRelationKind and a Relids set. + * The meaning of the Relids set is not specified here, and very likely will + * vary for different relation kinds. + * + * Most of the fields in an upper-level RelOptInfo are not used and are not + * set here (though makeNode should ensure they're zeroes). We basically only + * care about fields that are of interest to add_path() and set_cheapest(). + */ +RelOptInfo * +fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids) +{ + RelOptInfo *upperrel; + ListCell *lc; + + /* + * For the moment, our indexing data structure is just a List for each + * relation kind. If we ever get so many of one kind that this stops + * working well, we can improve it. No code outside this function should + * assume anything about how to find a particular upperrel. + */ + + /* If we already made this upperrel for the query, return it */ + foreach(lc, root->upper_rels[kind]) + { + upperrel = (RelOptInfo *) lfirst(lc); + + if (bms_equal(upperrel->relids, relids)) + return upperrel; + } + + upperrel = makeNode(RelOptInfo); + upperrel->reloptkind = RELOPT_UPPER_REL; + upperrel->relids = bms_copy(relids); + + /* cheap startup cost is interesting iff not all tuples to be retrieved */ + upperrel->consider_startup = (root->tuple_fraction > 0); + upperrel->consider_param_startup = false; + upperrel->consider_parallel = false; /* might get changed later */ + upperrel->reltarget = create_empty_pathtarget(); + upperrel->pathlist = NIL; + upperrel->cheapest_startup_path = NULL; + upperrel->cheapest_total_path = NULL; + upperrel->cheapest_unique_path = NULL; + upperrel->cheapest_parameterized_paths = NIL; + + root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel); + + return upperrel; +} + + +/* + * find_childrel_parents + * Compute the set of parent relids of an appendrel child rel. + * + * Since appendrels can be nested, a child could have multiple levels of + * appendrel ancestors. This function computes a Relids set of all the + * parent relation IDs. + */ +Relids +find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +{ + Relids result = NULL; + + Assert(rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + Assert(rel->relid > 0 && rel->relid < root->simple_rel_array_size); + + do + { + AppendRelInfo *appinfo = root->append_rel_array[rel->relid]; + Index prelid = appinfo->parent_relid; + + result = bms_add_member(result, prelid); + + /* traverse up to the parent rel, loop if it's also a child rel */ + rel = find_base_rel(root, prelid); + } while (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + Assert(rel->reloptkind == RELOPT_BASEREL); + + return result; +} + + +/* + * get_baserel_parampathinfo + * Get the ParamPathInfo for a parameterized path for a base relation, + * constructing one if we don't have one already. + * + * This centralizes estimating the rowcounts for parameterized paths. + * We need to cache those to be sure we use the same rowcount for all paths + * of the same parameterization for a given rel. 
This is also a convenient + * place to determine which movable join clauses the parameterized path will + * be responsible for evaluating. + */ +ParamPathInfo * +get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, + Relids required_outer) +{ + ParamPathInfo *ppi; + Relids joinrelids; + List *pclauses; + double rows; + ListCell *lc; + + /* If rel has LATERAL refs, every path for it should account for them */ + Assert(bms_is_subset(baserel->lateral_relids, required_outer)); + + /* Unparameterized paths have no ParamPathInfo */ + if (bms_is_empty(required_outer)) + return NULL; + + Assert(!bms_overlap(baserel->relids, required_outer)); + + /* If we already have a PPI for this parameterization, just return it */ + if ((ppi = find_param_path_info(baserel, required_outer))) + return ppi; + + /* + * Identify all joinclauses that are movable to this base rel given this + * parameterization. + */ + joinrelids = bms_union(baserel->relids, required_outer); + pclauses = NIL; + foreach(lc, baserel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (join_clause_is_movable_into(rinfo, + baserel->relids, + joinrelids)) + pclauses = lappend(pclauses, rinfo); + } + + /* + * Add in joinclauses generated by EquivalenceClasses, too. (These + * necessarily satisfy join_clause_is_movable_into.) + */ + pclauses = list_concat(pclauses, + generate_join_implied_equalities(root, + joinrelids, + required_outer, + baserel)); + + /* Estimate the number of rows returned by the parameterized scan */ + rows = get_parameterized_baserel_size(root, baserel, pclauses); + + /* And now we can build the ParamPathInfo */ + ppi = makeNode(ParamPathInfo); + ppi->ppi_req_outer = required_outer; + ppi->ppi_rows = rows; + ppi->ppi_clauses = pclauses; + baserel->ppilist = lappend(baserel->ppilist, ppi); + + return ppi; +} + +/* + * get_joinrel_parampathinfo + * Get the ParamPathInfo for a parameterized path for a join relation, + * constructing one if we don't have one already. + * + * This centralizes estimating the rowcounts for parameterized paths. + * We need to cache those to be sure we use the same rowcount for all paths + * of the same parameterization for a given rel. This is also a convenient + * place to determine which movable join clauses the parameterized path will + * be responsible for evaluating. + * + * outer_path and inner_path are a pair of input paths that can be used to + * construct the join, and restrict_clauses is the list of regular join + * clauses (including clauses derived from EquivalenceClasses) that must be + * applied at the join node when using these inputs. + * + * Unlike the situation for base rels, the set of movable join clauses to be + * enforced at a join varies with the selected pair of input paths, so we + * must calculate that and pass it back, even if we already have a matching + * ParamPathInfo. We handle this by adding any clauses moved down to this + * join to *restrict_clauses, which is an in/out parameter. (The addition + * is done in such a way as to not modify the passed-in List structure.) + * + * Note: when considering a nestloop join, the caller must have removed from + * restrict_clauses any movable clauses that are themselves scheduled to be + * pushed into the right-hand path. We do not do that here since it's + * unnecessary for other join types. 
+ */ +ParamPathInfo * +get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + SpecialJoinInfo *sjinfo, + Relids required_outer, + List **restrict_clauses) +{ + ParamPathInfo *ppi; + Relids join_and_req; + Relids outer_and_req; + Relids inner_and_req; + List *pclauses; + List *eclauses; + List *dropped_ecs; + double rows; + ListCell *lc; + + /* If rel has LATERAL refs, every path for it should account for them */ + Assert(bms_is_subset(joinrel->lateral_relids, required_outer)); + + /* Unparameterized paths have no ParamPathInfo or extra join clauses */ + if (bms_is_empty(required_outer)) + return NULL; + + Assert(!bms_overlap(joinrel->relids, required_outer)); + + /* + * Identify all joinclauses that are movable to this join rel given this + * parameterization. These are the clauses that are movable into this + * join, but not movable into either input path. Treat an unparameterized + * input path as not accepting parameterized clauses (because it won't, + * per the shortcut exit above), even though the joinclause movement rules + * might allow the same clauses to be moved into a parameterized path for + * that rel. + */ + join_and_req = bms_union(joinrel->relids, required_outer); + if (outer_path->param_info) + outer_and_req = bms_union(outer_path->parent->relids, + PATH_REQ_OUTER(outer_path)); + else + outer_and_req = NULL; /* outer path does not accept parameters */ + if (inner_path->param_info) + inner_and_req = bms_union(inner_path->parent->relids, + PATH_REQ_OUTER(inner_path)); + else + inner_and_req = NULL; /* inner path does not accept parameters */ + + pclauses = NIL; + foreach(lc, joinrel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (join_clause_is_movable_into(rinfo, + joinrel->relids, + join_and_req) && + !join_clause_is_movable_into(rinfo, + outer_path->parent->relids, + outer_and_req) && + !join_clause_is_movable_into(rinfo, + inner_path->parent->relids, + inner_and_req)) + pclauses = lappend(pclauses, rinfo); + } + + /* Consider joinclauses generated by EquivalenceClasses, too */ + eclauses = generate_join_implied_equalities(root, + join_and_req, + required_outer, + joinrel); + /* We only want ones that aren't movable to lower levels */ + dropped_ecs = NIL; + foreach(lc, eclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* + * In principle, join_clause_is_movable_into() should accept anything + * returned by generate_join_implied_equalities(); but because its + * analysis is only approximate, sometimes it doesn't. So we + * currently cannot use this Assert; instead just assume it's okay to + * apply the joinclause at this level. + */ +#ifdef NOT_USED + Assert(join_clause_is_movable_into(rinfo, + joinrel->relids, + join_and_req)); +#endif + if (join_clause_is_movable_into(rinfo, + outer_path->parent->relids, + outer_and_req)) + continue; /* drop if movable into LHS */ + if (join_clause_is_movable_into(rinfo, + inner_path->parent->relids, + inner_and_req)) + { + /* drop if movable into RHS, but remember EC for use below */ + Assert(rinfo->left_ec == rinfo->right_ec); + dropped_ecs = lappend(dropped_ecs, rinfo->left_ec); + continue; + } + pclauses = lappend(pclauses, rinfo); + } + + /* + * EquivalenceClasses are harder to deal with than we could wish, because + * of the fact that a given EC can generate different clauses depending on + * context. 
Suppose we have an EC {X.X, Y.Y, Z.Z} where X and Y are the + * LHS and RHS of the current join and Z is in required_outer, and further + * suppose that the inner_path is parameterized by both X and Z. The code + * above will have produced either Z.Z = X.X or Z.Z = Y.Y from that EC, + * and in the latter case will have discarded it as being movable into the + * RHS. However, the EC machinery might have produced either Y.Y = X.X or + * Y.Y = Z.Z as the EC enforcement clause within the inner_path; it will + * not have produced both, and we can't readily tell from here which one + * it did pick. If we add no clause to this join, we'll end up with + * insufficient enforcement of the EC; either Z.Z or X.X will fail to be + * constrained to be equal to the other members of the EC. (When we come + * to join Z to this X/Y path, we will certainly drop whichever EC clause + * is generated at that join, so this omission won't get fixed later.) + * + * To handle this, for each EC we discarded such a clause from, try to + * generate a clause connecting the required_outer rels to the join's LHS + * ("Z.Z = X.X" in the terms of the above example). If successful, and if + * the clause can't be moved to the LHS, add it to the current join's + * restriction clauses. (If an EC cannot generate such a clause then it + * has nothing that needs to be enforced here, while if the clause can be + * moved into the LHS then it should have been enforced within that path.) + * + * Note that we don't need similar processing for ECs whose clause was + * considered to be movable into the LHS, because the LHS can't refer to + * the RHS so there is no comparable ambiguity about what it might + * actually be enforcing internally. + */ + if (dropped_ecs) + { + Relids real_outer_and_req; + + real_outer_and_req = bms_union(outer_path->parent->relids, + required_outer); + eclauses = + generate_join_implied_equalities_for_ecs(root, + dropped_ecs, + real_outer_and_req, + required_outer, + outer_path->parent); + foreach(lc, eclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* As above, can't quite assert this here */ +#ifdef NOT_USED + Assert(join_clause_is_movable_into(rinfo, + outer_path->parent->relids, + real_outer_and_req)); +#endif + if (!join_clause_is_movable_into(rinfo, + outer_path->parent->relids, + outer_and_req)) + pclauses = lappend(pclauses, rinfo); + } + } + + /* + * Now, attach the identified moved-down clauses to the caller's + * restrict_clauses list. By using list_concat in this order, we leave + * the original list structure of restrict_clauses undamaged. + */ + *restrict_clauses = list_concat(pclauses, *restrict_clauses); + + /* If we already have a PPI for this parameterization, just return it */ + if ((ppi = find_param_path_info(joinrel, required_outer))) + return ppi; + + /* Estimate the number of rows returned by the parameterized join */ + rows = get_parameterized_joinrel_size(root, joinrel, + outer_path, + inner_path, + sjinfo, + *restrict_clauses); + + /* + * And now we can build the ParamPathInfo. No point in saving the + * input-pair-dependent clause list, though. + * + * Note: in GEQO mode, we'll be called in a temporary memory context, but + * the joinrel structure is there too, so no problem. 
+ */ + ppi = makeNode(ParamPathInfo); + ppi->ppi_req_outer = required_outer; + ppi->ppi_rows = rows; + ppi->ppi_clauses = NIL; + joinrel->ppilist = lappend(joinrel->ppilist, ppi); + + return ppi; +} + +/* + * get_appendrel_parampathinfo + * Get the ParamPathInfo for a parameterized path for an append relation. + * + * For an append relation, the rowcount estimate will just be the sum of + * the estimates for its children. However, we still need a ParamPathInfo + * to flag the fact that the path requires parameters. So this just creates + * a suitable struct with zero ppi_rows (and no ppi_clauses either, since + * the Append node isn't responsible for checking quals). + */ +ParamPathInfo * +get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer) +{ + ParamPathInfo *ppi; + + /* If rel has LATERAL refs, every path for it should account for them */ + Assert(bms_is_subset(appendrel->lateral_relids, required_outer)); + + /* Unparameterized paths have no ParamPathInfo */ + if (bms_is_empty(required_outer)) + return NULL; + + Assert(!bms_overlap(appendrel->relids, required_outer)); + + /* If we already have a PPI for this parameterization, just return it */ + if ((ppi = find_param_path_info(appendrel, required_outer))) + return ppi; + + /* Else build the ParamPathInfo */ + ppi = makeNode(ParamPathInfo); + ppi->ppi_req_outer = required_outer; + ppi->ppi_rows = 0; + ppi->ppi_clauses = NIL; + appendrel->ppilist = lappend(appendrel->ppilist, ppi); + + return ppi; +} + +/* + * Returns a ParamPathInfo for the parameterization given by required_outer, if + * already available in the given rel. Returns NULL otherwise. + */ +ParamPathInfo * +find_param_path_info(RelOptInfo *rel, Relids required_outer) +{ + ListCell *lc; + + foreach(lc, rel->ppilist) + { + ParamPathInfo *ppi = (ParamPathInfo *) lfirst(lc); + + if (bms_equal(ppi->ppi_req_outer, required_outer)) + return ppi; + } + + return NULL; +} + +/* + * build_joinrel_partition_info + * Checks if the two relations being joined can use partitionwise join + * and if yes, initialize partitioning information of the resulting + * partitioned join relation. + */ +static void +build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, + RelOptInfo *inner_rel, List *restrictlist, + JoinType jointype) +{ + PartitionScheme part_scheme; + + /* Nothing to do if partitionwise join technique is disabled. */ + if (!enable_partitionwise_join) + { + Assert(!IS_PARTITIONED_REL(joinrel)); + return; + } + + /* + * We can only consider this join as an input to further partitionwise + * joins if (a) the input relations are partitioned and have + * consider_partitionwise_join=true, (b) the partition schemes match, and + * (c) we can identify an equi-join between the partition keys. Note that + * if it were possible for have_partkey_equi_join to return different + * answers for the same joinrel depending on which join ordering we try + * first, this logic would break. That shouldn't happen, though, because + * of the way the query planner deduces implied equalities and reorders + * the joins. Please see optimizer/README for details. 
+ */ + if (outer_rel->part_scheme == NULL || inner_rel->part_scheme == NULL || + !outer_rel->consider_partitionwise_join || + !inner_rel->consider_partitionwise_join || + outer_rel->part_scheme != inner_rel->part_scheme || + !have_partkey_equi_join(joinrel, outer_rel, inner_rel, + jointype, restrictlist)) + { + Assert(!IS_PARTITIONED_REL(joinrel)); + return; + } + + part_scheme = outer_rel->part_scheme; + + /* + * This function will be called only once for each joinrel, hence it + * should not have partitioning fields filled yet. + */ + Assert(!joinrel->part_scheme && !joinrel->partexprs && + !joinrel->nullable_partexprs && !joinrel->part_rels && + !joinrel->boundinfo); + + /* + * If the join relation is partitioned, it uses the same partitioning + * scheme as the joining relations. + * + * Note: we calculate the partition bounds, number of partitions, and + * child-join relations of the join relation in try_partitionwise_join(). + */ + joinrel->part_scheme = part_scheme; + set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel, jointype); + + /* + * Set the consider_partitionwise_join flag. + */ + Assert(outer_rel->consider_partitionwise_join); + Assert(inner_rel->consider_partitionwise_join); + joinrel->consider_partitionwise_join = true; +} + +/* + * have_partkey_equi_join + * + * Returns true if there exist equi-join conditions involving pairs + * of matching partition keys of the relations being joined for all + * partition keys. + */ +static bool +have_partkey_equi_join(RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, + JoinType jointype, List *restrictlist) +{ + PartitionScheme part_scheme = rel1->part_scheme; + ListCell *lc; + int cnt_pks; + bool pk_has_clause[PARTITION_MAX_KEYS]; + bool strict_op; + + /* + * This function must only be called when the joined relations have same + * partitioning scheme. + */ + Assert(rel1->part_scheme == rel2->part_scheme); + Assert(part_scheme); + + memset(pk_has_clause, 0, sizeof(pk_has_clause)); + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + OpExpr *opexpr; + Expr *expr1; + Expr *expr2; + int ipk1; + int ipk2; + + /* If processing an outer join, only use its own join clauses. */ + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) + continue; + + /* Skip clauses which can not be used for a join. */ + if (!rinfo->can_join) + continue; + + /* Skip clauses which are not equality conditions. */ + if (!rinfo->mergeopfamilies && !OidIsValid(rinfo->hashjoinoperator)) + continue; + + /* Should be OK to assume it's an OpExpr. */ + opexpr = castNode(OpExpr, rinfo->clause); + + /* Match the operands to the relation. */ + if (bms_is_subset(rinfo->left_relids, rel1->relids) && + bms_is_subset(rinfo->right_relids, rel2->relids)) + { + expr1 = linitial(opexpr->args); + expr2 = lsecond(opexpr->args); + } + else if (bms_is_subset(rinfo->left_relids, rel2->relids) && + bms_is_subset(rinfo->right_relids, rel1->relids)) + { + expr1 = lsecond(opexpr->args); + expr2 = linitial(opexpr->args); + } + else + continue; + + /* + * Now we need to know whether the join operator is strict; see + * comments in pathnodes.h. + */ + strict_op = op_strict(opexpr->opno); + + /* + * Only clauses referencing the partition keys are useful for + * partitionwise join. 
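+		 *
+		 * For example (illustrative, hypothetical tables): if t1 and t2 are
+		 * both hash-partitioned on (id), then "t1.id = t2.id" references
+		 * partition key 0 on both sides and can count toward a partitionwise
+		 * join, whereas "t1.id = t2.val" does not, because t2.val is not a
+		 * partition key of t2.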
+ */ + ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op); + if (ipk1 < 0) + continue; + ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op); + if (ipk2 < 0) + continue; + + /* + * If the clause refers to keys at different ordinal positions, it can + * not be used for partitionwise join. + */ + if (ipk1 != ipk2) + continue; + + /* + * The clause allows partitionwise join only if it uses the same + * operator family as that specified by the partition key. + */ + if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH) + { + if (!OidIsValid(rinfo->hashjoinoperator) || + !op_in_opfamily(rinfo->hashjoinoperator, + part_scheme->partopfamily[ipk1])) + continue; + } + else if (!list_member_oid(rinfo->mergeopfamilies, + part_scheme->partopfamily[ipk1])) + continue; + + /* Mark the partition key as having an equi-join clause. */ + pk_has_clause[ipk1] = true; + } + + /* Check whether every partition key has an equi-join condition. */ + for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++) + { + if (!pk_has_clause[cnt_pks]) + return false; + } + + return true; +} + +/* + * match_expr_to_partition_keys + * + * Tries to match an expression to one of the nullable or non-nullable + * partition keys of "rel". Returns the matched key's ordinal position, + * or -1 if the expression could not be matched to any of the keys. + * + * strict_op must be true if the expression will be compared with the + * partition key using a strict operator. This allows us to consider + * nullable as well as nonnullable partition keys. + */ +static int +match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) +{ + int cnt; + + /* This function should be called only for partitioned relations. */ + Assert(rel->part_scheme); + Assert(rel->partexprs); + Assert(rel->nullable_partexprs); + + /* Remove any relabel decorations. */ + while (IsA(expr, RelabelType)) + expr = (Expr *) (castNode(RelabelType, expr))->arg; + + for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++) + { + ListCell *lc; + + /* We can always match to the non-nullable partition keys. */ + foreach(lc, rel->partexprs[cnt]) + { + if (equal(lfirst(lc), expr)) + return cnt; + } + + if (!strict_op) + continue; + + /* + * If it's a strict join operator then a NULL partition key on one + * side will not join to any partition key on the other side, and in + * particular such a row can't join to a row from a different + * partition on the other side. So, it's okay to search the nullable + * partition keys as well. + */ + foreach(lc, rel->nullable_partexprs[cnt]) + { + if (equal(lfirst(lc), expr)) + return cnt; + } + } + + return -1; +} + +/* + * set_joinrel_partition_key_exprs + * Initialize partition key expressions for a partitioned joinrel. + */ +static void +set_joinrel_partition_key_exprs(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + JoinType jointype) +{ + PartitionScheme part_scheme = joinrel->part_scheme; + int partnatts = part_scheme->partnatts; + + joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts); + joinrel->nullable_partexprs = + (List **) palloc0(sizeof(List *) * partnatts); + + /* + * The joinrel's partition expressions are the same as those of the input + * rels, but we must properly classify them as nullable or not in the + * joinrel's output. (Also, we add some more partition expressions if + * it's a FULL JOIN.) 
+ */ + for (int cnt = 0; cnt < partnatts; cnt++) + { + /* mark these const to enforce that we copy them properly */ + const List *outer_expr = outer_rel->partexprs[cnt]; + const List *outer_null_expr = outer_rel->nullable_partexprs[cnt]; + const List *inner_expr = inner_rel->partexprs[cnt]; + const List *inner_null_expr = inner_rel->nullable_partexprs[cnt]; + List *partexpr = NIL; + List *nullable_partexpr = NIL; + ListCell *lc; + + switch (jointype) + { + /* + * A join relation resulting from an INNER join may be + * regarded as partitioned by either of the inner and outer + * relation keys. For example, A INNER JOIN B ON A.a = B.b + * can be regarded as partitioned on either A.a or B.b. So we + * add both keys to the joinrel's partexpr lists. However, + * anything that was already nullable still has to be treated + * as nullable. + */ + case JOIN_INNER: + partexpr = list_concat_copy(outer_expr, inner_expr); + nullable_partexpr = list_concat_copy(outer_null_expr, + inner_null_expr); + break; + + /* + * A join relation resulting from a SEMI or ANTI join may be + * regarded as partitioned by the outer relation keys. The + * inner relation's keys are no longer interesting; since they + * aren't visible in the join output, nothing could join to + * them. + */ + case JOIN_SEMI: + case JOIN_ANTI: + partexpr = list_copy(outer_expr); + nullable_partexpr = list_copy(outer_null_expr); + break; + + /* + * A join relation resulting from a LEFT OUTER JOIN likewise + * may be regarded as partitioned on the (non-nullable) outer + * relation keys. The inner (nullable) relation keys are okay + * as partition keys for further joins as long as they involve + * strict join operators. + */ + case JOIN_LEFT: + partexpr = list_copy(outer_expr); + nullable_partexpr = list_concat_copy(inner_expr, + outer_null_expr); + nullable_partexpr = list_concat(nullable_partexpr, + inner_null_expr); + break; + + /* + * For FULL OUTER JOINs, both relations are nullable, so the + * resulting join relation may be regarded as partitioned on + * either of inner and outer relation keys, but only for joins + * that involve strict join operators. + */ + case JOIN_FULL: + nullable_partexpr = list_concat_copy(outer_expr, + inner_expr); + nullable_partexpr = list_concat(nullable_partexpr, + outer_null_expr); + nullable_partexpr = list_concat(nullable_partexpr, + inner_null_expr); + + /* + * Also add CoalesceExprs corresponding to each possible + * full-join output variable (that is, left side coalesced to + * right side), so that we can match equijoin expressions + * using those variables. We really only need these for + * columns merged by JOIN USING, and only with the pairs of + * input items that correspond to the data structures that + * parse analysis would build for such variables. But it's + * hard to tell which those are, so just make all the pairs. + * Extra items in the nullable_partexprs list won't cause big + * problems. (It's possible that such items will get matched + * to user-written COALESCEs, but it should still be valid to + * partition on those, since they're going to be either the + * partition column or NULL; it's the same argument as for + * partitionwise nesting of any outer join.) We assume no + * type coercions are needed to make the coalesce expressions, + * since columns of different types won't have gotten + * classified as the same PartitionScheme. 
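+				 *
+				 * For instance (illustrative): for "A FULL JOIN B USING (k)",
+				 * parse analysis represents the merged output column as
+				 * COALESCE(a.k, b.k); adding that expression here lets a
+				 * later equijoin written against the USING column be matched
+				 * to a partition key of this joinrel.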
+ */ + foreach(lc, list_concat_copy(outer_expr, outer_null_expr)) + { + Node *larg = (Node *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, list_concat_copy(inner_expr, inner_null_expr)) + { + Node *rarg = (Node *) lfirst(lc2); + CoalesceExpr *c = makeNode(CoalesceExpr); + + c->coalescetype = exprType(larg); + c->coalescecollid = exprCollation(larg); + c->args = list_make2(larg, rarg); + c->location = -1; + nullable_partexpr = lappend(nullable_partexpr, c); + } + } + break; + + default: + elog(ERROR, "unrecognized join type: %d", (int) jointype); + } + + joinrel->partexprs[cnt] = partexpr; + joinrel->nullable_partexprs[cnt] = nullable_partexpr; + } +} + +/* + * build_child_join_reltarget + * Set up a child-join relation's reltarget from a parent-join relation. + */ +static void +build_child_join_reltarget(PlannerInfo *root, + RelOptInfo *parentrel, + RelOptInfo *childrel, + int nappinfos, + AppendRelInfo **appinfos) +{ + /* Build the targetlist */ + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) parentrel->reltarget->exprs, + nappinfos, appinfos); + + /* Set the cost and width fields */ + childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup; + childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple; + childrel->reltarget->width = parentrel->reltarget->width; +} diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c new file mode 100644 index 0000000..e7764f4 --- /dev/null +++ b/src/backend/optimizer/util/restrictinfo.c @@ -0,0 +1,655 @@ +/*------------------------------------------------------------------------- + * + * restrictinfo.c + * RestrictInfo node manipulation routines. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/restrictinfo.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" +#include "optimizer/restrictinfo.h" + + +static RestrictInfo *make_restrictinfo_internal(PlannerInfo *root, + Expr *clause, + Expr *orclause, + bool is_pushed_down, + bool outerjoin_delayed, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids outer_relids, + Relids nullable_relids); +static Expr *make_sub_restrictinfos(PlannerInfo *root, + Expr *clause, + bool is_pushed_down, + bool outerjoin_delayed, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids outer_relids, + Relids nullable_relids); + + +/* + * make_restrictinfo + * + * Build a RestrictInfo node containing the given subexpression. + * + * The is_pushed_down, outerjoin_delayed, and pseudoconstant flags for the + * RestrictInfo must be supplied by the caller, as well as the correct values + * for security_level, outer_relids, and nullable_relids. + * required_relids can be NULL, in which case it defaults to the actual clause + * contents (i.e., clause_relids). + * + * We initialize fields that depend only on the given subexpression, leaving + * others that depend on context (or may never be needed at all) to be filled + * later. 
+ */ +RestrictInfo * +make_restrictinfo(PlannerInfo *root, + Expr *clause, + bool is_pushed_down, + bool outerjoin_delayed, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids outer_relids, + Relids nullable_relids) +{ + /* + * If it's an OR clause, build a modified copy with RestrictInfos inserted + * above each subclause of the top-level AND/OR structure. + */ + if (is_orclause(clause)) + return (RestrictInfo *) make_sub_restrictinfos(root, + clause, + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + required_relids, + outer_relids, + nullable_relids); + + /* Shouldn't be an AND clause, else AND/OR flattening messed up */ + Assert(!is_andclause(clause)); + + return make_restrictinfo_internal(root, + clause, + NULL, + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + required_relids, + outer_relids, + nullable_relids); +} + +/* + * make_restrictinfo_internal + * + * Common code for the main entry points and the recursive cases. + */ +static RestrictInfo * +make_restrictinfo_internal(PlannerInfo *root, + Expr *clause, + Expr *orclause, + bool is_pushed_down, + bool outerjoin_delayed, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids outer_relids, + Relids nullable_relids) +{ + RestrictInfo *restrictinfo = makeNode(RestrictInfo); + + restrictinfo->clause = clause; + restrictinfo->orclause = orclause; + restrictinfo->is_pushed_down = is_pushed_down; + restrictinfo->outerjoin_delayed = outerjoin_delayed; + restrictinfo->pseudoconstant = pseudoconstant; + restrictinfo->can_join = false; /* may get set below */ + restrictinfo->security_level = security_level; + restrictinfo->outer_relids = outer_relids; + restrictinfo->nullable_relids = nullable_relids; + + /* + * If it's potentially delayable by lower-level security quals, figure out + * whether it's leakproof. We can skip testing this for level-zero quals, + * since they would never get delayed on security grounds anyway. + */ + if (security_level > 0) + restrictinfo->leakproof = !contain_leaked_vars((Node *) clause); + else + restrictinfo->leakproof = false; /* really, "don't know" */ + + /* + * Mark volatility as unknown. The contain_volatile_functions function + * will determine if there are any volatile functions when called for the + * first time with this RestrictInfo. + */ + restrictinfo->has_volatile = VOLATILITY_UNKNOWN; + + /* + * If it's a binary opclause, set up left/right relids info. In any case + * set up the total clause relids info. + */ + if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) + { + restrictinfo->left_relids = pull_varnos(root, get_leftop(clause)); + restrictinfo->right_relids = pull_varnos(root, get_rightop(clause)); + + restrictinfo->clause_relids = bms_union(restrictinfo->left_relids, + restrictinfo->right_relids); + + /* + * Does it look like a normal join clause, i.e., a binary operator + * relating expressions that come from distinct relations? If so we + * might be able to use it in a join algorithm. Note that this is a + * purely syntactic test that is made regardless of context. 
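+		 *
+		 * For example (illustrative column names): "a.x = b.y" has non-empty,
+		 * non-overlapping left and right relids and so is marked can_join,
+		 * while "a.x = a.y" is not, since both sides come from the same
+		 * relation.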
+ */ + if (!bms_is_empty(restrictinfo->left_relids) && + !bms_is_empty(restrictinfo->right_relids) && + !bms_overlap(restrictinfo->left_relids, + restrictinfo->right_relids)) + { + restrictinfo->can_join = true; + /* pseudoconstant should certainly not be true */ + Assert(!restrictinfo->pseudoconstant); + } + } + else + { + /* Not a binary opclause, so mark left/right relid sets as empty */ + restrictinfo->left_relids = NULL; + restrictinfo->right_relids = NULL; + /* and get the total relid set the hard way */ + restrictinfo->clause_relids = pull_varnos(root, (Node *) clause); + } + + /* required_relids defaults to clause_relids */ + if (required_relids != NULL) + restrictinfo->required_relids = required_relids; + else + restrictinfo->required_relids = restrictinfo->clause_relids; + + /* + * Fill in all the cacheable fields with "not yet set" markers. None of + * these will be computed until/unless needed. Note in particular that we + * don't mark a binary opclause as mergejoinable or hashjoinable here; + * that happens only if it appears in the right context (top level of a + * joinclause list). + */ + restrictinfo->parent_ec = NULL; + + restrictinfo->eval_cost.startup = -1; + restrictinfo->norm_selec = -1; + restrictinfo->outer_selec = -1; + + restrictinfo->mergeopfamilies = NIL; + + restrictinfo->left_ec = NULL; + restrictinfo->right_ec = NULL; + restrictinfo->left_em = NULL; + restrictinfo->right_em = NULL; + restrictinfo->scansel_cache = NIL; + + restrictinfo->outer_is_left = false; + + restrictinfo->hashjoinoperator = InvalidOid; + + restrictinfo->left_bucketsize = -1; + restrictinfo->right_bucketsize = -1; + restrictinfo->left_mcvfreq = -1; + restrictinfo->right_mcvfreq = -1; + + restrictinfo->left_hasheqoperator = InvalidOid; + restrictinfo->right_hasheqoperator = InvalidOid; + + return restrictinfo; +} + +/* + * Recursively insert sub-RestrictInfo nodes into a boolean expression. + * + * We put RestrictInfos above simple (non-AND/OR) clauses and above + * sub-OR clauses, but not above sub-AND clauses, because there's no need. + * This may seem odd but it is closely related to the fact that we use + * implicit-AND lists at top level of RestrictInfo lists. Only ORs and + * simple clauses are valid RestrictInfos. + * + * The same is_pushed_down, outerjoin_delayed, and pseudoconstant flag + * values can be applied to all RestrictInfo nodes in the result. Likewise + * for security_level, outer_relids, and nullable_relids. + * + * The given required_relids are attached to our top-level output, + * but any OR-clause constituents are allowed to default to just the + * contained rels. 
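+ *
+ * For example (illustrative quals): given "a.x = 1 OR (b.y = 2 AND b.z = 3)",
+ * the result is a RestrictInfo for the whole OR whose orclause is
+ * OR(RestrictInfo(a.x = 1), AND(RestrictInfo(b.y = 2), RestrictInfo(b.z = 3)));
+ * the sub-AND itself gets no RestrictInfo, but its arms do.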
+ */ +static Expr * +make_sub_restrictinfos(PlannerInfo *root, + Expr *clause, + bool is_pushed_down, + bool outerjoin_delayed, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids outer_relids, + Relids nullable_relids) +{ + if (is_orclause(clause)) + { + List *orlist = NIL; + ListCell *temp; + + foreach(temp, ((BoolExpr *) clause)->args) + orlist = lappend(orlist, + make_sub_restrictinfos(root, + lfirst(temp), + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + NULL, + outer_relids, + nullable_relids)); + return (Expr *) make_restrictinfo_internal(root, + clause, + make_orclause(orlist), + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + required_relids, + outer_relids, + nullable_relids); + } + else if (is_andclause(clause)) + { + List *andlist = NIL; + ListCell *temp; + + foreach(temp, ((BoolExpr *) clause)->args) + andlist = lappend(andlist, + make_sub_restrictinfos(root, + lfirst(temp), + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + required_relids, + outer_relids, + nullable_relids)); + return make_andclause(andlist); + } + else + return (Expr *) make_restrictinfo_internal(root, + clause, + NULL, + is_pushed_down, + outerjoin_delayed, + pseudoconstant, + security_level, + required_relids, + outer_relids, + nullable_relids); +} + +/* + * commute_restrictinfo + * + * Given a RestrictInfo containing a binary opclause, produce a RestrictInfo + * representing the commutation of that clause. The caller must pass the + * OID of the commutator operator (which it's presumably looked up, else + * it would not know this is valid). + * + * Beware that the result shares sub-structure with the given RestrictInfo. + * That's okay for the intended usage with derived index quals, but might + * be hazardous if the source is subject to change. Also notice that we + * assume without checking that the commutator op is a member of the same + * btree and hash opclasses as the original op. + */ +RestrictInfo * +commute_restrictinfo(RestrictInfo *rinfo, Oid comm_op) +{ + RestrictInfo *result; + OpExpr *newclause; + OpExpr *clause = castNode(OpExpr, rinfo->clause); + + Assert(list_length(clause->args) == 2); + + /* flat-copy all the fields of clause ... */ + newclause = makeNode(OpExpr); + memcpy(newclause, clause, sizeof(OpExpr)); + + /* ... and adjust those we need to change to commute it */ + newclause->opno = comm_op; + newclause->opfuncid = InvalidOid; + newclause->args = list_make2(lsecond(clause->args), + linitial(clause->args)); + + /* likewise, flat-copy all the fields of rinfo ... */ + result = makeNode(RestrictInfo); + memcpy(result, rinfo, sizeof(RestrictInfo)); + + /* + * ... and adjust those we need to change. Note in particular that we can + * preserve any cached selectivity or cost estimates, since those ought to + * be the same for the new clause. Likewise we can keep the source's + * parent_ec. 
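+	 *
+	 * For example (illustrative): given "a.x < b.y" and comm_op being the
+	 * OID of the matching ">" operator, the result represents "b.y > a.x",
+	 * with the left/right relids, EquivalenceClass links, and bucket-size
+	 * caches swapped to match.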
+ */ + result->clause = (Expr *) newclause; + result->left_relids = rinfo->right_relids; + result->right_relids = rinfo->left_relids; + Assert(result->orclause == NULL); + result->left_ec = rinfo->right_ec; + result->right_ec = rinfo->left_ec; + result->left_em = rinfo->right_em; + result->right_em = rinfo->left_em; + result->scansel_cache = NIL; /* not worth updating this */ + if (rinfo->hashjoinoperator == clause->opno) + result->hashjoinoperator = comm_op; + else + result->hashjoinoperator = InvalidOid; + result->left_bucketsize = rinfo->right_bucketsize; + result->right_bucketsize = rinfo->left_bucketsize; + result->left_mcvfreq = rinfo->right_mcvfreq; + result->right_mcvfreq = rinfo->left_mcvfreq; + result->left_hasheqoperator = InvalidOid; + result->right_hasheqoperator = InvalidOid; + + return result; +} + +/* + * restriction_is_or_clause + * + * Returns t iff the restrictinfo node contains an 'or' clause. + */ +bool +restriction_is_or_clause(RestrictInfo *restrictinfo) +{ + if (restrictinfo->orclause != NULL) + return true; + else + return false; +} + +/* + * restriction_is_securely_promotable + * + * Returns true if it's okay to evaluate this clause "early", that is before + * other restriction clauses attached to the specified relation. + */ +bool +restriction_is_securely_promotable(RestrictInfo *restrictinfo, + RelOptInfo *rel) +{ + /* + * It's okay if there are no baserestrictinfo clauses for the rel that + * would need to go before this one, *or* if this one is leakproof. + */ + if (restrictinfo->security_level <= rel->baserestrict_min_security || + restrictinfo->leakproof) + return true; + else + return false; +} + +/* + * get_actual_clauses + * + * Returns a list containing the bare clauses from 'restrictinfo_list'. + * + * This is only to be used in cases where none of the RestrictInfos can + * be pseudoconstant clauses (for instance, it's OK on indexqual lists). + */ +List * +get_actual_clauses(List *restrictinfo_list) +{ + List *result = NIL; + ListCell *l; + + foreach(l, restrictinfo_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + Assert(!rinfo->pseudoconstant); + + result = lappend(result, rinfo->clause); + } + return result; +} + +/* + * extract_actual_clauses + * + * Extract bare clauses from 'restrictinfo_list', returning either the + * regular ones or the pseudoconstant ones per 'pseudoconstant'. + */ +List * +extract_actual_clauses(List *restrictinfo_list, + bool pseudoconstant) +{ + List *result = NIL; + ListCell *l; + + foreach(l, restrictinfo_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant == pseudoconstant) + result = lappend(result, rinfo->clause); + } + return result; +} + +/* + * extract_actual_join_clauses + * + * Extract bare clauses from 'restrictinfo_list', separating those that + * semantically match the join level from those that were pushed down. + * Pseudoconstant clauses are excluded from the results. + * + * This is only used at outer joins, since for plain joins we don't care + * about pushed-down-ness. 
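+ *
+ * *joinquals receives the clauses that must be enforced as the outer join's
+ * own JOIN/ON conditions, while *otherquals receives pushed-down clauses,
+ * which act as filter quals applied above the null-extended join output.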
+ */ +void +extract_actual_join_clauses(List *restrictinfo_list, + Relids joinrelids, + List **joinquals, + List **otherquals) +{ + ListCell *l; + + *joinquals = NIL; + *otherquals = NIL; + + foreach(l, restrictinfo_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) + { + if (!rinfo->pseudoconstant) + *otherquals = lappend(*otherquals, rinfo->clause); + } + else + { + /* joinquals shouldn't have been marked pseudoconstant */ + Assert(!rinfo->pseudoconstant); + *joinquals = lappend(*joinquals, rinfo->clause); + } + } +} + +/* + * has_pseudoconstant_clauses + * + * Returns true if 'restrictinfo_list' includes pseudoconstant clauses. + * + * This is used when we determine whether to allow extensions to consider + * pushing down joins in add_paths_to_joinrel(). + */ +bool +has_pseudoconstant_clauses(PlannerInfo *root, + List *restrictinfo_list) +{ + ListCell *l; + + /* No need to look if we know there are no pseudoconstants */ + if (!root->hasPseudoConstantQuals) + return false; + + /* See if there are pseudoconstants in the RestrictInfo list */ + foreach(l, restrictinfo_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant) + return true; + } + return false; +} + + +/* + * join_clause_is_movable_to + * Test whether a join clause is a safe candidate for parameterization + * of a scan on the specified base relation. + * + * A movable join clause is one that can safely be evaluated at a rel below + * its normal semantic level (ie, its required_relids), if the values of + * variables that it would need from other rels are provided. + * + * We insist that the clause actually reference the target relation; this + * prevents undesirable movement of degenerate join clauses, and ensures + * that there is a unique place that a clause can be moved down to. + * + * We cannot move an outer-join clause into the non-nullable side of its + * outer join, as that would change the results (rows would be suppressed + * rather than being null-extended). + * + * Also there must not be an outer join below the clause that would null the + * Vars coming from the target relation. Otherwise the clause might give + * results different from what it would give at its normal semantic level. + * + * Also, the join clause must not use any relations that have LATERAL + * references to the target relation, since we could not put such rels on + * the outer side of a nestloop with the target relation. + */ +bool +join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) +{ + /* Clause must physically reference target rel */ + if (!bms_is_member(baserel->relid, rinfo->clause_relids)) + return false; + + /* Cannot move an outer-join clause into the join's outer side */ + if (bms_is_member(baserel->relid, rinfo->outer_relids)) + return false; + + /* Target rel must not be nullable below the clause */ + if (bms_is_member(baserel->relid, rinfo->nullable_relids)) + return false; + + /* Clause must not use any rels with LATERAL references to this rel */ + if (bms_overlap(baserel->lateral_referencers, rinfo->clause_relids)) + return false; + + return true; +} + +/* + * join_clause_is_movable_into + * Test whether a join clause is movable and can be evaluated within + * the current join context. 
+ * + * currentrelids: the relids of the proposed evaluation location + * current_and_outer: the union of currentrelids and the required_outer + * relids (parameterization's outer relations) + * + * The API would be a bit clearer if we passed the current relids and the + * outer relids separately and did bms_union internally; but since most + * callers need to apply this function to multiple clauses, we make the + * caller perform the union. + * + * Obviously, the clause must only refer to Vars available from the current + * relation plus the outer rels. We also check that it does reference at + * least one current Var, ensuring that the clause will be pushed down to + * a unique place in a parameterized join tree. And we check that we're + * not pushing the clause into its outer-join outer side, nor down into + * a lower outer join's inner side. + * + * The check about pushing a clause down into a lower outer join's inner side + * is only approximate; it sometimes returns "false" when actually it would + * be safe to use the clause here because we're still above the outer join + * in question. This is okay as long as the answers at different join levels + * are consistent: it just means we might sometimes fail to push a clause as + * far down as it could safely be pushed. It's unclear whether it would be + * worthwhile to do this more precisely. (But if it's ever fixed to be + * exactly accurate, there's an Assert in get_joinrel_parampathinfo() that + * should be re-enabled.) + * + * There's no check here equivalent to join_clause_is_movable_to's test on + * lateral_referencers. We assume the caller wouldn't be inquiring unless + * it'd verified that the proposed outer rels don't have lateral references + * to the current rel(s). (If we are considering join paths with the outer + * rels on the outside and the current rels on the inside, then this should + * have been checked at the outset of such consideration; see join_is_legal + * and the path parameterization checks in joinpath.c.) On the other hand, + * in join_clause_is_movable_to we are asking whether the clause could be + * moved for some valid set of outer rels, so we don't have the benefit of + * relying on prior checks for lateral-reference validity. + * + * Note: if this returns true, it means that the clause could be moved to + * this join relation, but that doesn't mean that this is the lowest join + * it could be moved to. Caller may need to make additional calls to verify + * that this doesn't succeed on either of the inputs of a proposed join. + * + * Note: get_joinrel_parampathinfo depends on the fact that if + * current_and_outer is NULL, this function will always return false + * (since one or the other of the first two tests must fail). + */ +bool +join_clause_is_movable_into(RestrictInfo *rinfo, + Relids currentrelids, + Relids current_and_outer) +{ + /* Clause must be evaluable given available context */ + if (!bms_is_subset(rinfo->clause_relids, current_and_outer)) + return false; + + /* Clause must physically reference at least one target rel */ + if (!bms_overlap(currentrelids, rinfo->clause_relids)) + return false; + + /* Cannot move an outer-join clause into the join's outer side */ + if (bms_overlap(currentrelids, rinfo->outer_relids)) + return false; + + /* + * Target rel(s) must not be nullable below the clause. This is + * approximate, in the safe direction, because the current join might be + * above the join where the nulling would happen, in which case the clause + * would work correctly here. 
But we don't have enough info to be sure. + */ + if (bms_overlap(currentrelids, rinfo->nullable_relids)) + return false; + + return true; +} diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c new file mode 100644 index 0000000..eed3e3f --- /dev/null +++ b/src/backend/optimizer/util/tlist.c @@ -0,0 +1,1258 @@ +/*------------------------------------------------------------------------- + * + * tlist.c + * Target list manipulation routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/tlist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" +#include "optimizer/tlist.h" + + +/* + * Test if an expression node represents a SRF call. Beware multiple eval! + * + * Please note that this is only meant for use in split_pathtarget_at_srfs(); + * if you use it anywhere else, your code is almost certainly wrong for SRFs + * nested within expressions. Use expression_returns_set() instead. + */ +#define IS_SRF_CALL(node) \ + ((IsA(node, FuncExpr) && ((FuncExpr *) (node))->funcretset) || \ + (IsA(node, OpExpr) && ((OpExpr *) (node))->opretset)) + +/* + * Data structures for split_pathtarget_at_srfs(). To preserve the identity + * of sortgroupref items even if they are textually equal(), what we track is + * not just bare expressions but expressions plus their sortgroupref indexes. + */ +typedef struct +{ + Node *expr; /* some subexpression of a PathTarget */ + Index sortgroupref; /* its sortgroupref, or 0 if none */ +} split_pathtarget_item; + +typedef struct +{ + /* This is a List of bare expressions: */ + List *input_target_exprs; /* exprs available from input */ + /* These are Lists of Lists of split_pathtarget_items: */ + List *level_srfs; /* SRF exprs to evaluate at each level */ + List *level_input_vars; /* input vars needed at each level */ + List *level_input_srfs; /* input SRFs needed at each level */ + /* These are Lists of split_pathtarget_items: */ + List *current_input_vars; /* vars needed in current subexpr */ + List *current_input_srfs; /* SRFs needed in current subexpr */ + /* Auxiliary data for current split_pathtarget_walker traversal: */ + int current_depth; /* max SRF depth in current subexpr */ + Index current_sgref; /* current subexpr's sortgroupref, or 0 */ +} split_pathtarget_context; + +static bool split_pathtarget_walker(Node *node, + split_pathtarget_context *context); +static void add_sp_item_to_pathtarget(PathTarget *target, + split_pathtarget_item *item); +static void add_sp_items_to_pathtarget(PathTarget *target, List *items); + + +/***************************************************************************** + * Target list creation and searching utilities + *****************************************************************************/ + +/* + * tlist_member + * Finds the (first) member of the given tlist whose expression is + * equal() to the given expression. Result is NULL if no such member. 
+ */ +TargetEntry * +tlist_member(Expr *node, List *targetlist) +{ + ListCell *temp; + + foreach(temp, targetlist) + { + TargetEntry *tlentry = (TargetEntry *) lfirst(temp); + + if (equal(node, tlentry->expr)) + return tlentry; + } + return NULL; +} + +/* + * tlist_member_match_var + * Same as above, except that we match the provided Var on the basis + * of varno/varattno/varlevelsup/vartype only, rather than full equal(). + * + * This is needed in some cases where we can't be sure of an exact typmod + * match. For safety, though, we insist on vartype match. + */ +static TargetEntry * +tlist_member_match_var(Var *var, List *targetlist) +{ + ListCell *temp; + + foreach(temp, targetlist) + { + TargetEntry *tlentry = (TargetEntry *) lfirst(temp); + Var *tlvar = (Var *) tlentry->expr; + + if (!tlvar || !IsA(tlvar, Var)) + continue; + if (var->varno == tlvar->varno && + var->varattno == tlvar->varattno && + var->varlevelsup == tlvar->varlevelsup && + var->vartype == tlvar->vartype) + return tlentry; + } + return NULL; +} + +/* + * add_to_flat_tlist + * Add more items to a flattened tlist (if they're not already in it) + * + * 'tlist' is the flattened tlist + * 'exprs' is a list of expressions (usually, but not necessarily, Vars) + * + * Returns the extended tlist. + */ +List * +add_to_flat_tlist(List *tlist, List *exprs) +{ + int next_resno = list_length(tlist) + 1; + ListCell *lc; + + foreach(lc, exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + if (!tlist_member(expr, tlist)) + { + TargetEntry *tle; + + tle = makeTargetEntry(copyObject(expr), /* copy needed?? */ + next_resno++, + NULL, + false); + tlist = lappend(tlist, tle); + } + } + return tlist; +} + + +/* + * get_tlist_exprs + * Get just the expression subtrees of a tlist + * + * Resjunk columns are ignored unless includeJunk is true + */ +List * +get_tlist_exprs(List *tlist, bool includeJunk) +{ + List *result = NIL; + ListCell *l; + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk && !includeJunk) + continue; + + result = lappend(result, tle->expr); + } + return result; +} + + +/* + * count_nonjunk_tlist_entries + * What it says ... + */ +int +count_nonjunk_tlist_entries(List *tlist) +{ + int len = 0; + ListCell *l; + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (!tle->resjunk) + len++; + } + return len; +} + + +/* + * tlist_same_exprs + * Check whether two target lists contain the same expressions + * + * Note: this function is used to decide whether it's safe to jam a new tlist + * into a non-projection-capable plan node. Obviously we can't do that unless + * the node's tlist shows it already returns the column values we want. + * However, we can ignore the TargetEntry attributes resname, ressortgroupref, + * resorigtbl, resorigcol, and resjunk, because those are only labelings that + * don't affect the row values computed by the node. (Moreover, if we didn't + * ignore them, we'd frequently fail to make the desired optimization, since + * the planner tends to not bother to make resname etc. valid in intermediate + * plan nodes.) Note that on success, the caller must still jam the desired + * tlist into the plan node, else it won't have the desired labeling fields. 
+ */ +bool +tlist_same_exprs(List *tlist1, List *tlist2) +{ + ListCell *lc1, + *lc2; + + if (list_length(tlist1) != list_length(tlist2)) + return false; /* not same length, so can't match */ + + forboth(lc1, tlist1, lc2, tlist2) + { + TargetEntry *tle1 = (TargetEntry *) lfirst(lc1); + TargetEntry *tle2 = (TargetEntry *) lfirst(lc2); + + if (!equal(tle1->expr, tle2->expr)) + return false; + } + + return true; +} + + +/* + * Does tlist have same output datatypes as listed in colTypes? + * + * Resjunk columns are ignored if junkOK is true; otherwise presence of + * a resjunk column will always cause a 'false' result. + * + * Note: currently no callers care about comparing typmods. + */ +bool +tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK) +{ + ListCell *l; + ListCell *curColType = list_head(colTypes); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + { + if (!junkOK) + return false; + } + else + { + if (curColType == NULL) + return false; /* tlist longer than colTypes */ + if (exprType((Node *) tle->expr) != lfirst_oid(curColType)) + return false; + curColType = lnext(colTypes, curColType); + } + } + if (curColType != NULL) + return false; /* tlist shorter than colTypes */ + return true; +} + +/* + * Does tlist have same exposed collations as listed in colCollations? + * + * Identical logic to the above, but for collations. + */ +bool +tlist_same_collations(List *tlist, List *colCollations, bool junkOK) +{ + ListCell *l; + ListCell *curColColl = list_head(colCollations); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + { + if (!junkOK) + return false; + } + else + { + if (curColColl == NULL) + return false; /* tlist longer than colCollations */ + if (exprCollation((Node *) tle->expr) != lfirst_oid(curColColl)) + return false; + curColColl = lnext(colCollations, curColColl); + } + } + if (curColColl != NULL) + return false; /* tlist shorter than colCollations */ + return true; +} + +/* + * apply_tlist_labeling + * Apply the TargetEntry labeling attributes of src_tlist to dest_tlist + * + * This is useful for reattaching column names etc to a plan's final output + * targetlist. + */ +void +apply_tlist_labeling(List *dest_tlist, List *src_tlist) +{ + ListCell *ld, + *ls; + + Assert(list_length(dest_tlist) == list_length(src_tlist)); + forboth(ld, dest_tlist, ls, src_tlist) + { + TargetEntry *dest_tle = (TargetEntry *) lfirst(ld); + TargetEntry *src_tle = (TargetEntry *) lfirst(ls); + + Assert(dest_tle->resno == src_tle->resno); + dest_tle->resname = src_tle->resname; + dest_tle->ressortgroupref = src_tle->ressortgroupref; + dest_tle->resorigtbl = src_tle->resorigtbl; + dest_tle->resorigcol = src_tle->resorigcol; + dest_tle->resjunk = src_tle->resjunk; + } +} + + +/* + * get_sortgroupref_tle + * Find the targetlist entry matching the given SortGroupRef index, + * and return it. + */ +TargetEntry * +get_sortgroupref_tle(Index sortref, List *targetList) +{ + ListCell *l; + + foreach(l, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->ressortgroupref == sortref) + return tle; + } + + elog(ERROR, "ORDER/GROUP BY expression not found in targetlist"); + return NULL; /* keep compiler quiet */ +} + +/* + * get_sortgroupclause_tle + * Find the targetlist entry matching the given SortGroupClause + * by ressortgroupref, and return it. 
+ */ +TargetEntry * +get_sortgroupclause_tle(SortGroupClause *sgClause, + List *targetList) +{ + return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList); +} + +/* + * get_sortgroupclause_expr + * Find the targetlist entry matching the given SortGroupClause + * by ressortgroupref, and return its expression. + */ +Node * +get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList) +{ + TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList); + + return (Node *) tle->expr; +} + +/* + * get_sortgrouplist_exprs + * Given a list of SortGroupClauses, build a list + * of the referenced targetlist expressions. + */ +List * +get_sortgrouplist_exprs(List *sgClauses, List *targetList) +{ + List *result = NIL; + ListCell *l; + + foreach(l, sgClauses) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + Node *sortexpr; + + sortexpr = get_sortgroupclause_expr(sortcl, targetList); + result = lappend(result, sortexpr); + } + return result; +} + + +/***************************************************************************** + * Functions to extract data from a list of SortGroupClauses + * + * These don't really belong in tlist.c, but they are sort of related to the + * functions just above, and they don't seem to deserve their own file. + *****************************************************************************/ + +/* + * get_sortgroupref_clause + * Find the SortGroupClause matching the given SortGroupRef index, + * and return it. + */ +SortGroupClause * +get_sortgroupref_clause(Index sortref, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + SortGroupClause *cl = (SortGroupClause *) lfirst(l); + + if (cl->tleSortGroupRef == sortref) + return cl; + } + + elog(ERROR, "ORDER/GROUP BY expression not found in list"); + return NULL; /* keep compiler quiet */ +} + +/* + * get_sortgroupref_clause_noerr + * As above, but return NULL rather than throwing an error if not found. 
+ */ +SortGroupClause * +get_sortgroupref_clause_noerr(Index sortref, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + SortGroupClause *cl = (SortGroupClause *) lfirst(l); + + if (cl->tleSortGroupRef == sortref) + return cl; + } + + return NULL; +} + +/* + * extract_grouping_ops - make an array of the equality operator OIDs + * for a SortGroupClause list + */ +Oid * +extract_grouping_ops(List *groupClause) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *groupOperators; + ListCell *glitem; + + groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + groupOperators[colno] = groupcl->eqop; + Assert(OidIsValid(groupOperators[colno])); + colno++; + } + + return groupOperators; +} + +/* + * extract_grouping_collations - make an array of the grouping column collations + * for a SortGroupClause list + */ +Oid * +extract_grouping_collations(List *groupClause, List *tlist) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *grpCollations; + ListCell *glitem; + + grpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpCollations[colno++] = exprCollation((Node *) tle->expr); + } + + return grpCollations; +} + +/* + * extract_grouping_cols - make an array of the grouping column resnos + * for a SortGroupClause list + */ +AttrNumber * +extract_grouping_cols(List *groupClause, List *tlist) +{ + AttrNumber *grpColIdx; + int numCols = list_length(groupClause); + int colno = 0; + ListCell *glitem; + + grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpColIdx[colno++] = tle->resno; + } + + return grpColIdx; +} + +/* + * grouping_is_sortable - is it possible to implement grouping list by sorting? + * + * This is easy since the parser will have included a sortop if one exists. + */ +bool +grouping_is_sortable(List *groupClause) +{ + ListCell *glitem; + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + if (!OidIsValid(groupcl->sortop)) + return false; + } + return true; +} + +/* + * grouping_is_hashable - is it possible to implement grouping list by hashing? + * + * We rely on the parser to have set the hashable flag correctly. + */ +bool +grouping_is_hashable(List *groupClause) +{ + ListCell *glitem; + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + if (!groupcl->hashable) + return false; + } + return true; +} + + +/***************************************************************************** + * PathTarget manipulation functions + * + * PathTarget is a somewhat stripped-down version of a full targetlist; it + * omits all the TargetEntry decoration except (optionally) sortgroupref data, + * and it adds evaluation cost and output data width info. + *****************************************************************************/ + +/* + * make_pathtarget_from_tlist + * Construct a PathTarget equivalent to the given targetlist. + * + * This leaves the cost and width fields as zeroes. Most callers will want + * to use create_pathtarget(), so as to get those set. 
+ */ +PathTarget * +make_pathtarget_from_tlist(List *tlist) +{ + PathTarget *target = makeNode(PathTarget); + int i; + ListCell *lc; + + target->sortgrouprefs = (Index *) palloc(list_length(tlist) * sizeof(Index)); + + i = 0; + foreach(lc, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + target->exprs = lappend(target->exprs, tle->expr); + target->sortgrouprefs[i] = tle->ressortgroupref; + i++; + } + + /* + * Mark volatility as unknown. The contain_volatile_functions function + * will determine if there are any volatile functions when called for the + * first time with this PathTarget. + */ + target->has_volatile_expr = VOLATILITY_UNKNOWN; + + return target; +} + +/* + * make_tlist_from_pathtarget + * Construct a targetlist from a PathTarget. + */ +List * +make_tlist_from_pathtarget(PathTarget *target) +{ + List *tlist = NIL; + int i; + ListCell *lc; + + i = 0; + foreach(lc, target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + TargetEntry *tle; + + tle = makeTargetEntry(expr, + i + 1, + NULL, + false); + if (target->sortgrouprefs) + tle->ressortgroupref = target->sortgrouprefs[i]; + tlist = lappend(tlist, tle); + i++; + } + + return tlist; +} + +/* + * copy_pathtarget + * Copy a PathTarget. + * + * The new PathTarget has its own exprs List, but shares the underlying + * target expression trees with the old one. + */ +PathTarget * +copy_pathtarget(PathTarget *src) +{ + PathTarget *dst = makeNode(PathTarget); + + /* Copy scalar fields */ + memcpy(dst, src, sizeof(PathTarget)); + /* Shallow-copy the expression list */ + dst->exprs = list_copy(src->exprs); + /* Duplicate sortgrouprefs if any (if not, the memcpy handled this) */ + if (src->sortgrouprefs) + { + Size nbytes = list_length(src->exprs) * sizeof(Index); + + dst->sortgrouprefs = (Index *) palloc(nbytes); + memcpy(dst->sortgrouprefs, src->sortgrouprefs, nbytes); + } + return dst; +} + +/* + * create_empty_pathtarget + * Create an empty (zero columns, zero cost) PathTarget. + */ +PathTarget * +create_empty_pathtarget(void) +{ + /* This is easy, but we don't want callers to hard-wire this ... */ + return makeNode(PathTarget); +} + +/* + * add_column_to_pathtarget + * Append a target column to the PathTarget. + * + * As with make_pathtarget_from_tlist, we leave it to the caller to update + * the cost and width fields. + */ +void +add_column_to_pathtarget(PathTarget *target, Expr *expr, Index sortgroupref) +{ + /* Updating the exprs list is easy ... */ + target->exprs = lappend(target->exprs, expr); + /* ... the sortgroupref data, a bit less so */ + if (target->sortgrouprefs) + { + int nexprs = list_length(target->exprs); + + /* This might look inefficient, but actually it's usually cheap */ + target->sortgrouprefs = (Index *) + repalloc(target->sortgrouprefs, nexprs * sizeof(Index)); + target->sortgrouprefs[nexprs - 1] = sortgroupref; + } + else if (sortgroupref) + { + /* Adding sortgroupref labeling to a previously unlabeled target */ + int nexprs = list_length(target->exprs); + + target->sortgrouprefs = (Index *) palloc0(nexprs * sizeof(Index)); + target->sortgrouprefs[nexprs - 1] = sortgroupref; + } + + /* + * Reset has_volatile_expr to UNKNOWN. We just leave it up to + * contain_volatile_functions to set this properly again. Technically we + * could save some effort here and just check the new Expr, but it seems + * better to keep the logic for setting this flag in one location rather + * than duplicating the logic here. 
+ */ + if (target->has_volatile_expr == VOLATILITY_NOVOLATILE) + target->has_volatile_expr = VOLATILITY_UNKNOWN; +} + +/* + * add_new_column_to_pathtarget + * Append a target column to the PathTarget, but only if it's not + * equal() to any pre-existing target expression. + * + * The caller cannot specify a sortgroupref, since it would be unclear how + * to merge that with a pre-existing column. + * + * As with make_pathtarget_from_tlist, we leave it to the caller to update + * the cost and width fields. + */ +void +add_new_column_to_pathtarget(PathTarget *target, Expr *expr) +{ + if (!list_member(target->exprs, expr)) + add_column_to_pathtarget(target, expr, 0); +} + +/* + * add_new_columns_to_pathtarget + * Apply add_new_column_to_pathtarget() for each element of the list. + */ +void +add_new_columns_to_pathtarget(PathTarget *target, List *exprs) +{ + ListCell *lc; + + foreach(lc, exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + add_new_column_to_pathtarget(target, expr); + } +} + +/* + * apply_pathtarget_labeling_to_tlist + * Apply any sortgrouprefs in the PathTarget to matching tlist entries + * + * Here, we do not assume that the tlist entries are one-for-one with the + * PathTarget. The intended use of this function is to deal with cases + * where createplan.c has decided to use some other tlist and we have + * to identify what matches exist. + */ +void +apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) +{ + int i; + ListCell *lc; + + /* Nothing to do if PathTarget has no sortgrouprefs data */ + if (target->sortgrouprefs == NULL) + return; + + i = 0; + foreach(lc, target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + TargetEntry *tle; + + if (target->sortgrouprefs[i]) + { + /* + * For Vars, use tlist_member_match_var's weakened matching rule; + * this allows us to deal with some cases where a set-returning + * function has been inlined, so that we now have more knowledge + * about what it returns than we did when the original Var was + * created. Otherwise, use regular equal() to find the matching + * TLE. (In current usage, only the Var case is actually needed; + * but it seems best to have sane behavior here for non-Vars too.) + */ + if (expr && IsA(expr, Var)) + tle = tlist_member_match_var((Var *) expr, tlist); + else + tle = tlist_member(expr, tlist); + + /* + * Complain if noplace for the sortgrouprefs label, or if we'd + * have to label a column twice. (The case where it already has + * the desired label probably can't happen, but we may as well + * allow for it.) + */ + if (!tle) + elog(ERROR, "ORDER/GROUP BY expression not found in targetlist"); + if (tle->ressortgroupref != 0 && + tle->ressortgroupref != target->sortgrouprefs[i]) + elog(ERROR, "targetlist item has multiple sortgroupref labels"); + + tle->ressortgroupref = target->sortgrouprefs[i]; + } + i++; + } +} + +/* + * split_pathtarget_at_srfs + * Split given PathTarget into multiple levels to position SRFs safely + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a ProjectSet plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * creates appropriate PathTarget(s) for each level. 
+ * + * As an example, consider the tlist expression + * x + srf1(srf2(y + z)) + * This expression should appear as-is in the top PathTarget, but below that + * we must have a PathTarget containing + * x, srf1(srf2(y + z)) + * and below that, another PathTarget containing + * x, srf2(y + z) + * and below that, another PathTarget containing + * x, y, z + * When these tlists are processed by setrefs.c, subexpressions that match + * output expressions of the next lower tlist will be replaced by Vars, + * so that what the executor gets are tlists looking like + * Var1 + Var2 + * Var1, srf1(Var2) + * Var1, srf2(Var2 + Var3) + * x, y, z + * which satisfy the desired property. + * + * Another example is + * srf1(x), srf2(srf3(y)) + * That must appear as-is in the top PathTarget, but below that we need + * srf1(x), srf3(y) + * That is, each SRF must be computed at a level corresponding to the nesting + * depth of SRFs within its arguments. + * + * In some cases, a SRF has already been evaluated in some previous plan level + * and we shouldn't expand it again (that is, what we see in the target is + * already meant as a reference to a lower subexpression). So, don't expand + * any tlist expressions that appear in input_target, if that's not NULL. + * + * It's also important that we preserve any sortgroupref annotation appearing + * in the given target, especially on expressions matching input_target items. + * + * The outputs of this function are two parallel lists, one a list of + * PathTargets and the other an integer list of bool flags indicating + * whether the corresponding PathTarget contains any evaluable SRFs. + * The lists are given in the order they'd need to be evaluated in, with + * the "lowest" PathTarget first. So the last list entry is always the + * originally given PathTarget, and any entries before it indicate evaluation + * levels that must be inserted below it. The first list entry must not + * contain any SRFs (other than ones duplicating input_target entries), since + * it will typically be attached to a plan node that cannot evaluate SRFs. + * + * Note: using a list for the flags may seem like overkill, since there + * are only a few possible patterns for which levels contain SRFs. + * But this representation decouples callers from that knowledge. + */ +void +split_pathtarget_at_srfs(PlannerInfo *root, + PathTarget *target, PathTarget *input_target, + List **targets, List **targets_contain_srfs) +{ + split_pathtarget_context context; + int max_depth; + bool need_extra_projection; + List *prev_level_tlist; + int lci; + ListCell *lc, + *lc1, + *lc2, + *lc3; + + /* + * It's not unusual for planner.c to pass us two physically identical + * targets, in which case we can conclude without further ado that all + * expressions are available from the input. (The logic below would + * arrive at the same conclusion, but much more tediously.) + */ + if (target == input_target) + { + *targets = list_make1(target); + *targets_contain_srfs = list_make1_int(false); + return; + } + + /* Pass any input_target exprs down to split_pathtarget_walker() */ + context.input_target_exprs = input_target ? input_target->exprs : NIL; + + /* + * Initialize with empty level-zero lists, and no levels after that. + * (Note: we could dispense with representing level zero explicitly, since + * it will never receive any SRFs, but then we'd have to special-case that + * level when we get to building result PathTargets. Level zero describes + * the SRF-free PathTarget that will be given to the input plan node.) 
+ */ + context.level_srfs = list_make1(NIL); + context.level_input_vars = list_make1(NIL); + context.level_input_srfs = list_make1(NIL); + + /* Initialize data we'll accumulate across all the target expressions */ + context.current_input_vars = NIL; + context.current_input_srfs = NIL; + max_depth = 0; + need_extra_projection = false; + + /* Scan each expression in the PathTarget looking for SRFs */ + lci = 0; + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + + /* Tell split_pathtarget_walker about this expr's sortgroupref */ + context.current_sgref = get_pathtarget_sortgroupref(target, lci); + lci++; + + /* + * Find all SRFs and Vars (and Var-like nodes) in this expression, and + * enter them into appropriate lists within the context struct. + */ + context.current_depth = 0; + split_pathtarget_walker(node, &context); + + /* An expression containing no SRFs is of no further interest */ + if (context.current_depth == 0) + continue; + + /* + * Track max SRF nesting depth over the whole PathTarget. Also, if + * this expression establishes a new max depth, we no longer care + * whether previous expressions contained nested SRFs; we can handle + * any required projection for them in the final ProjectSet node. + */ + if (max_depth < context.current_depth) + { + max_depth = context.current_depth; + need_extra_projection = false; + } + + /* + * If any maximum-depth SRF is not at the top level of its expression, + * we'll need an extra Result node to compute the top-level scalar + * expression. + */ + if (max_depth == context.current_depth && !IS_SRF_CALL(node)) + need_extra_projection = true; + } + + /* + * If we found no SRFs needing evaluation (maybe they were all present in + * input_target, or maybe they were all removed by const-simplification), + * then no ProjectSet is needed; fall out. + */ + if (max_depth == 0) + { + *targets = list_make1(target); + *targets_contain_srfs = list_make1_int(false); + return; + } + + /* + * The Vars and SRF outputs needed at top level can be added to the last + * level_input lists if we don't need an extra projection step. If we do + * need one, add a SRF-free level to the lists. + */ + if (need_extra_projection) + { + context.level_srfs = lappend(context.level_srfs, NIL); + context.level_input_vars = lappend(context.level_input_vars, + context.current_input_vars); + context.level_input_srfs = lappend(context.level_input_srfs, + context.current_input_srfs); + } + else + { + lc = list_nth_cell(context.level_input_vars, max_depth); + lfirst(lc) = list_concat(lfirst(lc), context.current_input_vars); + lc = list_nth_cell(context.level_input_srfs, max_depth); + lfirst(lc) = list_concat(lfirst(lc), context.current_input_srfs); + } + + /* + * Now construct the output PathTargets. The original target can be used + * as-is for the last one, but we need to construct a new SRF-free target + * representing what the preceding plan node has to emit, as well as a + * target for each intermediate ProjectSet node. 
+ */ + *targets = *targets_contain_srfs = NIL; + prev_level_tlist = NIL; + + forthree(lc1, context.level_srfs, + lc2, context.level_input_vars, + lc3, context.level_input_srfs) + { + List *level_srfs = (List *) lfirst(lc1); + PathTarget *ntarget; + + if (lnext(context.level_srfs, lc1) == NULL) + { + ntarget = target; + } + else + { + ntarget = create_empty_pathtarget(); + + /* + * This target should actually evaluate any SRFs of the current + * level, and it needs to propagate forward any Vars needed by + * later levels, as well as SRFs computed earlier and needed by + * later levels. + */ + add_sp_items_to_pathtarget(ntarget, level_srfs); + for_each_cell(lc, context.level_input_vars, + lnext(context.level_input_vars, lc2)) + { + List *input_vars = (List *) lfirst(lc); + + add_sp_items_to_pathtarget(ntarget, input_vars); + } + for_each_cell(lc, context.level_input_srfs, + lnext(context.level_input_srfs, lc3)) + { + List *input_srfs = (List *) lfirst(lc); + ListCell *lcx; + + foreach(lcx, input_srfs) + { + split_pathtarget_item *item = lfirst(lcx); + + if (list_member(prev_level_tlist, item->expr)) + add_sp_item_to_pathtarget(ntarget, item); + } + } + set_pathtarget_cost_width(root, ntarget); + } + + /* + * Add current target and does-it-compute-SRFs flag to output lists. + */ + *targets = lappend(*targets, ntarget); + *targets_contain_srfs = lappend_int(*targets_contain_srfs, + (level_srfs != NIL)); + + /* Remember this level's output for next pass */ + prev_level_tlist = ntarget->exprs; + } +} + +/* + * Recursively examine expressions for split_pathtarget_at_srfs. + * + * Note we make no effort here to prevent duplicate entries in the output + * lists. Duplicates will be gotten rid of later. + */ +static bool +split_pathtarget_walker(Node *node, split_pathtarget_context *context) +{ + if (node == NULL) + return false; + + /* + * A subexpression that matches an expression already computed in + * input_target can be treated like a Var (which indeed it will be after + * setrefs.c gets done with it), even if it's actually a SRF. Record it + * as being needed for the current expression, and ignore any + * substructure. (Note in particular that this preserves the identity of + * any expressions that appear as sortgrouprefs in input_target.) + */ + if (list_member(context->input_target_exprs, node)) + { + split_pathtarget_item *item = palloc(sizeof(split_pathtarget_item)); + + item->expr = node; + item->sortgroupref = context->current_sgref; + context->current_input_vars = lappend(context->current_input_vars, + item); + return false; + } + + /* + * Vars and Var-like constructs are expected to be gotten from the input, + * too. We assume that these constructs cannot contain any SRFs (if one + * does, there will be an executor failure from a misplaced SRF). + */ + if (IsA(node, Var) || + IsA(node, PlaceHolderVar) || + IsA(node, Aggref) || + IsA(node, GroupingFunc) || + IsA(node, WindowFunc)) + { + split_pathtarget_item *item = palloc(sizeof(split_pathtarget_item)); + + item->expr = node; + item->sortgroupref = context->current_sgref; + context->current_input_vars = lappend(context->current_input_vars, + item); + return false; + } + + /* + * If it's a SRF, recursively examine its inputs, determine its level, and + * make appropriate entries in the output lists. 
+ */ + if (IS_SRF_CALL(node)) + { + split_pathtarget_item *item = palloc(sizeof(split_pathtarget_item)); + List *save_input_vars = context->current_input_vars; + List *save_input_srfs = context->current_input_srfs; + int save_current_depth = context->current_depth; + int srf_depth; + ListCell *lc; + + item->expr = node; + item->sortgroupref = context->current_sgref; + + context->current_input_vars = NIL; + context->current_input_srfs = NIL; + context->current_depth = 0; + context->current_sgref = 0; /* subexpressions are not sortgroup items */ + + (void) expression_tree_walker(node, split_pathtarget_walker, + (void *) context); + + /* Depth is one more than any SRF below it */ + srf_depth = context->current_depth + 1; + + /* If new record depth, initialize another level of output lists */ + if (srf_depth >= list_length(context->level_srfs)) + { + context->level_srfs = lappend(context->level_srfs, NIL); + context->level_input_vars = lappend(context->level_input_vars, NIL); + context->level_input_srfs = lappend(context->level_input_srfs, NIL); + } + + /* Record this SRF as needing to be evaluated at appropriate level */ + lc = list_nth_cell(context->level_srfs, srf_depth); + lfirst(lc) = lappend(lfirst(lc), item); + + /* Record its inputs as being needed at the same level */ + lc = list_nth_cell(context->level_input_vars, srf_depth); + lfirst(lc) = list_concat(lfirst(lc), context->current_input_vars); + lc = list_nth_cell(context->level_input_srfs, srf_depth); + lfirst(lc) = list_concat(lfirst(lc), context->current_input_srfs); + + /* + * Restore caller-level state and update it for presence of this SRF. + * Notice we report the SRF itself as being needed for evaluation of + * surrounding expression. + */ + context->current_input_vars = save_input_vars; + context->current_input_srfs = lappend(save_input_srfs, item); + context->current_depth = Max(save_current_depth, srf_depth); + + /* We're done here */ + return false; + } + + /* + * Otherwise, the node is a scalar (non-set) expression, so recurse to + * examine its inputs. + */ + context->current_sgref = 0; /* subexpressions are not sortgroup items */ + return expression_tree_walker(node, split_pathtarget_walker, + (void *) context); +} + +/* + * Add a split_pathtarget_item to the PathTarget, unless a matching item is + * already present. This is like add_new_column_to_pathtarget, but allows + * for sortgrouprefs to be handled. An item having zero sortgroupref can + * be merged with one that has a sortgroupref, acquiring the latter's + * sortgroupref. + * + * Note that we don't worry about possibly adding duplicate sortgrouprefs + * to the PathTarget. That would be bad, but it should be impossible unless + * the target passed to split_pathtarget_at_srfs already had duplicates. + * As long as it didn't, we can have at most one split_pathtarget_item with + * any particular nonzero sortgroupref. + */ +static void +add_sp_item_to_pathtarget(PathTarget *target, split_pathtarget_item *item) +{ + int lci; + ListCell *lc; + + /* + * Look for a pre-existing entry that is equal() and does not have a + * conflicting sortgroupref already. + */ + lci = 0; + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + Index sgref = get_pathtarget_sortgroupref(target, lci); + + if ((item->sortgroupref == sgref || + item->sortgroupref == 0 || + sgref == 0) && + equal(item->expr, node)) + { + /* Found a match. Assign item's sortgroupref if it has one. 
*/ + if (item->sortgroupref) + { + if (target->sortgrouprefs == NULL) + { + target->sortgrouprefs = (Index *) + palloc0(list_length(target->exprs) * sizeof(Index)); + } + target->sortgrouprefs[lci] = item->sortgroupref; + } + return; + } + lci++; + } + + /* + * No match, so add item to PathTarget. Copy the expr for safety. + */ + add_column_to_pathtarget(target, (Expr *) copyObject(item->expr), + item->sortgroupref); +} + +/* + * Apply add_sp_item_to_pathtarget to each element of list. + */ +static void +add_sp_items_to_pathtarget(PathTarget *target, List *items) +{ + ListCell *lc; + + foreach(lc, items) + { + split_pathtarget_item *item = lfirst(lc); + + add_sp_item_to_pathtarget(target, item); + } +} diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c new file mode 100644 index 0000000..ebc6ce8 --- /dev/null +++ b/src/backend/optimizer/util/var.c @@ -0,0 +1,903 @@ +/*------------------------------------------------------------------------- + * + * var.c + * Var node manipulation routines + * + * Note: for most purposes, PlaceHolderVar is considered a Var too, + * even if its contained expression is variable-free. Also, CurrentOfExpr + * is treated as a Var for purposes of determining whether an expression + * contains variables. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/var.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/sysattr.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "optimizer/placeholder.h" +#include "optimizer/prep.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" + + +typedef struct +{ + Relids varnos; + PlannerInfo *root; + int sublevels_up; +} pull_varnos_context; + +typedef struct +{ + Bitmapset *varattnos; + Index varno; +} pull_varattnos_context; + +typedef struct +{ + List *vars; + int sublevels_up; +} pull_vars_context; + +typedef struct +{ + int var_location; + int sublevels_up; +} locate_var_of_level_context; + +typedef struct +{ + List *varlist; + int flags; +} pull_var_clause_context; + +typedef struct +{ + Query *query; /* outer Query */ + int sublevels_up; + bool possible_sublink; /* could aliases include a SubLink? */ + bool inserted_sublink; /* have we inserted a SubLink? */ +} flatten_join_alias_vars_context; + +static bool pull_varnos_walker(Node *node, + pull_varnos_context *context); +static bool pull_varattnos_walker(Node *node, pull_varattnos_context *context); +static bool pull_vars_walker(Node *node, pull_vars_context *context); +static bool contain_var_clause_walker(Node *node, void *context); +static bool contain_vars_of_level_walker(Node *node, int *sublevels_up); +static bool locate_var_of_level_walker(Node *node, + locate_var_of_level_context *context); +static bool pull_var_clause_walker(Node *node, + pull_var_clause_context *context); +static Node *flatten_join_alias_vars_mutator(Node *node, + flatten_join_alias_vars_context *context); +static Relids alias_relid_set(Query *query, Relids relids); + + +/* + * pull_varnos + * Create a set of all the distinct varnos present in a parsetree. + * Only varnos that reference level-zero rtable entries are considered. + * + * "root" can be passed as NULL if it is not necessary to process + * PlaceHolderVars. + * + * NOTE: this is used on not-yet-planned expressions. 
It may therefore find + * bare SubLinks, and if so it needs to recurse into them to look for uplevel + * references to the desired rtable level! But when we find a completed + * SubPlan, we only need to look at the parameters passed to the subplan. + */ +Relids +pull_varnos(PlannerInfo *root, Node *node) +{ + pull_varnos_context context; + + context.varnos = NULL; + context.root = root; + context.sublevels_up = 0; + + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. + */ + query_or_expression_tree_walker(node, + pull_varnos_walker, + (void *) &context, + 0); + + return context.varnos; +} + +/* + * pull_varnos_of_level + * Create a set of all the distinct varnos present in a parsetree. + * Only Vars of the specified level are considered. + */ +Relids +pull_varnos_of_level(PlannerInfo *root, Node *node, int levelsup) +{ + pull_varnos_context context; + + context.varnos = NULL; + context.root = root; + context.sublevels_up = levelsup; + + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. + */ + query_or_expression_tree_walker(node, + pull_varnos_walker, + (void *) &context, + 0); + + return context.varnos; +} + +static bool +pull_varnos_walker(Node *node, pull_varnos_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == context->sublevels_up) + context->varnos = bms_add_member(context->varnos, var->varno); + return false; + } + if (IsA(node, CurrentOfExpr)) + { + CurrentOfExpr *cexpr = (CurrentOfExpr *) node; + + if (context->sublevels_up == 0) + context->varnos = bms_add_member(context->varnos, cexpr->cvarno); + return false; + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* + * If a PlaceHolderVar is not of the target query level, ignore it, + * instead recursing into its expression to see if it contains any + * vars that are of the target level. We'll also do that when the + * caller doesn't pass a "root" pointer. (We probably shouldn't see + * PlaceHolderVars at all in such cases, but if we do, this is a + * reasonable behavior.) + */ + if (phv->phlevelsup == context->sublevels_up && + context->root != NULL) + { + /* + * Ideally, the PHV's contribution to context->varnos is its + * ph_eval_at set. However, this code can be invoked before + * that's been computed. If we cannot find a PlaceHolderInfo, + * fall back to the conservative assumption that the PHV will be + * evaluated at its syntactic level (phv->phrels). + * + * There is a second hazard: this code is also used to examine + * qual clauses during deconstruct_jointree, when we may have a + * PlaceHolderInfo but its ph_eval_at value is not yet final, so + * that theoretically we could obtain a relid set that's smaller + * than we'd see later on. That should never happen though, + * because we deconstruct the jointree working upwards. Any outer + * join that forces delay of evaluation of a given qual clause + * will be processed before we examine that clause here, so the + * ph_eval_at value should have been updated to include it. + * + * Another problem is that a PlaceHolderVar can appear in quals or + * tlists that have been translated for use in a child appendrel. 
+ * Typically such a PHV is a parameter expression sourced by some + * other relation, so that the translation from parent appendrel + * to child doesn't change its phrels, and we should still take + * ph_eval_at at face value. But in corner cases, the PHV's + * original phrels can include the parent appendrel itself, in + * which case the translated PHV will have the child appendrel in + * phrels, and we must translate ph_eval_at to match. + */ + PlaceHolderInfo *phinfo = NULL; + + if (phv->phlevelsup == 0) + { + ListCell *lc; + + foreach(lc, context->root->placeholder_list) + { + phinfo = (PlaceHolderInfo *) lfirst(lc); + if (phinfo->phid == phv->phid) + break; + phinfo = NULL; + } + } + if (phinfo == NULL) + { + /* No PlaceHolderInfo yet, use phrels */ + context->varnos = bms_add_members(context->varnos, + phv->phrels); + } + else if (bms_equal(phv->phrels, phinfo->ph_var->phrels)) + { + /* Normal case: use ph_eval_at */ + context->varnos = bms_add_members(context->varnos, + phinfo->ph_eval_at); + } + else + { + /* Translated PlaceHolderVar: translate ph_eval_at to match */ + Relids newevalat, + delta; + + /* remove what was removed from phv->phrels ... */ + delta = bms_difference(phinfo->ph_var->phrels, phv->phrels); + newevalat = bms_difference(phinfo->ph_eval_at, delta); + /* ... then if that was in fact part of ph_eval_at ... */ + if (!bms_equal(newevalat, phinfo->ph_eval_at)) + { + /* ... add what was added */ + delta = bms_difference(phv->phrels, phinfo->ph_var->phrels); + newevalat = bms_join(newevalat, delta); + } + context->varnos = bms_join(context->varnos, + newevalat); + } + return false; /* don't recurse into expression */ + } + } + else if (IsA(node, Query)) + { + /* Recurse into RTE subquery or not-yet-planned sublink subquery */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, pull_varnos_walker, + (void *) context, 0); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, pull_varnos_walker, + (void *) context); +} + + +/* + * pull_varattnos + * Find all the distinct attribute numbers present in an expression tree, + * and add them to the initial contents of *varattnos. + * Only Vars of the given varno and rtable level zero are considered. + * + * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that + * we can include system attributes (e.g., OID) in the bitmap representation. + * + * Currently, this does not support unplanned subqueries; that is not needed + * for current uses. It will handle already-planned SubPlan nodes, though, + * looking into only the "testexpr" and the "args" list. (The subplan cannot + * contain any other references to Vars of the current level.) 
+ */ +void +pull_varattnos(Node *node, Index varno, Bitmapset **varattnos) +{ + pull_varattnos_context context; + + context.varattnos = *varattnos; + context.varno = varno; + + (void) pull_varattnos_walker(node, &context); + + *varattnos = context.varattnos; +} + +static bool +pull_varattnos_walker(Node *node, pull_varattnos_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varno == context->varno && var->varlevelsup == 0) + context->varattnos = + bms_add_member(context->varattnos, + var->varattno - FirstLowInvalidHeapAttributeNumber); + return false; + } + + /* Should not find an unplanned subquery */ + Assert(!IsA(node, Query)); + + return expression_tree_walker(node, pull_varattnos_walker, + (void *) context); +} + + +/* + * pull_vars_of_level + * Create a list of all Vars (and PlaceHolderVars) referencing the + * specified query level in the given parsetree. + * + * Caution: the Vars are not copied, only linked into the list. + */ +List * +pull_vars_of_level(Node *node, int levelsup) +{ + pull_vars_context context; + + context.vars = NIL; + context.sublevels_up = levelsup; + + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. + */ + query_or_expression_tree_walker(node, + pull_vars_walker, + (void *) &context, + 0); + + return context.vars; +} + +static bool +pull_vars_walker(Node *node, pull_vars_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == context->sublevels_up) + context->vars = lappend(context->vars, var); + return false; + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + if (phv->phlevelsup == context->sublevels_up) + context->vars = lappend(context->vars, phv); + /* we don't want to look into the contained expression */ + return false; + } + if (IsA(node, Query)) + { + /* Recurse into RTE subquery or not-yet-planned sublink subquery */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, pull_vars_walker, + (void *) context, 0); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, pull_vars_walker, + (void *) context); +} + + +/* + * contain_var_clause + * Recursively scan a clause to discover whether it contains any Var nodes + * (of the current query level). + * + * Returns true if any varnode found. + * + * Does not examine subqueries, therefore must only be used after reduction + * of sublinks to subplans! + */ +bool +contain_var_clause(Node *node) +{ + return contain_var_clause_walker(node, NULL); +} + +static bool +contain_var_clause_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + if (((Var *) node)->varlevelsup == 0) + return true; /* abort the tree traversal and return true */ + return false; + } + if (IsA(node, CurrentOfExpr)) + return true; + if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup == 0) + return true; /* abort the tree traversal and return true */ + /* else fall through to check the contained expr */ + } + return expression_tree_walker(node, contain_var_clause_walker, context); +} + + +/* + * contain_vars_of_level + * Recursively scan a clause to discover whether it contains any Var nodes + * of the specified query level. + * + * Returns true if any such Var found. + * + * Will recurse into sublinks. 
Also, may be invoked directly on a Query. + */ +bool +contain_vars_of_level(Node *node, int levelsup) +{ + int sublevels_up = levelsup; + + return query_or_expression_tree_walker(node, + contain_vars_of_level_walker, + (void *) &sublevels_up, + 0); +} + +static bool +contain_vars_of_level_walker(Node *node, int *sublevels_up) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + if (((Var *) node)->varlevelsup == *sublevels_up) + return true; /* abort tree traversal and return true */ + return false; + } + if (IsA(node, CurrentOfExpr)) + { + if (*sublevels_up == 0) + return true; + return false; + } + if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup == *sublevels_up) + return true; /* abort the tree traversal and return true */ + /* else fall through to check the contained expr */ + } + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + (*sublevels_up)++; + result = query_tree_walker((Query *) node, + contain_vars_of_level_walker, + (void *) sublevels_up, + 0); + (*sublevels_up)--; + return result; + } + return expression_tree_walker(node, + contain_vars_of_level_walker, + (void *) sublevels_up); +} + + +/* + * locate_var_of_level + * Find the parse location of any Var of the specified query level. + * + * Returns -1 if no such Var is in the querytree, or if they all have + * unknown parse location. (The former case is probably caller error, + * but we don't bother to distinguish it from the latter case.) + * + * Will recurse into sublinks. Also, may be invoked directly on a Query. + * + * Note: it might seem appropriate to merge this functionality into + * contain_vars_of_level, but that would complicate that function's API. + * Currently, the only uses of this function are for error reporting, + * and so shaving cycles probably isn't very important. + */ +int +locate_var_of_level(Node *node, int levelsup) +{ + locate_var_of_level_context context; + + context.var_location = -1; /* in case we find nothing */ + context.sublevels_up = levelsup; + + (void) query_or_expression_tree_walker(node, + locate_var_of_level_walker, + (void *) &context, + 0); + + return context.var_location; +} + +static bool +locate_var_of_level_walker(Node *node, + locate_var_of_level_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == context->sublevels_up && + var->location >= 0) + { + context->var_location = var->location; + return true; /* abort tree traversal and return true */ + } + return false; + } + if (IsA(node, CurrentOfExpr)) + { + /* since CurrentOfExpr doesn't carry location, nothing we can do */ + return false; + } + /* No extra code needed for PlaceHolderVar; just look in contained expr */ + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, + locate_var_of_level_walker, + (void *) context, + 0); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, + locate_var_of_level_walker, + (void *) context); +} + + +/* + * pull_var_clause + * Recursively pulls all Var nodes from an expression clause. 
+ * + * Aggrefs are handled according to these bits in 'flags': + * PVC_INCLUDE_AGGREGATES include Aggrefs in output list + * PVC_RECURSE_AGGREGATES recurse into Aggref arguments + * neither flag throw error if Aggref found + * Vars within an Aggref's expression are included in the result only + * when PVC_RECURSE_AGGREGATES is specified. + * + * WindowFuncs are handled according to these bits in 'flags': + * PVC_INCLUDE_WINDOWFUNCS include WindowFuncs in output list + * PVC_RECURSE_WINDOWFUNCS recurse into WindowFunc arguments + * neither flag throw error if WindowFunc found + * Vars within a WindowFunc's expression are included in the result only + * when PVC_RECURSE_WINDOWFUNCS is specified. + * + * PlaceHolderVars are handled according to these bits in 'flags': + * PVC_INCLUDE_PLACEHOLDERS include PlaceHolderVars in output list + * PVC_RECURSE_PLACEHOLDERS recurse into PlaceHolderVar arguments + * neither flag throw error if PlaceHolderVar found + * Vars within a PHV's expression are included in the result only + * when PVC_RECURSE_PLACEHOLDERS is specified. + * + * GroupingFuncs are treated exactly like Aggrefs, and so do not need + * their own flag bits. + * + * CurrentOfExpr nodes are ignored in all cases. + * + * Upper-level vars (with varlevelsup > 0) should not be seen here, + * likewise for upper-level Aggrefs and PlaceHolderVars. + * + * Returns list of nodes found. Note the nodes themselves are not + * copied, only referenced. + * + * Does not examine subqueries, therefore must only be used after reduction + * of sublinks to subplans! + */ +List * +pull_var_clause(Node *node, int flags) +{ + pull_var_clause_context context; + + /* Assert that caller has not specified inconsistent flags */ + Assert((flags & (PVC_INCLUDE_AGGREGATES | PVC_RECURSE_AGGREGATES)) + != (PVC_INCLUDE_AGGREGATES | PVC_RECURSE_AGGREGATES)); + Assert((flags & (PVC_INCLUDE_WINDOWFUNCS | PVC_RECURSE_WINDOWFUNCS)) + != (PVC_INCLUDE_WINDOWFUNCS | PVC_RECURSE_WINDOWFUNCS)); + Assert((flags & (PVC_INCLUDE_PLACEHOLDERS | PVC_RECURSE_PLACEHOLDERS)) + != (PVC_INCLUDE_PLACEHOLDERS | PVC_RECURSE_PLACEHOLDERS)); + + context.varlist = NIL; + context.flags = flags; + + pull_var_clause_walker(node, &context); + return context.varlist; +} + +static bool +pull_var_clause_walker(Node *node, pull_var_clause_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + if (((Var *) node)->varlevelsup != 0) + elog(ERROR, "Upper-level Var found where not expected"); + context->varlist = lappend(context->varlist, node); + return false; + } + else if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup != 0) + elog(ERROR, "Upper-level Aggref found where not expected"); + if (context->flags & PVC_INCLUDE_AGGREGATES) + { + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expression */ + return false; + } + else if (context->flags & PVC_RECURSE_AGGREGATES) + { + /* fall through to recurse into the aggregate's arguments */ + } + else + elog(ERROR, "Aggref found where not expected"); + } + else if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup != 0) + elog(ERROR, "Upper-level GROUPING found where not expected"); + if (context->flags & PVC_INCLUDE_AGGREGATES) + { + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expression */ + return false; + } + else if (context->flags & PVC_RECURSE_AGGREGATES) + { + /* fall through to recurse into the GroupingFunc's arguments */ + } + else + elog(ERROR, 
"GROUPING found where not expected"); + } + else if (IsA(node, WindowFunc)) + { + /* WindowFuncs have no levelsup field to check ... */ + if (context->flags & PVC_INCLUDE_WINDOWFUNCS) + { + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expressions */ + return false; + } + else if (context->flags & PVC_RECURSE_WINDOWFUNCS) + { + /* fall through to recurse into the windowfunc's arguments */ + } + else + elog(ERROR, "WindowFunc found where not expected"); + } + else if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup != 0) + elog(ERROR, "Upper-level PlaceHolderVar found where not expected"); + if (context->flags & PVC_INCLUDE_PLACEHOLDERS) + { + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expression */ + return false; + } + else if (context->flags & PVC_RECURSE_PLACEHOLDERS) + { + /* fall through to recurse into the placeholder's expression */ + } + else + elog(ERROR, "PlaceHolderVar found where not expected"); + } + return expression_tree_walker(node, pull_var_clause_walker, + (void *) context); +} + + +/* + * flatten_join_alias_vars + * Replace Vars that reference JOIN outputs with references to the original + * relation variables instead. This allows quals involving such vars to be + * pushed down. Whole-row Vars that reference JOIN relations are expanded + * into RowExpr constructs that name the individual output Vars. This + * is necessary since we will not scan the JOIN as a base relation, which + * is the only way that the executor can directly handle whole-row Vars. + * + * This also adjusts relid sets found in some expression node types to + * substitute the contained base rels for any join relid. + * + * If a JOIN contains sub-selects that have been flattened, its join alias + * entries might now be arbitrary expressions, not just Vars. This affects + * this function in one important way: we might find ourselves inserting + * SubLink expressions into subqueries, and we must make sure that their + * Query.hasSubLinks fields get set to true if so. If there are any + * SubLinks in the join alias lists, the outer Query should already have + * hasSubLinks = true, so this is only relevant to un-flattened subqueries. + * + * NOTE: this is used on not-yet-planned expressions. We do not expect it + * to be applied directly to the whole Query, so if we see a Query to start + * with, we do want to increment sublevels_up (this occurs for LATERAL + * subqueries). 
+ */ +Node * +flatten_join_alias_vars(Query *query, Node *node) +{ + flatten_join_alias_vars_context context; + + context.query = query; + context.sublevels_up = 0; + /* flag whether join aliases could possibly contain SubLinks */ + context.possible_sublink = query->hasSubLinks; + /* if hasSubLinks is already true, no need to work hard */ + context.inserted_sublink = query->hasSubLinks; + + return flatten_join_alias_vars_mutator(node, &context); +} + +static Node * +flatten_join_alias_vars_mutator(Node *node, + flatten_join_alias_vars_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + RangeTblEntry *rte; + Node *newvar; + + /* No change unless Var belongs to a JOIN of the target level */ + if (var->varlevelsup != context->sublevels_up) + return node; /* no need to copy, really */ + rte = rt_fetch(var->varno, context->query->rtable); + if (rte->rtekind != RTE_JOIN) + return node; + if (var->varattno == InvalidAttrNumber) + { + /* Must expand whole-row reference */ + RowExpr *rowexpr; + List *fields = NIL; + List *colnames = NIL; + ListCell *lv; + ListCell *ln; + + Assert(list_length(rte->joinaliasvars) == list_length(rte->eref->colnames)); + forboth(lv, rte->joinaliasvars, ln, rte->eref->colnames) + { + newvar = (Node *) lfirst(lv); + /* Ignore dropped columns */ + if (newvar == NULL) + continue; + newvar = copyObject(newvar); + + /* + * If we are expanding an alias carried down from an upper + * query, must adjust its varlevelsup fields. + */ + if (context->sublevels_up != 0) + IncrementVarSublevelsUp(newvar, context->sublevels_up, 0); + /* Preserve original Var's location, if possible */ + if (IsA(newvar, Var)) + ((Var *) newvar)->location = var->location; + /* Recurse in case join input is itself a join */ + /* (also takes care of setting inserted_sublink if needed) */ + newvar = flatten_join_alias_vars_mutator(newvar, context); + fields = lappend(fields, newvar); + /* We need the names of non-dropped columns, too */ + colnames = lappend(colnames, copyObject((Node *) lfirst(ln))); + } + rowexpr = makeNode(RowExpr); + rowexpr->args = fields; + rowexpr->row_typeid = var->vartype; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + /* vartype will always be RECORDOID, so we always need colnames */ + rowexpr->colnames = colnames; + rowexpr->location = var->location; + + return (Node *) rowexpr; + } + + /* Expand join alias reference */ + Assert(var->varattno > 0); + newvar = (Node *) list_nth(rte->joinaliasvars, var->varattno - 1); + Assert(newvar != NULL); + newvar = copyObject(newvar); + + /* + * If we are expanding an alias carried down from an upper query, must + * adjust its varlevelsup fields. 
+ */ + if (context->sublevels_up != 0) + IncrementVarSublevelsUp(newvar, context->sublevels_up, 0); + + /* Preserve original Var's location, if possible */ + if (IsA(newvar, Var)) + ((Var *) newvar)->location = var->location; + + /* Recurse in case join input is itself a join */ + newvar = flatten_join_alias_vars_mutator(newvar, context); + + /* Detect if we are adding a sublink to query */ + if (context->possible_sublink && !context->inserted_sublink) + context->inserted_sublink = checkExprHasSubLink(newvar); + + return newvar; + } + if (IsA(node, PlaceHolderVar)) + { + /* Copy the PlaceHolderVar node with correct mutation of subnodes */ + PlaceHolderVar *phv; + + phv = (PlaceHolderVar *) expression_tree_mutator(node, + flatten_join_alias_vars_mutator, + (void *) context); + /* now fix PlaceHolderVar's relid sets */ + if (phv->phlevelsup == context->sublevels_up) + { + phv->phrels = alias_relid_set(context->query, + phv->phrels); + } + return (Node *) phv; + } + + if (IsA(node, Query)) + { + /* Recurse into RTE subquery or not-yet-planned sublink subquery */ + Query *newnode; + bool save_inserted_sublink; + + context->sublevels_up++; + save_inserted_sublink = context->inserted_sublink; + context->inserted_sublink = ((Query *) node)->hasSubLinks; + newnode = query_tree_mutator((Query *) node, + flatten_join_alias_vars_mutator, + (void *) context, + QTW_IGNORE_JOINALIASES); + newnode->hasSubLinks |= context->inserted_sublink; + context->inserted_sublink = save_inserted_sublink; + context->sublevels_up--; + return (Node *) newnode; + } + /* Already-planned tree not supported */ + Assert(!IsA(node, SubPlan)); + /* Shouldn't need to handle these planner auxiliary nodes here */ + Assert(!IsA(node, SpecialJoinInfo)); + Assert(!IsA(node, PlaceHolderInfo)); + Assert(!IsA(node, MinMaxAggInfo)); + + return expression_tree_mutator(node, flatten_join_alias_vars_mutator, + (void *) context); +} + +/* + * alias_relid_set: in a set of RT indexes, replace joins by their + * underlying base relids + */ +static Relids +alias_relid_set(Query *query, Relids relids) +{ + Relids result = NULL; + int rtindex; + + rtindex = -1; + while ((rtindex = bms_next_member(relids, rtindex)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rtindex, query->rtable); + + if (rte->rtekind == RTE_JOIN) + result = bms_join(result, get_relids_for_join(query, rtindex)); + else + result = bms_add_member(result, rtindex); + } + return result; +} -- cgit v1.2.3