summaryrefslogtreecommitdiffstats
path: root/src/backend/parser/parse_cte.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/parser/parse_cte.c
parentInitial commit. (diff)
downloadpostgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/parser/parse_cte.c')
-rw-r--r--src/backend/parser/parse_cte.c1166
1 files changed, 1166 insertions, 0 deletions
diff --git a/src/backend/parser/parse_cte.c b/src/backend/parser/parse_cte.c
new file mode 100644
index 0000000..f6ae963
--- /dev/null
+++ b/src/backend/parser/parse_cte.c
@@ -0,0 +1,1166 @@
+/*-------------------------------------------------------------------------
+ *
+ * parse_cte.c
+ * handle CTEs (common table expressions) in parser
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/parser/parse_cte.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/analyze.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_cte.h"
+#include "parser/parse_expr.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/typcache.h"
+
+
+/* Enumeration of contexts in which a self-reference is disallowed */
+typedef enum
+{
+ RECURSION_OK,
+ RECURSION_NONRECURSIVETERM, /* inside the left-hand term */
+ RECURSION_SUBLINK, /* inside a sublink */
+ RECURSION_OUTERJOIN, /* inside nullable side of an outer join */
+ RECURSION_INTERSECT, /* underneath INTERSECT (ALL) */
+ RECURSION_EXCEPT /* underneath EXCEPT (ALL) */
+} RecursionContext;
+
+/* Associated error messages --- each must have one %s for CTE name */
+static const char *const recursion_errormsgs[] = {
+ /* RECURSION_OK */
+ NULL,
+ /* RECURSION_NONRECURSIVETERM */
+ gettext_noop("recursive reference to query \"%s\" must not appear within its non-recursive term"),
+ /* RECURSION_SUBLINK */
+ gettext_noop("recursive reference to query \"%s\" must not appear within a subquery"),
+ /* RECURSION_OUTERJOIN */
+ gettext_noop("recursive reference to query \"%s\" must not appear within an outer join"),
+ /* RECURSION_INTERSECT */
+ gettext_noop("recursive reference to query \"%s\" must not appear within INTERSECT"),
+ /* RECURSION_EXCEPT */
+ gettext_noop("recursive reference to query \"%s\" must not appear within EXCEPT")
+};
+
+/*
+ * For WITH RECURSIVE, we have to find an ordering of the clause members
+ * with no forward references, and determine which members are recursive
+ * (i.e., self-referential). It is convenient to do this with an array
+ * of CteItems instead of a list of CommonTableExprs.
+ */
+typedef struct CteItem
+{
+ CommonTableExpr *cte; /* One CTE to examine */
+ int id; /* Its ID number for dependencies */
+ Bitmapset *depends_on; /* CTEs depended on (not including self) */
+} CteItem;
+
+/* CteState is what we need to pass around in the tree walkers */
+typedef struct CteState
+{
+ /* global state: */
+ ParseState *pstate; /* global parse state */
+ CteItem *items; /* array of CTEs and extra data */
+ int numitems; /* number of CTEs */
+ /* working state during a tree walk: */
+ int curitem; /* index of item currently being examined */
+ List *innerwiths; /* list of lists of CommonTableExpr */
+ /* working state for checkWellFormedRecursion walk only: */
+ int selfrefcount; /* number of self-references detected */
+ RecursionContext context; /* context to allow or disallow self-ref */
+} CteState;
+
+
+static void analyzeCTE(ParseState *pstate, CommonTableExpr *cte);
+
+/* Dependency processing functions */
+static void makeDependencyGraph(CteState *cstate);
+static bool makeDependencyGraphWalker(Node *node, CteState *cstate);
+static void TopologicalSort(ParseState *pstate, CteItem *items, int numitems);
+
+/* Recursion validity checker functions */
+static void checkWellFormedRecursion(CteState *cstate);
+static bool checkWellFormedRecursionWalker(Node *node, CteState *cstate);
+static void checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate);
+
+
+/*
+ * transformWithClause -
+ * Transform the list of WITH clause "common table expressions" into
+ * Query nodes.
+ *
+ * The result is the list of transformed CTEs to be put into the output
+ * Query. (This is in fact the same as the ending value of p_ctenamespace,
+ * but it seems cleaner to not expose that in the function's API.)
+ */
+List *
+transformWithClause(ParseState *pstate, WithClause *withClause)
+{
+ ListCell *lc;
+
+ /* Only one WITH clause per query level */
+ Assert(pstate->p_ctenamespace == NIL);
+ Assert(pstate->p_future_ctes == NIL);
+
+ /*
+ * For either type of WITH, there must not be duplicate CTE names in the
+ * list. Check this right away so we needn't worry later.
+ *
+ * Also, tentatively mark each CTE as non-recursive, and initialize its
+ * reference count to zero, and set pstate->p_hasModifyingCTE if needed.
+ */
+ foreach(lc, withClause->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+ ListCell *rest;
+
+ for_each_cell(rest, withClause->ctes, lnext(withClause->ctes, lc))
+ {
+ CommonTableExpr *cte2 = (CommonTableExpr *) lfirst(rest);
+
+ if (strcmp(cte->ctename, cte2->ctename) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_ALIAS),
+ errmsg("WITH query name \"%s\" specified more than once",
+ cte2->ctename),
+ parser_errposition(pstate, cte2->location)));
+ }
+
+ cte->cterecursive = false;
+ cte->cterefcount = 0;
+
+ if (!IsA(cte->ctequery, SelectStmt))
+ {
+ /* must be a data-modifying statement */
+ Assert(IsA(cte->ctequery, InsertStmt) ||
+ IsA(cte->ctequery, UpdateStmt) ||
+ IsA(cte->ctequery, DeleteStmt));
+
+ pstate->p_hasModifyingCTE = true;
+ }
+ }
+
+ if (withClause->recursive)
+ {
+ /*
+ * For WITH RECURSIVE, we rearrange the list elements if needed to
+ * eliminate forward references. First, build a work array and set up
+ * the data structure needed by the tree walkers.
+ */
+ CteState cstate;
+ int i;
+
+ cstate.pstate = pstate;
+ cstate.numitems = list_length(withClause->ctes);
+ cstate.items = (CteItem *) palloc0(cstate.numitems * sizeof(CteItem));
+ i = 0;
+ foreach(lc, withClause->ctes)
+ {
+ cstate.items[i].cte = (CommonTableExpr *) lfirst(lc);
+ cstate.items[i].id = i;
+ i++;
+ }
+
+ /*
+ * Find all the dependencies and sort the CteItems into a safe
+ * processing order. Also, mark CTEs that contain self-references.
+ */
+ makeDependencyGraph(&cstate);
+
+ /*
+ * Check that recursive queries are well-formed.
+ */
+ checkWellFormedRecursion(&cstate);
+
+ /*
+ * Set up the ctenamespace for parse analysis. Per spec, all the WITH
+ * items are visible to all others, so stuff them all in before parse
+ * analysis. We build the list in safe processing order so that the
+ * planner can process the queries in sequence.
+ */
+ for (i = 0; i < cstate.numitems; i++)
+ {
+ CommonTableExpr *cte = cstate.items[i].cte;
+
+ pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte);
+ }
+
+ /*
+ * Do parse analysis in the order determined by the topological sort.
+ */
+ for (i = 0; i < cstate.numitems; i++)
+ {
+ CommonTableExpr *cte = cstate.items[i].cte;
+
+ analyzeCTE(pstate, cte);
+ }
+ }
+ else
+ {
+ /*
+ * For non-recursive WITH, just analyze each CTE in sequence and then
+ * add it to the ctenamespace. This corresponds to the spec's
+ * definition of the scope of each WITH name. However, to allow error
+ * reports to be aware of the possibility of an erroneous reference,
+ * we maintain a list in p_future_ctes of the not-yet-visible CTEs.
+ */
+ pstate->p_future_ctes = list_copy(withClause->ctes);
+
+ foreach(lc, withClause->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+
+ analyzeCTE(pstate, cte);
+ pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte);
+ pstate->p_future_ctes = list_delete_first(pstate->p_future_ctes);
+ }
+ }
+
+ return pstate->p_ctenamespace;
+}
+
+
+/*
+ * Perform the actual parse analysis transformation of one CTE. All
+ * CTEs it depends on have already been loaded into pstate->p_ctenamespace,
+ * and have been marked with the correct output column names/types.
+ */
+static void
+analyzeCTE(ParseState *pstate, CommonTableExpr *cte)
+{
+ Query *query;
+
+ /* Analysis not done already */
+ Assert(!IsA(cte->ctequery, Query));
+
+ query = parse_sub_analyze(cte->ctequery, pstate, cte, false, true);
+ cte->ctequery = (Node *) query;
+
+ /*
+ * Check that we got something reasonable. These first two cases should
+ * be prevented by the grammar.
+ */
+ if (!IsA(query, Query))
+ elog(ERROR, "unexpected non-Query statement in WITH");
+ if (query->utilityStmt != NULL)
+ elog(ERROR, "unexpected utility statement in WITH");
+
+ /*
+ * We disallow data-modifying WITH except at the top level of a query,
+ * because it's not clear when such a modification should be executed.
+ */
+ if (query->commandType != CMD_SELECT &&
+ pstate->parentParseState != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("WITH clause containing a data-modifying statement must be at the top level"),
+ parser_errposition(pstate, cte->location)));
+
+ /*
+ * CTE queries are always marked not canSetTag. (Currently this only
+ * matters for data-modifying statements, for which the flag will be
+ * propagated to the ModifyTable plan node.)
+ */
+ query->canSetTag = false;
+
+ if (!cte->cterecursive)
+ {
+ /* Compute the output column names/types if not done yet */
+ analyzeCTETargetList(pstate, cte, GetCTETargetList(cte));
+ }
+ else
+ {
+ /*
+ * Verify that the previously determined output column types and
+ * collations match what the query really produced. We have to check
+ * this because the recursive term could have overridden the
+ * non-recursive term, and we don't have any easy way to fix that.
+ */
+ ListCell *lctlist,
+ *lctyp,
+ *lctypmod,
+ *lccoll;
+ int varattno;
+
+ lctyp = list_head(cte->ctecoltypes);
+ lctypmod = list_head(cte->ctecoltypmods);
+ lccoll = list_head(cte->ctecolcollations);
+ varattno = 0;
+ foreach(lctlist, GetCTETargetList(cte))
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(lctlist);
+ Node *texpr;
+
+ if (te->resjunk)
+ continue;
+ varattno++;
+ Assert(varattno == te->resno);
+ if (lctyp == NULL || lctypmod == NULL || lccoll == NULL) /* shouldn't happen */
+ elog(ERROR, "wrong number of output columns in WITH");
+ texpr = (Node *) te->expr;
+ if (exprType(texpr) != lfirst_oid(lctyp) ||
+ exprTypmod(texpr) != lfirst_int(lctypmod))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("recursive query \"%s\" column %d has type %s in non-recursive term but type %s overall",
+ cte->ctename, varattno,
+ format_type_with_typemod(lfirst_oid(lctyp),
+ lfirst_int(lctypmod)),
+ format_type_with_typemod(exprType(texpr),
+ exprTypmod(texpr))),
+ errhint("Cast the output of the non-recursive term to the correct type."),
+ parser_errposition(pstate, exprLocation(texpr))));
+ if (exprCollation(texpr) != lfirst_oid(lccoll))
+ ereport(ERROR,
+ (errcode(ERRCODE_COLLATION_MISMATCH),
+ errmsg("recursive query \"%s\" column %d has collation \"%s\" in non-recursive term but collation \"%s\" overall",
+ cte->ctename, varattno,
+ get_collation_name(lfirst_oid(lccoll)),
+ get_collation_name(exprCollation(texpr))),
+ errhint("Use the COLLATE clause to set the collation of the non-recursive term."),
+ parser_errposition(pstate, exprLocation(texpr))));
+ lctyp = lnext(cte->ctecoltypes, lctyp);
+ lctypmod = lnext(cte->ctecoltypmods, lctypmod);
+ lccoll = lnext(cte->ctecolcollations, lccoll);
+ }
+ if (lctyp != NULL || lctypmod != NULL || lccoll != NULL) /* shouldn't happen */
+ elog(ERROR, "wrong number of output columns in WITH");
+ }
+
+ if (cte->search_clause || cte->cycle_clause)
+ {
+ Query *ctequery;
+ SetOperationStmt *sos;
+
+ if (!cte->cterecursive)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("WITH query is not recursive"),
+ parser_errposition(pstate, cte->location)));
+
+ /*
+ * SQL requires a WITH list element (CTE) to be "expandable" in order
+ * to allow a search or cycle clause. That is a stronger requirement
+ * than just being recursive. It basically means the query expression
+ * looks like
+ *
+ * non-recursive query UNION [ALL] recursive query
+ *
+ * and that the recursive query is not itself a set operation.
+ *
+ * As of this writing, most of these criteria are already satisfied by
+ * all recursive CTEs allowed by PostgreSQL. In the future, if
+ * further variants recursive CTEs are accepted, there might be
+ * further checks required here to determine what is "expandable".
+ */
+
+ ctequery = castNode(Query, cte->ctequery);
+ Assert(ctequery->setOperations);
+ sos = castNode(SetOperationStmt, ctequery->setOperations);
+
+ /*
+ * This left side check is not required for expandability, but
+ * rewriteSearchAndCycle() doesn't currently have support for it, so
+ * we catch it here.
+ */
+ if (!IsA(sos->larg, RangeTblRef))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("with a SEARCH or CYCLE clause, the left side of the UNION must be a SELECT")));
+
+ if (!IsA(sos->rarg, RangeTblRef))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("with a SEARCH or CYCLE clause, the right side of the UNION must be a SELECT")));
+ }
+
+ if (cte->search_clause)
+ {
+ ListCell *lc;
+ List *seen = NIL;
+
+ foreach(lc, cte->search_clause->search_col_list)
+ {
+ Value *colname = lfirst(lc);
+
+ if (!list_member(cte->ctecolnames, colname))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("search column \"%s\" not in WITH query column list",
+ strVal(colname)),
+ parser_errposition(pstate, cte->search_clause->location)));
+
+ if (list_member(seen, colname))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("search column \"%s\" specified more than once",
+ strVal(colname)),
+ parser_errposition(pstate, cte->search_clause->location)));
+ seen = lappend(seen, colname);
+ }
+
+ if (list_member(cte->ctecolnames, makeString(cte->search_clause->search_seq_column)))
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("search sequence column name \"%s\" already used in WITH query column list",
+ cte->search_clause->search_seq_column),
+ parser_errposition(pstate, cte->search_clause->location));
+ }
+
+ if (cte->cycle_clause)
+ {
+ ListCell *lc;
+ List *seen = NIL;
+ TypeCacheEntry *typentry;
+ Oid op;
+
+ foreach(lc, cte->cycle_clause->cycle_col_list)
+ {
+ Value *colname = lfirst(lc);
+
+ if (!list_member(cte->ctecolnames, colname))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cycle column \"%s\" not in WITH query column list",
+ strVal(colname)),
+ parser_errposition(pstate, cte->cycle_clause->location)));
+
+ if (list_member(seen, colname))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("cycle column \"%s\" specified more than once",
+ strVal(colname)),
+ parser_errposition(pstate, cte->cycle_clause->location)));
+ seen = lappend(seen, colname);
+ }
+
+ if (list_member(cte->ctecolnames, makeString(cte->cycle_clause->cycle_mark_column)))
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cycle mark column name \"%s\" already used in WITH query column list",
+ cte->cycle_clause->cycle_mark_column),
+ parser_errposition(pstate, cte->cycle_clause->location));
+
+ cte->cycle_clause->cycle_mark_value = transformExpr(pstate, cte->cycle_clause->cycle_mark_value,
+ EXPR_KIND_CYCLE_MARK);
+ cte->cycle_clause->cycle_mark_default = transformExpr(pstate, cte->cycle_clause->cycle_mark_default,
+ EXPR_KIND_CYCLE_MARK);
+
+ if (list_member(cte->ctecolnames, makeString(cte->cycle_clause->cycle_path_column)))
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cycle path column name \"%s\" already used in WITH query column list",
+ cte->cycle_clause->cycle_path_column),
+ parser_errposition(pstate, cte->cycle_clause->location));
+
+ if (strcmp(cte->cycle_clause->cycle_mark_column,
+ cte->cycle_clause->cycle_path_column) == 0)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cycle mark column name and cycle path column name are the same"),
+ parser_errposition(pstate, cte->cycle_clause->location));
+
+ cte->cycle_clause->cycle_mark_type = select_common_type(pstate,
+ list_make2(cte->cycle_clause->cycle_mark_value,
+ cte->cycle_clause->cycle_mark_default),
+ "CYCLE", NULL);
+ cte->cycle_clause->cycle_mark_value = coerce_to_common_type(pstate,
+ cte->cycle_clause->cycle_mark_value,
+ cte->cycle_clause->cycle_mark_type,
+ "CYCLE/SET/TO");
+ cte->cycle_clause->cycle_mark_default = coerce_to_common_type(pstate,
+ cte->cycle_clause->cycle_mark_default,
+ cte->cycle_clause->cycle_mark_type,
+ "CYCLE/SET/DEFAULT");
+
+ cte->cycle_clause->cycle_mark_typmod = select_common_typmod(pstate,
+ list_make2(cte->cycle_clause->cycle_mark_value,
+ cte->cycle_clause->cycle_mark_default),
+ cte->cycle_clause->cycle_mark_type);
+
+ cte->cycle_clause->cycle_mark_collation = select_common_collation(pstate,
+ list_make2(cte->cycle_clause->cycle_mark_value,
+ cte->cycle_clause->cycle_mark_default),
+ true);
+
+ typentry = lookup_type_cache(cte->cycle_clause->cycle_mark_type, TYPECACHE_EQ_OPR);
+ if (!typentry->eq_opr)
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(cte->cycle_clause->cycle_mark_type)));
+ op = get_negator(typentry->eq_opr);
+ if (!op)
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an inequality operator for type %s",
+ format_type_be(cte->cycle_clause->cycle_mark_type)));
+
+ cte->cycle_clause->cycle_mark_neop = op;
+ }
+
+ if (cte->search_clause && cte->cycle_clause)
+ {
+ if (strcmp(cte->search_clause->search_seq_column,
+ cte->cycle_clause->cycle_mark_column) == 0)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("search sequence column name and cycle mark column name are the same"),
+ parser_errposition(pstate, cte->search_clause->location));
+
+ if (strcmp(cte->search_clause->search_seq_column,
+ cte->cycle_clause->cycle_path_column) == 0)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("search sequence column name and cycle path column name are the same"),
+ parser_errposition(pstate, cte->search_clause->location));
+ }
+}
+
+/*
+ * Compute derived fields of a CTE, given the transformed output targetlist
+ *
+ * For a nonrecursive CTE, this is called after transforming the CTE's query.
+ * For a recursive CTE, we call it after transforming the non-recursive term,
+ * and pass the targetlist emitted by the non-recursive term only.
+ *
+ * Note: in the recursive case, the passed pstate is actually the one being
+ * used to analyze the CTE's query, so it is one level lower down than in
+ * the nonrecursive case. This doesn't matter since we only use it for
+ * error message context anyway.
+ */
+void
+analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist)
+{
+ int numaliases;
+ int varattno;
+ ListCell *tlistitem;
+
+ /* Not done already ... */
+ Assert(cte->ctecolnames == NIL);
+
+ /*
+ * We need to determine column names, types, and collations. The alias
+ * column names override anything coming from the query itself. (Note:
+ * the SQL spec says that the alias list must be empty or exactly as long
+ * as the output column set; but we allow it to be shorter for consistency
+ * with Alias handling.)
+ */
+ cte->ctecolnames = copyObject(cte->aliascolnames);
+ cte->ctecoltypes = cte->ctecoltypmods = cte->ctecolcollations = NIL;
+ numaliases = list_length(cte->aliascolnames);
+ varattno = 0;
+ foreach(tlistitem, tlist)
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(tlistitem);
+ Oid coltype;
+ int32 coltypmod;
+ Oid colcoll;
+
+ if (te->resjunk)
+ continue;
+ varattno++;
+ Assert(varattno == te->resno);
+ if (varattno > numaliases)
+ {
+ char *attrname;
+
+ attrname = pstrdup(te->resname);
+ cte->ctecolnames = lappend(cte->ctecolnames, makeString(attrname));
+ }
+ coltype = exprType((Node *) te->expr);
+ coltypmod = exprTypmod((Node *) te->expr);
+ colcoll = exprCollation((Node *) te->expr);
+
+ /*
+ * If the CTE is recursive, force the exposed column type of any
+ * "unknown" column to "text". We must deal with this here because
+ * we're called on the non-recursive term before there's been any
+ * attempt to force unknown output columns to some other type. We
+ * have to resolve unknowns before looking at the recursive term.
+ *
+ * The column might contain 'foo' COLLATE "bar", so don't override
+ * collation if it's already set.
+ */
+ if (cte->cterecursive && coltype == UNKNOWNOID)
+ {
+ coltype = TEXTOID;
+ coltypmod = -1; /* should be -1 already, but be sure */
+ if (!OidIsValid(colcoll))
+ colcoll = DEFAULT_COLLATION_OID;
+ }
+ cte->ctecoltypes = lappend_oid(cte->ctecoltypes, coltype);
+ cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, coltypmod);
+ cte->ctecolcollations = lappend_oid(cte->ctecolcollations, colcoll);
+ }
+ if (varattno < numaliases)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("WITH query \"%s\" has %d columns available but %d columns specified",
+ cte->ctename, varattno, numaliases),
+ parser_errposition(pstate, cte->location)));
+}
+
+
+/*
+ * Identify the cross-references of a list of WITH RECURSIVE items,
+ * and sort into an order that has no forward references.
+ */
+static void
+makeDependencyGraph(CteState *cstate)
+{
+ int i;
+
+ for (i = 0; i < cstate->numitems; i++)
+ {
+ CommonTableExpr *cte = cstate->items[i].cte;
+
+ cstate->curitem = i;
+ cstate->innerwiths = NIL;
+ makeDependencyGraphWalker((Node *) cte->ctequery, cstate);
+ Assert(cstate->innerwiths == NIL);
+ }
+
+ TopologicalSort(cstate->pstate, cstate->items, cstate->numitems);
+}
+
+/*
+ * Tree walker function to detect cross-references and self-references of the
+ * CTEs in a WITH RECURSIVE list.
+ */
+static bool
+makeDependencyGraphWalker(Node *node, CteState *cstate)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, RangeVar))
+ {
+ RangeVar *rv = (RangeVar *) node;
+
+ /* If unqualified name, might be a CTE reference */
+ if (!rv->schemaname)
+ {
+ ListCell *lc;
+ int i;
+
+ /* ... but first see if it's captured by an inner WITH */
+ foreach(lc, cstate->innerwiths)
+ {
+ List *withlist = (List *) lfirst(lc);
+ ListCell *lc2;
+
+ foreach(lc2, withlist)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc2);
+
+ if (strcmp(rv->relname, cte->ctename) == 0)
+ return false; /* yes, so bail out */
+ }
+ }
+
+ /* No, could be a reference to the query level we are working on */
+ for (i = 0; i < cstate->numitems; i++)
+ {
+ CommonTableExpr *cte = cstate->items[i].cte;
+
+ if (strcmp(rv->relname, cte->ctename) == 0)
+ {
+ int myindex = cstate->curitem;
+
+ if (i != myindex)
+ {
+ /* Add cross-item dependency */
+ cstate->items[myindex].depends_on =
+ bms_add_member(cstate->items[myindex].depends_on,
+ cstate->items[i].id);
+ }
+ else
+ {
+ /* Found out this one is self-referential */
+ cte->cterecursive = true;
+ }
+ break;
+ }
+ }
+ }
+ return false;
+ }
+ if (IsA(node, SelectStmt))
+ {
+ SelectStmt *stmt = (SelectStmt *) node;
+ ListCell *lc;
+
+ if (stmt->withClause)
+ {
+ if (stmt->withClause->recursive)
+ {
+ /*
+ * In the RECURSIVE case, all query names of the WITH are
+ * visible to all WITH items as well as the main query. So
+ * push them all on, process, pop them all off.
+ */
+ cstate->innerwiths = lcons(stmt->withClause->ctes,
+ cstate->innerwiths);
+ foreach(lc, stmt->withClause->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+
+ (void) makeDependencyGraphWalker(cte->ctequery, cstate);
+ }
+ (void) raw_expression_tree_walker(node,
+ makeDependencyGraphWalker,
+ (void *) cstate);
+ cstate->innerwiths = list_delete_first(cstate->innerwiths);
+ }
+ else
+ {
+ /*
+ * In the non-RECURSIVE case, query names are visible to the
+ * WITH items after them and to the main query.
+ */
+ cstate->innerwiths = lcons(NIL, cstate->innerwiths);
+ foreach(lc, stmt->withClause->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+ ListCell *cell1;
+
+ (void) makeDependencyGraphWalker(cte->ctequery, cstate);
+ /* note that recursion could mutate innerwiths list */
+ cell1 = list_head(cstate->innerwiths);
+ lfirst(cell1) = lappend((List *) lfirst(cell1), cte);
+ }
+ (void) raw_expression_tree_walker(node,
+ makeDependencyGraphWalker,
+ (void *) cstate);
+ cstate->innerwiths = list_delete_first(cstate->innerwiths);
+ }
+ /* We're done examining the SelectStmt */
+ return false;
+ }
+ /* if no WITH clause, just fall through for normal processing */
+ }
+ if (IsA(node, WithClause))
+ {
+ /*
+ * Prevent raw_expression_tree_walker from recursing directly into a
+ * WITH clause. We need that to happen only under the control of the
+ * code above.
+ */
+ return false;
+ }
+ return raw_expression_tree_walker(node,
+ makeDependencyGraphWalker,
+ (void *) cstate);
+}
+
+/*
+ * Sort by dependencies, using a standard topological sort operation
+ */
+static void
+TopologicalSort(ParseState *pstate, CteItem *items, int numitems)
+{
+ int i,
+ j;
+
+ /* for each position in sequence ... */
+ for (i = 0; i < numitems; i++)
+ {
+ /* ... scan the remaining items to find one that has no dependencies */
+ for (j = i; j < numitems; j++)
+ {
+ if (bms_is_empty(items[j].depends_on))
+ break;
+ }
+
+ /* if we didn't find one, the dependency graph has a cycle */
+ if (j >= numitems)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("mutual recursion between WITH items is not implemented"),
+ parser_errposition(pstate, items[i].cte->location)));
+
+ /*
+ * Found one. Move it to front and remove it from every other item's
+ * dependencies.
+ */
+ if (i != j)
+ {
+ CteItem tmp;
+
+ tmp = items[i];
+ items[i] = items[j];
+ items[j] = tmp;
+ }
+
+ /*
+ * Items up through i are known to have no dependencies left, so we
+ * can skip them in this loop.
+ */
+ for (j = i + 1; j < numitems; j++)
+ {
+ items[j].depends_on = bms_del_member(items[j].depends_on,
+ items[i].id);
+ }
+ }
+}
+
+
+/*
+ * Check that recursive queries are well-formed.
+ */
+static void
+checkWellFormedRecursion(CteState *cstate)
+{
+ int i;
+
+ for (i = 0; i < cstate->numitems; i++)
+ {
+ CommonTableExpr *cte = cstate->items[i].cte;
+ SelectStmt *stmt = (SelectStmt *) cte->ctequery;
+
+ Assert(!IsA(stmt, Query)); /* not analyzed yet */
+
+ /* Ignore items that weren't found to be recursive */
+ if (!cte->cterecursive)
+ continue;
+
+ /* Must be a SELECT statement */
+ if (!IsA(stmt, SelectStmt))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_RECURSION),
+ errmsg("recursive query \"%s\" must not contain data-modifying statements",
+ cte->ctename),
+ parser_errposition(cstate->pstate, cte->location)));
+
+ /* Must have top-level UNION */
+ if (stmt->op != SETOP_UNION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_RECURSION),
+ errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term",
+ cte->ctename),
+ parser_errposition(cstate->pstate, cte->location)));
+
+ /* The left-hand operand mustn't contain self-reference at all */
+ cstate->curitem = i;
+ cstate->innerwiths = NIL;
+ cstate->selfrefcount = 0;
+ cstate->context = RECURSION_NONRECURSIVETERM;
+ checkWellFormedRecursionWalker((Node *) stmt->larg, cstate);
+ Assert(cstate->innerwiths == NIL);
+
+ /* Right-hand operand should contain one reference in a valid place */
+ cstate->curitem = i;
+ cstate->innerwiths = NIL;
+ cstate->selfrefcount = 0;
+ cstate->context = RECURSION_OK;
+ checkWellFormedRecursionWalker((Node *) stmt->rarg, cstate);
+ Assert(cstate->innerwiths == NIL);
+ if (cstate->selfrefcount != 1) /* shouldn't happen */
+ elog(ERROR, "missing recursive reference");
+
+ /* WITH mustn't contain self-reference, either */
+ if (stmt->withClause)
+ {
+ cstate->curitem = i;
+ cstate->innerwiths = NIL;
+ cstate->selfrefcount = 0;
+ cstate->context = RECURSION_SUBLINK;
+ checkWellFormedRecursionWalker((Node *) stmt->withClause->ctes,
+ cstate);
+ Assert(cstate->innerwiths == NIL);
+ }
+
+ /*
+ * Disallow ORDER BY and similar decoration atop the UNION. These
+ * don't make sense because it's impossible to figure out what they
+ * mean when we have only part of the recursive query's results. (If
+ * we did allow them, we'd have to check for recursive references
+ * inside these subtrees.)
+ */
+ if (stmt->sortClause)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ORDER BY in a recursive query is not implemented"),
+ parser_errposition(cstate->pstate,
+ exprLocation((Node *) stmt->sortClause))));
+ if (stmt->limitOffset)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("OFFSET in a recursive query is not implemented"),
+ parser_errposition(cstate->pstate,
+ exprLocation(stmt->limitOffset))));
+ if (stmt->limitCount)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("LIMIT in a recursive query is not implemented"),
+ parser_errposition(cstate->pstate,
+ exprLocation(stmt->limitCount))));
+ if (stmt->lockingClause)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("FOR UPDATE/SHARE in a recursive query is not implemented"),
+ parser_errposition(cstate->pstate,
+ exprLocation((Node *) stmt->lockingClause))));
+ }
+}
+
+/*
+ * Tree walker function to detect invalid self-references in a recursive query.
+ */
+static bool
+checkWellFormedRecursionWalker(Node *node, CteState *cstate)
+{
+ RecursionContext save_context = cstate->context;
+
+ if (node == NULL)
+ return false;
+ if (IsA(node, RangeVar))
+ {
+ RangeVar *rv = (RangeVar *) node;
+
+ /* If unqualified name, might be a CTE reference */
+ if (!rv->schemaname)
+ {
+ ListCell *lc;
+ CommonTableExpr *mycte;
+
+ /* ... but first see if it's captured by an inner WITH */
+ foreach(lc, cstate->innerwiths)
+ {
+ List *withlist = (List *) lfirst(lc);
+ ListCell *lc2;
+
+ foreach(lc2, withlist)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc2);
+
+ if (strcmp(rv->relname, cte->ctename) == 0)
+ return false; /* yes, so bail out */
+ }
+ }
+
+ /* No, could be a reference to the query level we are working on */
+ mycte = cstate->items[cstate->curitem].cte;
+ if (strcmp(rv->relname, mycte->ctename) == 0)
+ {
+ /* Found a recursive reference to the active query */
+ if (cstate->context != RECURSION_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_RECURSION),
+ errmsg(recursion_errormsgs[cstate->context],
+ mycte->ctename),
+ parser_errposition(cstate->pstate,
+ rv->location)));
+ /* Count references */
+ if (++(cstate->selfrefcount) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_RECURSION),
+ errmsg("recursive reference to query \"%s\" must not appear more than once",
+ mycte->ctename),
+ parser_errposition(cstate->pstate,
+ rv->location)));
+ }
+ }
+ return false;
+ }
+ if (IsA(node, SelectStmt))
+ {
+ SelectStmt *stmt = (SelectStmt *) node;
+ ListCell *lc;
+
+ if (stmt->withClause)
+ {
+ if (stmt->withClause->recursive)
+ {
+ /*
+ * In the RECURSIVE case, all query names of the WITH are
+ * visible to all WITH items as well as the main query. So
+ * push them all on, process, pop them all off.
+ */
+ cstate->innerwiths = lcons(stmt->withClause->ctes,
+ cstate->innerwiths);
+ foreach(lc, stmt->withClause->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+
+ (void) checkWellFormedRecursionWalker(cte->ctequery, cstate);
+ }
+ checkWellFormedSelectStmt(stmt, cstate);
+ cstate->innerwiths = list_delete_first(cstate->innerwiths);
+ }
+ else
+ {
+ /*
+ * In the non-RECURSIVE case, query names are visible to the
+ * WITH items after them and to the main query.
+ */
+ cstate->innerwiths = lcons(NIL, cstate->innerwiths);
+ foreach(lc, stmt->withClause->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+ ListCell *cell1;
+
+ (void) checkWellFormedRecursionWalker(cte->ctequery, cstate);
+ /* note that recursion could mutate innerwiths list */
+ cell1 = list_head(cstate->innerwiths);
+ lfirst(cell1) = lappend((List *) lfirst(cell1), cte);
+ }
+ checkWellFormedSelectStmt(stmt, cstate);
+ cstate->innerwiths = list_delete_first(cstate->innerwiths);
+ }
+ }
+ else
+ checkWellFormedSelectStmt(stmt, cstate);
+ /* We're done examining the SelectStmt */
+ return false;
+ }
+ if (IsA(node, WithClause))
+ {
+ /*
+ * Prevent raw_expression_tree_walker from recursing directly into a
+ * WITH clause. We need that to happen only under the control of the
+ * code above.
+ */
+ return false;
+ }
+ if (IsA(node, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) node;
+
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+ checkWellFormedRecursionWalker(j->larg, cstate);
+ checkWellFormedRecursionWalker(j->rarg, cstate);
+ checkWellFormedRecursionWalker(j->quals, cstate);
+ break;
+ case JOIN_LEFT:
+ checkWellFormedRecursionWalker(j->larg, cstate);
+ if (save_context == RECURSION_OK)
+ cstate->context = RECURSION_OUTERJOIN;
+ checkWellFormedRecursionWalker(j->rarg, cstate);
+ cstate->context = save_context;
+ checkWellFormedRecursionWalker(j->quals, cstate);
+ break;
+ case JOIN_FULL:
+ if (save_context == RECURSION_OK)
+ cstate->context = RECURSION_OUTERJOIN;
+ checkWellFormedRecursionWalker(j->larg, cstate);
+ checkWellFormedRecursionWalker(j->rarg, cstate);
+ cstate->context = save_context;
+ checkWellFormedRecursionWalker(j->quals, cstate);
+ break;
+ case JOIN_RIGHT:
+ if (save_context == RECURSION_OK)
+ cstate->context = RECURSION_OUTERJOIN;
+ checkWellFormedRecursionWalker(j->larg, cstate);
+ cstate->context = save_context;
+ checkWellFormedRecursionWalker(j->rarg, cstate);
+ checkWellFormedRecursionWalker(j->quals, cstate);
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ }
+ return false;
+ }
+ if (IsA(node, SubLink))
+ {
+ SubLink *sl = (SubLink *) node;
+
+ /*
+ * we intentionally override outer context, since subquery is
+ * independent
+ */
+ cstate->context = RECURSION_SUBLINK;
+ checkWellFormedRecursionWalker(sl->subselect, cstate);
+ cstate->context = save_context;
+ checkWellFormedRecursionWalker(sl->testexpr, cstate);
+ return false;
+ }
+ return raw_expression_tree_walker(node,
+ checkWellFormedRecursionWalker,
+ (void *) cstate);
+}
+
+/*
+ * subroutine for checkWellFormedRecursionWalker: process a SelectStmt
+ * without worrying about its WITH clause
+ */
+static void
+checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate)
+{
+ RecursionContext save_context = cstate->context;
+
+ if (save_context != RECURSION_OK)
+ {
+ /* just recurse without changing state */
+ raw_expression_tree_walker((Node *) stmt,
+ checkWellFormedRecursionWalker,
+ (void *) cstate);
+ }
+ else
+ {
+ switch (stmt->op)
+ {
+ case SETOP_NONE:
+ case SETOP_UNION:
+ raw_expression_tree_walker((Node *) stmt,
+ checkWellFormedRecursionWalker,
+ (void *) cstate);
+ break;
+ case SETOP_INTERSECT:
+ if (stmt->all)
+ cstate->context = RECURSION_INTERSECT;
+ checkWellFormedRecursionWalker((Node *) stmt->larg,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->rarg,
+ cstate);
+ cstate->context = save_context;
+ checkWellFormedRecursionWalker((Node *) stmt->sortClause,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->limitOffset,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->limitCount,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->lockingClause,
+ cstate);
+ /* stmt->withClause is intentionally ignored here */
+ break;
+ case SETOP_EXCEPT:
+ if (stmt->all)
+ cstate->context = RECURSION_EXCEPT;
+ checkWellFormedRecursionWalker((Node *) stmt->larg,
+ cstate);
+ cstate->context = RECURSION_EXCEPT;
+ checkWellFormedRecursionWalker((Node *) stmt->rarg,
+ cstate);
+ cstate->context = save_context;
+ checkWellFormedRecursionWalker((Node *) stmt->sortClause,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->limitOffset,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->limitCount,
+ cstate);
+ checkWellFormedRecursionWalker((Node *) stmt->lockingClause,
+ cstate);
+ /* stmt->withClause is intentionally ignored here */
+ break;
+ default:
+ elog(ERROR, "unrecognized set op: %d",
+ (int) stmt->op);
+ }
+ }
+}